From cf0daedefad6bdebe13e362a2ef974ab5096b48a Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Tue, 27 Jan 2026 13:48:08 +0000 Subject: [PATCH] Changed schema for Value -> Term, majorly breaking change (#622) * Changed schema for Value -> Term, majorly breaking change * Following the schema change, Value -> Term into all processing * Updated Cassandra for g, p, s, o index patterns (7 indexes) * Reviewed and updated all tests * Neo4j, Memgraph and FalkorDB remain broken, will look at once settled down --- Makefile | 4 +- docs/tech-specs/graph-contexts.md | 107 +++- tests/contract/conftest.py | 42 +- tests/contract/test_message_contracts.py | 103 ++-- .../test_structured_data_contracts.py | 2 +- .../test_agent_kg_extraction_integration.py | 22 +- .../integration/test_cassandra_integration.py | 108 ++-- .../test_import_export_graceful_shutdown.py | 14 +- .../test_kg_extract_store_integration.py | 98 ++-- .../unit/test_cores/test_knowledge_manager.py | 12 +- .../test_ontology/test_entity_contexts.py | 172 +++--- .../test_ontology/test_ontology_triples.py | 68 +-- .../test_prompt_and_extraction.py | 36 +- .../test_gateway/test_dispatch_serialize.py | 76 +-- tests/unit/test_knowledge_graph/conftest.py | 48 +- .../test_agent_extraction.py | 88 +-- .../test_agent_extraction_edge_cases.py | 14 +- .../test_graph_validation.py | 2 +- .../test_triple_construction.py | 274 ++++----- .../test_graph_embeddings_milvus_query.py | 64 +-- .../test_graph_embeddings_pinecone_query.py | 20 +- .../test_graph_embeddings_qdrant_query.py | 29 +- .../test_memgraph_user_collection_query.py | 54 +- .../test_neo4j_user_collection_query.py | 54 +- .../test_triples_cassandra_query.py | 220 ++++---- .../test_query/test_triples_falkordb_query.py | 136 ++--- .../test_query/test_triples_memgraph_query.py | 136 ++--- .../test_query/test_triples_neo4j_query.py | 84 +-- .../test_graph_embeddings_milvus_storage.py | 26 +- .../test_graph_embeddings_qdrant_storage.py | 20 +- .../test_neo4j_user_collection_isolation.py | 42 +- .../test_triples_cassandra_storage.py | 51 +- .../test_triples_falkordb_storage.py | 38 +- .../test_triples_memgraph_storage.py | 32 +- .../test_triples_neo4j_storage.py | 47 +- trustgraph-base/trustgraph/api/flow.py | 39 +- trustgraph-base/trustgraph/api/knowledge.py | 11 +- trustgraph-base/trustgraph/api/library.py | 37 +- .../base/document_embeddings_query_service.py | 2 +- .../base/graph_embeddings_client.py | 12 +- .../base/graph_embeddings_query_service.py | 2 +- .../trustgraph/base/triples_client.py | 22 +- .../trustgraph/base/triples_query_service.py | 2 +- .../clients/triples_query_client.py | 6 +- .../messaging/translators/__init__.py | 2 +- .../messaging/translators/primitives.py | 140 ++++- .../messaging/translators/triples.py | 10 +- .../trustgraph/schema/core/primitives.py | 49 +- .../trustgraph/schema/knowledge/embeddings.py | 4 +- .../trustgraph/schema/knowledge/graph.py | 4 +- .../trustgraph/schema/services/lookup.py | 2 +- .../trustgraph/schema/services/query.py | 11 +- .../trustgraph/schema/services/retrieval.py | 2 +- trustgraph-bedrock/pyproject.toml | 2 +- trustgraph-cli/pyproject.toml | 2 +- trustgraph-embeddings-hf/pyproject.toml | 4 +- trustgraph-flow/pyproject.toml | 2 +- .../trustgraph/direct/cassandra_kg.py | 525 ++++++++++++------ .../trustgraph/extract/kg/agent/extract.py | 76 +-- .../extract/kg/definitions/extract.py | 16 +- .../trustgraph/extract/kg/ontology/extract.py | 112 ++-- .../extract/kg/ontology/triple_converter.py | 26 +- .../extract/kg/relationships/extract.py | 26 +- .../trustgraph/extract/kg/topics/extract.py | 8 +- .../trustgraph/gateway/dispatch/serialize.py | 49 +- .../query/doc_embeddings/milvus/service.py | 2 +- .../query/doc_embeddings/qdrant/service.py | 2 +- .../query/graph_embeddings/milvus/service.py | 6 +- .../graph_embeddings/pinecone/service.py | 6 +- .../query/graph_embeddings/qdrant/service.py | 6 +- .../query/triples/cassandra/service.py | 116 ++-- .../query/triples/falkordb/service.py | 94 ++-- .../query/triples/memgraph/service.py | 74 +-- .../trustgraph/query/triples/neo4j/service.py | 74 +-- .../storage/graph_embeddings/milvus/write.py | 19 +- .../graph_embeddings/pinecone/write.py | 19 +- .../storage/graph_embeddings/qdrant/write.py | 21 +- .../storage/triples/cassandra/write.py | 30 +- .../storage/triples/falkordb/write.py | 31 +- .../storage/triples/memgraph/write.py | 49 +- .../trustgraph/storage/triples/neo4j/write.py | 31 +- trustgraph-flow/trustgraph/tables/config.py | 2 +- .../trustgraph/tables/knowledge.py | 52 +- trustgraph-flow/trustgraph/tables/library.py | 36 +- trustgraph-ocr/pyproject.toml | 2 +- trustgraph-vertexai/pyproject.toml | 2 +- 86 files changed, 2458 insertions(+), 1764 deletions(-) diff --git a/Makefile b/Makefile index c7b4797f..8eb2d324 100644 --- a/Makefile +++ b/Makefile @@ -70,8 +70,8 @@ some-containers: -t ${CONTAINER_BASE}/trustgraph-base:${VERSION} . ${DOCKER} build -f containers/Containerfile.flow \ -t ${CONTAINER_BASE}/trustgraph-flow:${VERSION} . - ${DOCKER} build -f containers/Containerfile.vertexai \ - -t ${CONTAINER_BASE}/trustgraph-vertexai:${VERSION} . +# ${DOCKER} build -f containers/Containerfile.vertexai \ +# -t ${CONTAINER_BASE}/trustgraph-vertexai:${VERSION} . # ${DOCKER} build -f containers/Containerfile.mcp \ # -t ${CONTAINER_BASE}/trustgraph-mcp:${VERSION} . # ${DOCKER} build -f containers/Containerfile.vertexai \ diff --git a/docs/tech-specs/graph-contexts.md b/docs/tech-specs/graph-contexts.md index 8ddb3952..54737012 100644 --- a/docs/tech-specs/graph-contexts.md +++ b/docs/tech-specs/graph-contexts.md @@ -228,10 +228,15 @@ Following SPARQL conventions for backward compatibility: - **`g` omitted / None**: Query the default graph only - **`g` = specific IRI**: Query that named graph only -- **`g` = wildcard / `*`**: Query across all graphs +- **`g` = wildcard / `*`**: Query across all graphs (equivalent to SPARQL + `GRAPH ?g { ... }`) This keeps simple queries simple and makes named graph queries opt-in. +Cross-graph queries (g=wildcard) are fully supported. The Cassandra schema +includes dedicated tables (SPOG, POSG, OSPG) where g is a clustering column +rather than a partition key, enabling efficient queries across all graphs. + #### Temporal Queries **Find all facts discovered after a given date:** @@ -388,12 +393,78 @@ will proceed in phases: Cassandra requires multiple tables to support different query access patterns (each table efficiently queries by its partition key + clustering columns). -**Challenge: Quads** +##### Query Patterns -For triples, typical indexes are SPO, POS, OSP (partition by first, cluster by -rest). For quads, the graph dimension adds: SPOG, POSG, OSPG, GSPO, etc. +With quads (g, s, p, o), each position can be specified or wildcard, giving +16 possible query patterns: -**Challenge: Quoted Triples** +| # | g | s | p | o | Description | +|---|---|---|---|---|-------------| +| 1 | ? | ? | ? | ? | All quads | +| 2 | ? | ? | ? | o | By object | +| 3 | ? | ? | p | ? | By predicate | +| 4 | ? | ? | p | o | By predicate + object | +| 5 | ? | s | ? | ? | By subject | +| 6 | ? | s | ? | o | By subject + object | +| 7 | ? | s | p | ? | By subject + predicate | +| 8 | ? | s | p | o | Full triple (which graphs?) | +| 9 | g | ? | ? | ? | By graph | +| 10 | g | ? | ? | o | By graph + object | +| 11 | g | ? | p | ? | By graph + predicate | +| 12 | g | ? | p | o | By graph + predicate + object | +| 13 | g | s | ? | ? | By graph + subject | +| 14 | g | s | ? | o | By graph + subject + object | +| 15 | g | s | p | ? | By graph + subject + predicate | +| 16 | g | s | p | o | Exact quad | + +##### Table Design + +Cassandra constraint: You can only efficiently query by partition key, then +filter on clustering columns left-to-right. For g-wildcard queries, g must be +a clustering column. For g-specified queries, g in the partition key is more +efficient. + +**Two table families needed:** + +**Family A: g-wildcard queries** (g in clustering columns) + +| Table | Partition | Clustering | Supports patterns | +|-------|-----------|------------|-------------------| +| SPOG | (user, collection, s) | p, o, g | 5, 7, 8 | +| POSG | (user, collection, p) | o, s, g | 3, 4 | +| OSPG | (user, collection, o) | s, p, g | 2, 6 | + +**Family B: g-specified queries** (g in partition key) + +| Table | Partition | Clustering | Supports patterns | +|-------|-----------|------------|-------------------| +| GSPO | (user, collection, g, s) | p, o | 9, 13, 15, 16 | +| GPOS | (user, collection, g, p) | o, s | 11, 12 | +| GOSP | (user, collection, g, o) | s, p | 10, 14 | + +**Collection table** (for iteration and bulk deletion) + +| Table | Partition | Clustering | Purpose | +|-------|-----------|------------|---------| +| COLL | (user, collection) | g, s, p, o | Enumerate all quads in collection | + +##### Write and Delete Paths + +**Write path**: Insert into all 7 tables. + +**Delete collection path**: +1. Iterate COLL table for `(user, collection)` +2. For each quad, delete from all 6 query tables +3. Delete from COLL table (or range delete) + +**Delete single quad path**: Delete from all 7 tables directly. + +##### Storage Cost + +Each quad is stored 7 times. This is the cost of flexible querying combined +with efficient collection deletion. + +##### Quoted Triples in Storage Subject or object can be a triple itself. Options: @@ -425,29 +496,9 @@ Metadata table: - Pro: Clean separation, can index triple IDs - Con: Requires computing/managing triple identity, two-phase lookups -**Option C: Hybrid** -- Store quads normally with serialized quoted triple strings for simple cases -- Maintain a separate triple ID lookup for advanced queries -- Pro: Flexibility -- Con: Complexity - -**Recommendation**: TBD after prototyping. Option A is simplest for initial -implementation; Option B may be needed for advanced query patterns. - -#### Indexing Strategy - -Indexes must support the defined query patterns: - -| Query Type | Access Pattern | Index Needed | -|------------|----------------|--------------| -| Facts by date | P=discoveredOn, O>date | POG (predicate, object, graph) | -| Facts by source | P=supportedBy, O=source | POG | -| Facts by asserter | P=assertedBy, O=person | POG | -| Metadata for a fact | S=quotedTriple | SPO/SPOG | -| All facts in graph | G=graphIRI | GSPO | - -For temporal range queries (dates), Cassandra clustering column ordering -enables efficient scans when date is a clustering column. +**Recommendation**: Start with Option A (serialized strings) for simplicity. +Option B may be needed if advanced query patterns over quoted triple +components are required. 2. **Phase 2+: Other Backends** - Neo4j and other stores implemented in subsequent stages diff --git a/tests/contract/conftest.py b/tests/contract/conftest.py index 3d184d3d..e82ccd98 100644 --- a/tests/contract/conftest.py +++ b/tests/contract/conftest.py @@ -15,10 +15,10 @@ from trustgraph.schema import ( TextCompletionRequest, TextCompletionResponse, DocumentRagQuery, DocumentRagResponse, AgentRequest, AgentResponse, AgentStep, - Chunk, Triple, Triples, Value, Error, + Chunk, Triple, Triples, Term, Error, EntityContext, EntityContexts, GraphEmbeddings, EntityEmbeddings, - Metadata + Metadata, IRI, LITERAL ) @@ -43,7 +43,7 @@ def schema_registry(): "Chunk": Chunk, "Triple": Triple, "Triples": Triples, - "Value": Value, + "Term": Term, "Error": Error, "EntityContext": EntityContext, "EntityContexts": EntityContexts, @@ -98,26 +98,22 @@ def sample_message_data(): "collection": "test_collection", "metadata": [] }, - "Value": { - "value": "http://example.com/entity", - "is_uri": True, - "type": "" + "Term": { + "type": IRI, + "iri": "http://example.com/entity" }, "Triple": { - "s": Value( - value="http://example.com/subject", - is_uri=True, - type="" + "s": Term( + type=IRI, + iri="http://example.com/subject" ), - "p": Value( - value="http://example.com/predicate", - is_uri=True, - type="" + "p": Term( + type=IRI, + iri="http://example.com/predicate" ), - "o": Value( - value="Object value", - is_uri=False, - type="" + "o": Term( + type=LITERAL, + value="Object value" ) } } @@ -139,10 +135,10 @@ def invalid_message_data(): {"query": "test", "user": "test", "collection": "test", "doc_limit": -1}, # Invalid doc_limit {"query": "test"}, # Missing required fields ], - "Value": [ - {"value": None, "is_uri": True, "type": ""}, # Invalid value (None) - {"value": "test", "is_uri": "not_boolean", "type": ""}, # Invalid is_uri - {"value": 123, "is_uri": True, "type": ""}, # Invalid value (not string) + "Term": [ + {"type": IRI, "iri": None}, # Invalid iri (None) + {"type": "invalid_type", "value": "test"}, # Invalid type + {"type": LITERAL, "value": 123}, # Invalid value (not string) ] } diff --git a/tests/contract/test_message_contracts.py b/tests/contract/test_message_contracts.py index 6b10bd2f..746ebaed 100644 --- a/tests/contract/test_message_contracts.py +++ b/tests/contract/test_message_contracts.py @@ -15,14 +15,14 @@ from trustgraph.schema import ( TextCompletionRequest, TextCompletionResponse, DocumentRagQuery, DocumentRagResponse, AgentRequest, AgentResponse, AgentStep, - Chunk, Triple, Triples, Value, Error, + Chunk, Triple, Triples, Term, Error, EntityContext, EntityContexts, GraphEmbeddings, EntityEmbeddings, Metadata, Field, RowSchema, StructuredDataSubmission, ExtractedObject, QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse, StructuredQueryRequest, StructuredQueryResponse, - StructuredObjectEmbedding + StructuredObjectEmbedding, IRI, LITERAL ) from .conftest import validate_schema_contract, serialize_deserialize_test @@ -271,52 +271,51 @@ class TestAgentMessageContracts: class TestGraphMessageContracts: """Contract tests for Graph/Knowledge message schemas""" - def test_value_schema_contract(self, sample_message_data): - """Test Value schema contract""" + def test_term_schema_contract(self, sample_message_data): + """Test Term schema contract""" # Arrange - value_data = sample_message_data["Value"] + term_data = sample_message_data["Term"] # Act & Assert - assert validate_schema_contract(Value, value_data) - - # Test URI value - uri_value = Value(**value_data) - assert uri_value.value == "http://example.com/entity" - assert uri_value.is_uri is True + assert validate_schema_contract(Term, term_data) - # Test literal value - literal_value = Value( - value="Literal text value", - is_uri=False, - type="" + # Test URI term + uri_term = Term(**term_data) + assert uri_term.iri == "http://example.com/entity" + assert uri_term.type == IRI + + # Test literal term + literal_term = Term( + type=LITERAL, + value="Literal text value" ) - assert literal_value.value == "Literal text value" - assert literal_value.is_uri is False + assert literal_term.value == "Literal text value" + assert literal_term.type == LITERAL def test_triple_schema_contract(self, sample_message_data): """Test Triple schema contract""" # Arrange triple_data = sample_message_data["Triple"] - # Act & Assert - Triple uses Value objects, not dict validation + # Act & Assert - Triple uses Term objects, not dict validation triple = Triple( s=triple_data["s"], - p=triple_data["p"], + p=triple_data["p"], o=triple_data["o"] ) - assert triple.s.value == "http://example.com/subject" - assert triple.p.value == "http://example.com/predicate" + assert triple.s.iri == "http://example.com/subject" + assert triple.p.iri == "http://example.com/predicate" assert triple.o.value == "Object value" - assert triple.s.is_uri is True - assert triple.p.is_uri is True - assert triple.o.is_uri is False + assert triple.s.type == IRI + assert triple.p.type == IRI + assert triple.o.type == LITERAL def test_triples_schema_contract(self, sample_message_data): """Test Triples (batch) schema contract""" # Arrange metadata = Metadata(**sample_message_data["Metadata"]) triple = Triple(**sample_message_data["Triple"]) - + triples_data = { "metadata": metadata, "triples": [triple] @@ -324,11 +323,11 @@ class TestGraphMessageContracts: # Act & Assert assert validate_schema_contract(Triples, triples_data) - + triples = Triples(**triples_data) assert triples.metadata.id == "test-doc-123" assert len(triples.triples) == 1 - assert triples.triples[0].s.value == "http://example.com/subject" + assert triples.triples[0].s.iri == "http://example.com/subject" def test_chunk_schema_contract(self, sample_message_data): """Test Chunk schema contract""" @@ -349,29 +348,29 @@ class TestGraphMessageContracts: def test_entity_context_schema_contract(self): """Test EntityContext schema contract""" # Arrange - entity_value = Value(value="http://example.com/entity", is_uri=True, type="") + entity_term = Term(type=IRI, iri="http://example.com/entity") entity_context_data = { - "entity": entity_value, + "entity": entity_term, "context": "Context information about the entity" } # Act & Assert assert validate_schema_contract(EntityContext, entity_context_data) - + entity_context = EntityContext(**entity_context_data) - assert entity_context.entity.value == "http://example.com/entity" + assert entity_context.entity.iri == "http://example.com/entity" assert entity_context.context == "Context information about the entity" def test_entity_contexts_batch_schema_contract(self, sample_message_data): """Test EntityContexts (batch) schema contract""" # Arrange metadata = Metadata(**sample_message_data["Metadata"]) - entity_value = Value(value="http://example.com/entity", is_uri=True, type="") + entity_term = Term(type=IRI, iri="http://example.com/entity") entity_context = EntityContext( - entity=entity_value, + entity=entity_term, context="Entity context" ) - + entity_contexts_data = { "metadata": metadata, "entities": [entity_context] @@ -379,7 +378,7 @@ class TestGraphMessageContracts: # Act & Assert assert validate_schema_contract(EntityContexts, entity_contexts_data) - + entity_contexts = EntityContexts(**entity_contexts_data) assert entity_contexts.metadata.id == "test-doc-123" assert len(entity_contexts.entities) == 1 @@ -417,10 +416,10 @@ class TestMetadataMessageContracts: # Act & Assert assert validate_schema_contract(Metadata, metadata_data) - + metadata = Metadata(**metadata_data) assert len(metadata.metadata) == 1 - assert metadata.metadata[0].s.value == "http://example.com/subject" + assert metadata.metadata[0].s.iri == "http://example.com/subject" def test_error_schema_contract(self): """Test Error schema contract""" @@ -532,7 +531,7 @@ class TestSerializationContracts: # Test each schema in the registry for schema_name, schema_class in schema_registry.items(): if schema_name in sample_message_data: - # Skip Triple schema as it requires special handling with Value objects + # Skip Triple schema as it requires special handling with Term objects if schema_name == "Triple": continue @@ -541,36 +540,36 @@ class TestSerializationContracts: assert serialize_deserialize_test(schema_class, data), f"Serialization failed for {schema_name}" def test_triple_serialization_contract(self, sample_message_data): - """Test Triple schema serialization contract with Value objects""" + """Test Triple schema serialization contract with Term objects""" # Arrange triple_data = sample_message_data["Triple"] - + # Act triple = Triple( s=triple_data["s"], - p=triple_data["p"], + p=triple_data["p"], o=triple_data["o"] ) - - # Assert - Test that Value objects are properly constructed and accessible - assert triple.s.value == "http://example.com/subject" - assert triple.p.value == "http://example.com/predicate" + + # Assert - Test that Term objects are properly constructed and accessible + assert triple.s.iri == "http://example.com/subject" + assert triple.p.iri == "http://example.com/predicate" assert triple.o.value == "Object value" - assert isinstance(triple.s, Value) - assert isinstance(triple.p, Value) - assert isinstance(triple.o, Value) + assert isinstance(triple.s, Term) + assert isinstance(triple.p, Term) + assert isinstance(triple.o, Term) def test_nested_schema_serialization_contract(self, sample_message_data): """Test serialization of nested schemas""" # Test Triples (contains Metadata and Triple objects) metadata = Metadata(**sample_message_data["Metadata"]) triple = Triple(**sample_message_data["Triple"]) - + triples = Triples(metadata=metadata, triples=[triple]) - + # Verify nested objects maintain their contracts assert triples.metadata.id == "test-doc-123" - assert triples.triples[0].s.value == "http://example.com/subject" + assert triples.triples[0].s.iri == "http://example.com/subject" def test_array_field_serialization_contract(self): """Test serialization of array fields""" diff --git a/tests/contract/test_structured_data_contracts.py b/tests/contract/test_structured_data_contracts.py index 91707d4d..71ccd787 100644 --- a/tests/contract/test_structured_data_contracts.py +++ b/tests/contract/test_structured_data_contracts.py @@ -15,7 +15,7 @@ from trustgraph.schema import ( QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse, StructuredQueryRequest, StructuredQueryResponse, StructuredObjectEmbedding, Field, RowSchema, - Metadata, Error, Value + Metadata, Error ) from .conftest import serialize_deserialize_test diff --git a/tests/integration/test_agent_kg_extraction_integration.py b/tests/integration/test_agent_kg_extraction_integration.py index 01516d8b..849547c8 100644 --- a/tests/integration/test_agent_kg_extraction_integration.py +++ b/tests/integration/test_agent_kg_extraction_integration.py @@ -12,7 +12,7 @@ import json from unittest.mock import AsyncMock, MagicMock, patch from trustgraph.extract.kg.agent.extract import Processor as AgentKgExtractor -from trustgraph.schema import Chunk, Triple, Triples, Metadata, Value, Error +from trustgraph.schema import Chunk, Triple, Triples, Metadata, Term, Error, IRI, LITERAL from trustgraph.schema import EntityContext, EntityContexts, AgentRequest, AgentResponse from trustgraph.rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF from trustgraph.template.prompt_manager import PromptManager @@ -78,9 +78,9 @@ class TestAgentKgExtractionIntegration: id="doc123", metadata=[ Triple( - s=Value(value="doc123", is_uri=True), - p=Value(value="http://example.org/type", is_uri=True), - o=Value(value="document", is_uri=False) + s=Term(type=IRI, iri="doc123"), + p=Term(type=IRI, iri="http://example.org/type"), + o=Term(type=LITERAL, value="document") ) ] ) @@ -178,15 +178,15 @@ class TestAgentKgExtractionIntegration: assert len(sent_triples.triples) > 0 # Check that we have definition triples - definition_triples = [t for t in sent_triples.triples if t.p.value == DEFINITION] + definition_triples = [t for t in sent_triples.triples if t.p.iri == DEFINITION] assert len(definition_triples) >= 2 # Should have definitions for ML and Neural Networks - + # Check that we have label triples - label_triples = [t for t in sent_triples.triples if t.p.value == RDF_LABEL] + label_triples = [t for t in sent_triples.triples if t.p.iri == RDF_LABEL] assert len(label_triples) >= 2 # Should have labels for entities - + # Check subject-of relationships - subject_of_triples = [t for t in sent_triples.triples if t.p.value == SUBJECT_OF] + subject_of_triples = [t for t in sent_triples.triples if t.p.iri == SUBJECT_OF] assert len(subject_of_triples) >= 2 # Entities should be linked to document # Verify entity contexts were emitted @@ -198,7 +198,7 @@ class TestAgentKgExtractionIntegration: assert len(sent_contexts.entities) >= 2 # Should have contexts for both entities # Verify entity URIs are properly formed - entity_uris = [ec.entity.value for ec in sent_contexts.entities] + entity_uris = [ec.entity.iri for ec in sent_contexts.entities] assert f"{TRUSTGRAPH_ENTITIES}Machine%20Learning" in entity_uris assert f"{TRUSTGRAPH_ENTITIES}Neural%20Networks" in entity_uris @@ -401,7 +401,7 @@ class TestAgentKgExtractionIntegration: sent_triples = triples_publisher.send.call_args[0][0] # Check that unicode entity was properly processed - entity_labels = [t for t in sent_triples.triples if t.p.value == RDF_LABEL and t.o.value == "機械学習"] + entity_labels = [t for t in sent_triples.triples if t.p.iri == RDF_LABEL and t.o.value == "機械学習"] assert len(entity_labels) > 0 @pytest.mark.asyncio diff --git a/tests/integration/test_cassandra_integration.py b/tests/integration/test_cassandra_integration.py index 560f3132..2f5a4195 100644 --- a/tests/integration/test_cassandra_integration.py +++ b/tests/integration/test_cassandra_integration.py @@ -16,7 +16,7 @@ from .cassandra_test_helper import cassandra_container from trustgraph.direct.cassandra_kg import KnowledgeGraph from trustgraph.storage.triples.cassandra.write import Processor as StorageProcessor from trustgraph.query.triples.cassandra.service import Processor as QueryProcessor -from trustgraph.schema import Triple, Value, Metadata, Triples, TriplesQueryRequest +from trustgraph.schema import Triple, Term, Metadata, Triples, TriplesQueryRequest, IRI, LITERAL @pytest.mark.integration @@ -118,19 +118,19 @@ class TestCassandraIntegration: metadata=Metadata(user="testuser", collection="testcol"), triples=[ Triple( - s=Value(value="http://example.org/person1", is_uri=True), - p=Value(value="http://example.org/name", is_uri=True), - o=Value(value="Alice Smith", is_uri=False) + s=Term(type=IRI, iri="http://example.org/person1"), + p=Term(type=IRI, iri="http://example.org/name"), + o=Term(type=LITERAL, value="Alice Smith") ), Triple( - s=Value(value="http://example.org/person1", is_uri=True), - p=Value(value="http://example.org/age", is_uri=True), - o=Value(value="25", is_uri=False) + s=Term(type=IRI, iri="http://example.org/person1"), + p=Term(type=IRI, iri="http://example.org/age"), + o=Term(type=LITERAL, value="25") ), Triple( - s=Value(value="http://example.org/person1", is_uri=True), - p=Value(value="http://example.org/department", is_uri=True), - o=Value(value="Engineering", is_uri=False) + s=Term(type=IRI, iri="http://example.org/person1"), + p=Term(type=IRI, iri="http://example.org/department"), + o=Term(type=LITERAL, value="Engineering") ) ] ) @@ -181,19 +181,19 @@ class TestCassandraIntegration: metadata=Metadata(user="testuser", collection="testcol"), triples=[ Triple( - s=Value(value="http://example.org/alice", is_uri=True), - p=Value(value="http://example.org/knows", is_uri=True), - o=Value(value="http://example.org/bob", is_uri=True) + s=Term(type=IRI, iri="http://example.org/alice"), + p=Term(type=IRI, iri="http://example.org/knows"), + o=Term(type=IRI, iri="http://example.org/bob") ), Triple( - s=Value(value="http://example.org/alice", is_uri=True), - p=Value(value="http://example.org/age", is_uri=True), - o=Value(value="30", is_uri=False) + s=Term(type=IRI, iri="http://example.org/alice"), + p=Term(type=IRI, iri="http://example.org/age"), + o=Term(type=LITERAL, value="30") ), Triple( - s=Value(value="http://example.org/bob", is_uri=True), - p=Value(value="http://example.org/knows", is_uri=True), - o=Value(value="http://example.org/charlie", is_uri=True) + s=Term(type=IRI, iri="http://example.org/bob"), + p=Term(type=IRI, iri="http://example.org/knows"), + o=Term(type=IRI, iri="http://example.org/charlie") ) ] ) @@ -208,7 +208,7 @@ class TestCassandraIntegration: # Test S query (find all relationships for Alice) s_query = TriplesQueryRequest( - s=Value(value="http://example.org/alice", is_uri=True), + s=Term(type=IRI, iri="http://example.org/alice"), p=None, # None for wildcard o=None, # None for wildcard limit=10, @@ -218,18 +218,18 @@ class TestCassandraIntegration: s_results = await query_processor.query_triples(s_query) print(f"Query processor results: {len(s_results)}") for result in s_results: - print(f" S={result.s.value}, P={result.p.value}, O={result.o.value}") + print(f" S={result.s.iri}, P={result.p.iri}, O={result.o.iri if result.o.type == IRI else result.o.value}") assert len(s_results) == 2 - - s_predicates = [t.p.value for t in s_results] + + s_predicates = [t.p.iri for t in s_results] assert "http://example.org/knows" in s_predicates assert "http://example.org/age" in s_predicates print("✓ Subject queries via processor working") - + # Test P query (find all "knows" relationships) p_query = TriplesQueryRequest( s=None, # None for wildcard - p=Value(value="http://example.org/knows", is_uri=True), + p=Term(type=IRI, iri="http://example.org/knows"), o=None, # None for wildcard limit=10, user="testuser", @@ -238,8 +238,8 @@ class TestCassandraIntegration: p_results = await query_processor.query_triples(p_query) print(p_results) assert len(p_results) == 2 # Alice knows Bob, Bob knows Charlie - - p_subjects = [t.s.value for t in p_results] + + p_subjects = [t.s.iri for t in p_results] assert "http://example.org/alice" in p_subjects assert "http://example.org/bob" in p_subjects print("✓ Predicate queries via processor working") @@ -262,19 +262,19 @@ class TestCassandraIntegration: metadata=Metadata(user="concurrent_test", collection="people"), triples=[ Triple( - s=Value(value=f"http://example.org/{person_id}", is_uri=True), - p=Value(value="http://example.org/name", is_uri=True), - o=Value(value=name, is_uri=False) + s=Term(type=IRI, iri=f"http://example.org/{person_id}"), + p=Term(type=IRI, iri="http://example.org/name"), + o=Term(type=LITERAL, value=name) ), Triple( - s=Value(value=f"http://example.org/{person_id}", is_uri=True), - p=Value(value="http://example.org/age", is_uri=True), - o=Value(value=str(age), is_uri=False) + s=Term(type=IRI, iri=f"http://example.org/{person_id}"), + p=Term(type=IRI, iri="http://example.org/age"), + o=Term(type=LITERAL, value=str(age)) ), Triple( - s=Value(value=f"http://example.org/{person_id}", is_uri=True), - p=Value(value="http://example.org/department", is_uri=True), - o=Value(value=department, is_uri=False) + s=Term(type=IRI, iri=f"http://example.org/{person_id}"), + p=Term(type=IRI, iri="http://example.org/department"), + o=Term(type=LITERAL, value=department) ) ] ) @@ -333,36 +333,36 @@ class TestCassandraIntegration: triples=[ # People and their types Triple( - s=Value(value="http://company.org/alice", is_uri=True), - p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True), - o=Value(value="http://company.org/Employee", is_uri=True) + s=Term(type=IRI, iri="http://company.org/alice"), + p=Term(type=IRI, iri="http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), + o=Term(type=IRI, iri="http://company.org/Employee") ), Triple( - s=Value(value="http://company.org/bob", is_uri=True), - p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True), - o=Value(value="http://company.org/Employee", is_uri=True) + s=Term(type=IRI, iri="http://company.org/bob"), + p=Term(type=IRI, iri="http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), + o=Term(type=IRI, iri="http://company.org/Employee") ), # Relationships Triple( - s=Value(value="http://company.org/alice", is_uri=True), - p=Value(value="http://company.org/reportsTo", is_uri=True), - o=Value(value="http://company.org/bob", is_uri=True) + s=Term(type=IRI, iri="http://company.org/alice"), + p=Term(type=IRI, iri="http://company.org/reportsTo"), + o=Term(type=IRI, iri="http://company.org/bob") ), Triple( - s=Value(value="http://company.org/alice", is_uri=True), - p=Value(value="http://company.org/worksIn", is_uri=True), - o=Value(value="http://company.org/engineering", is_uri=True) + s=Term(type=IRI, iri="http://company.org/alice"), + p=Term(type=IRI, iri="http://company.org/worksIn"), + o=Term(type=IRI, iri="http://company.org/engineering") ), # Personal info Triple( - s=Value(value="http://company.org/alice", is_uri=True), - p=Value(value="http://company.org/fullName", is_uri=True), - o=Value(value="Alice Johnson", is_uri=False) + s=Term(type=IRI, iri="http://company.org/alice"), + p=Term(type=IRI, iri="http://company.org/fullName"), + o=Term(type=LITERAL, value="Alice Johnson") ), Triple( - s=Value(value="http://company.org/alice", is_uri=True), - p=Value(value="http://company.org/email", is_uri=True), - o=Value(value="alice@company.org", is_uri=False) + s=Term(type=IRI, iri="http://company.org/alice"), + p=Term(type=IRI, iri="http://company.org/email"), + o=Term(type=LITERAL, value="alice@company.org") ), ] ) diff --git a/tests/integration/test_import_export_graceful_shutdown.py b/tests/integration/test_import_export_graceful_shutdown.py index 30197731..13a851df 100644 --- a/tests/integration/test_import_export_graceful_shutdown.py +++ b/tests/integration/test_import_export_graceful_shutdown.py @@ -51,10 +51,10 @@ class MockWebSocket: "metadata": { "id": "test-id", "metadata": {}, - "user": "test-user", + "user": "test-user", "collection": "test-collection" }, - "triples": [{"s": {"v": "subject", "e": False}, "p": {"v": "predicate", "e": False}, "o": {"v": "object", "e": False}}] + "triples": [{"s": {"t": "l", "v": "subject"}, "p": {"t": "l", "v": "predicate"}, "o": {"t": "l", "v": "object"}}] } @@ -118,7 +118,7 @@ async def test_import_graceful_shutdown_integration(mock_backend): "user": "test-user", "collection": "test-collection" }, - "triples": [{"s": {"v": f"subject-{i}", "e": False}, "p": {"v": "predicate", "e": False}, "o": {"v": f"object-{i}", "e": False}}] + "triples": [{"s": {"t": "l", "v": f"subject-{i}"}, "p": {"t": "l", "v": "predicate"}, "o": {"t": "l", "v": f"object-{i}"}}] } messages.append(msg_data) @@ -163,7 +163,7 @@ async def test_export_no_message_loss_integration(mock_backend): "user": "test-user", "collection": "test-collection" }, - "triples": [{"s": {"v": f"export-subject-{i}", "e": False}, "p": {"v": "predicate", "e": False}, "o": {"v": f"export-object-{i}", "e": False}}] + "triples": [{"s": {"t": "l", "v": f"export-subject-{i}"}, "p": {"t": "l", "v": "predicate"}, "o": {"t": "l", "v": f"export-object-{i}"}}] } # Create Triples object instead of raw dict from trustgraph.schema import Triples, Metadata @@ -302,7 +302,7 @@ async def test_concurrent_import_export_shutdown(): "user": "test-user", "collection": "test-collection" }, - "triples": [{"s": {"v": f"concurrent-subject-{i}", "e": False}, "p": {"v": "predicate", "e": False}, "o": {"v": "object", "e": False}}] + "triples": [{"s": {"t": "l", "v": f"concurrent-subject-{i}"}, "p": {"t": "l", "v": "predicate"}, "o": {"t": "l", "v": "object"}}] } await import_handler.receive(msg) @@ -359,7 +359,7 @@ async def test_websocket_close_during_message_processing(): "user": "test-user", "collection": "test-collection" }, - "triples": [{"s": {"v": f"slow-subject-{i}", "e": False}, "p": {"v": "predicate", "e": False}, "o": {"v": "object", "e": False}}] + "triples": [{"s": {"t": "l", "v": f"slow-subject-{i}"}, "p": {"t": "l", "v": "predicate"}, "o": {"t": "l", "v": "object"}}] } task = asyncio.create_task(import_handler.receive(msg)) message_tasks.append(task) @@ -423,7 +423,7 @@ async def test_backpressure_during_shutdown(): # Simulate receiving and processing a message msg_data = { "metadata": {"id": f"msg-{i}"}, - "triples": [{"s": {"v": "subject", "e": False}, "p": {"v": "predicate", "e": False}, "o": {"v": "object", "e": False}}] + "triples": [{"s": {"t": "l", "v": "subject"}, "p": {"t": "l", "v": "predicate"}, "o": {"t": "l", "v": "object"}}] } await ws.send_json(msg_data) # Check if we should stop diff --git a/tests/integration/test_kg_extract_store_integration.py b/tests/integration/test_kg_extract_store_integration.py index dd13789f..3982cc13 100644 --- a/tests/integration/test_kg_extract_store_integration.py +++ b/tests/integration/test_kg_extract_store_integration.py @@ -15,7 +15,7 @@ from unittest.mock import AsyncMock, MagicMock, patch from trustgraph.extract.kg.definitions.extract import Processor as DefinitionsProcessor from trustgraph.extract.kg.relationships.extract import Processor as RelationshipsProcessor from trustgraph.storage.knowledge.store import Processor as KnowledgeStoreProcessor -from trustgraph.schema import Chunk, Triple, Triples, Metadata, Value, Error +from trustgraph.schema import Chunk, Triple, Triples, Metadata, Term, Error, IRI, LITERAL from trustgraph.schema import EntityContext, EntityContexts, GraphEmbeddings from trustgraph.rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF @@ -253,24 +253,24 @@ class TestKnowledgeGraphPipelineIntegration: if s and o: s_uri = definitions_processor.to_uri(s) - s_value = Value(value=str(s_uri), is_uri=True) - o_value = Value(value=str(o), is_uri=False) - + s_term = Term(type=IRI, iri=str(s_uri)) + o_term = Term(type=LITERAL, value=str(o)) + # Generate triples as the processor would triples.append(Triple( - s=s_value, - p=Value(value=RDF_LABEL, is_uri=True), - o=Value(value=s, is_uri=False) + s=s_term, + p=Term(type=IRI, iri=RDF_LABEL), + o=Term(type=LITERAL, value=s) )) - + triples.append(Triple( - s=s_value, - p=Value(value=DEFINITION, is_uri=True), - o=o_value + s=s_term, + p=Term(type=IRI, iri=DEFINITION), + o=o_term )) - + entities.append(EntityContext( - entity=s_value, + entity=s_term, context=defn["definition"] )) @@ -279,16 +279,16 @@ class TestKnowledgeGraphPipelineIntegration: assert len(entities) == 3 # 1 entity context per entity # Verify triple structure - label_triples = [t for t in triples if t.p.value == RDF_LABEL] - definition_triples = [t for t in triples if t.p.value == DEFINITION] - + label_triples = [t for t in triples if t.p.iri == RDF_LABEL] + definition_triples = [t for t in triples if t.p.iri == DEFINITION] + assert len(label_triples) == 3 assert len(definition_triples) == 3 - + # Verify entity contexts for entity in entities: - assert entity.entity.is_uri is True - assert entity.entity.value.startswith(TRUSTGRAPH_ENTITIES) + assert entity.entity.type == IRI + assert entity.entity.iri.startswith(TRUSTGRAPH_ENTITIES) assert len(entity.context) > 0 @pytest.mark.asyncio @@ -309,52 +309,52 @@ class TestKnowledgeGraphPipelineIntegration: s = rel["subject"] p = rel["predicate"] o = rel["object"] - + if s and p and o: s_uri = relationships_processor.to_uri(s) - s_value = Value(value=str(s_uri), is_uri=True) - + s_term = Term(type=IRI, iri=str(s_uri)) + p_uri = relationships_processor.to_uri(p) - p_value = Value(value=str(p_uri), is_uri=True) - + p_term = Term(type=IRI, iri=str(p_uri)) + if rel["object-entity"]: o_uri = relationships_processor.to_uri(o) - o_value = Value(value=str(o_uri), is_uri=True) + o_term = Term(type=IRI, iri=str(o_uri)) else: - o_value = Value(value=str(o), is_uri=False) - + o_term = Term(type=LITERAL, value=str(o)) + # Main relationship triple - triples.append(Triple(s=s_value, p=p_value, o=o_value)) - + triples.append(Triple(s=s_term, p=p_term, o=o_term)) + # Label triples triples.append(Triple( - s=s_value, - p=Value(value=RDF_LABEL, is_uri=True), - o=Value(value=str(s), is_uri=False) + s=s_term, + p=Term(type=IRI, iri=RDF_LABEL), + o=Term(type=LITERAL, value=str(s)) )) - + triples.append(Triple( - s=p_value, - p=Value(value=RDF_LABEL, is_uri=True), - o=Value(value=str(p), is_uri=False) + s=p_term, + p=Term(type=IRI, iri=RDF_LABEL), + o=Term(type=LITERAL, value=str(p)) )) - + if rel["object-entity"]: triples.append(Triple( - s=o_value, - p=Value(value=RDF_LABEL, is_uri=True), - o=Value(value=str(o), is_uri=False) + s=o_term, + p=Term(type=IRI, iri=RDF_LABEL), + o=Term(type=LITERAL, value=str(o)) )) # Assert assert len(triples) > 0 # Verify relationship triples exist - relationship_triples = [t for t in triples if t.p.value.endswith("is_subset_of") or t.p.value.endswith("is_used_in")] + relationship_triples = [t for t in triples if t.p.iri.endswith("is_subset_of") or t.p.iri.endswith("is_used_in")] assert len(relationship_triples) >= 2 - + # Verify label triples - label_triples = [t for t in triples if t.p.value == RDF_LABEL] + label_triples = [t for t in triples if t.p.iri == RDF_LABEL] assert len(label_triples) > 0 @pytest.mark.asyncio @@ -374,9 +374,9 @@ class TestKnowledgeGraphPipelineIntegration: ), triples=[ Triple( - s=Value(value="http://trustgraph.ai/e/machine-learning", is_uri=True), - p=Value(value=DEFINITION, is_uri=True), - o=Value(value="A subset of AI", is_uri=False) + s=Term(type=IRI, iri="http://trustgraph.ai/e/machine-learning"), + p=Term(type=IRI, iri=DEFINITION), + o=Term(type=LITERAL, value="A subset of AI") ) ] ) @@ -602,9 +602,9 @@ class TestKnowledgeGraphPipelineIntegration: collection="test_collection", metadata=[ Triple( - s=Value(value="doc:test", is_uri=True), - p=Value(value="dc:title", is_uri=True), - o=Value(value="Test Document", is_uri=False) + s=Term(type=IRI, iri="doc:test"), + p=Term(type=IRI, iri="dc:title"), + o=Term(type=LITERAL, value="Test Document") ) ] ) diff --git a/tests/unit/test_cores/test_knowledge_manager.py b/tests/unit/test_cores/test_knowledge_manager.py index e0ad9339..96c9c427 100644 --- a/tests/unit/test_cores/test_knowledge_manager.py +++ b/tests/unit/test_cores/test_knowledge_manager.py @@ -11,7 +11,7 @@ from unittest.mock import AsyncMock, Mock, patch, MagicMock from unittest.mock import call from trustgraph.cores.knowledge import KnowledgeManager -from trustgraph.schema import KnowledgeResponse, Triples, GraphEmbeddings, Metadata, Triple, Value, EntityEmbeddings +from trustgraph.schema import KnowledgeResponse, Triples, GraphEmbeddings, Metadata, Triple, Term, EntityEmbeddings, IRI, LITERAL @pytest.fixture @@ -71,15 +71,15 @@ def sample_triples(): return Triples( metadata=Metadata( id="test-doc-id", - user="test-user", + user="test-user", collection="default", # This should be overridden metadata=[] ), triples=[ Triple( - s=Value(value="http://example.org/john", is_uri=True), - p=Value(value="http://example.org/name", is_uri=True), - o=Value(value="John Smith", is_uri=False) + s=Term(type=IRI, iri="http://example.org/john"), + p=Term(type=IRI, iri="http://example.org/name"), + o=Term(type=LITERAL, value="John Smith") ) ] ) @@ -97,7 +97,7 @@ def sample_graph_embeddings(): ), entities=[ EntityEmbeddings( - entity=Value(value="http://example.org/john", is_uri=True), + entity=Term(type=IRI, iri="http://example.org/john"), vectors=[[0.1, 0.2, 0.3]] ) ] diff --git a/tests/unit/test_extract/test_ontology/test_entity_contexts.py b/tests/unit/test_extract/test_ontology/test_entity_contexts.py index c867b05a..fde24e58 100644 --- a/tests/unit/test_extract/test_ontology/test_entity_contexts.py +++ b/tests/unit/test_extract/test_ontology/test_entity_contexts.py @@ -7,7 +7,7 @@ collecting labels and definitions for entity embedding and retrieval. import pytest from trustgraph.extract.kg.ontology.extract import Processor -from trustgraph.schema.core.primitives import Triple, Value +from trustgraph.schema.core.primitives import Triple, Term, IRI, LITERAL from trustgraph.schema.knowledge.graph import EntityContext @@ -25,9 +25,9 @@ class TestEntityContextBuilding: """Test that entity context is built from rdfs:label.""" triples = [ Triple( - s=Value(value="https://example.com/entity/cornish-pasty", is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True), - o=Value(value="Cornish Pasty", is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/cornish-pasty"), + p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"), + o=Term(type=LITERAL, value="Cornish Pasty") ) ] @@ -35,16 +35,16 @@ class TestEntityContextBuilding: assert len(contexts) == 1, "Should create one entity context" assert isinstance(contexts[0], EntityContext) - assert contexts[0].entity.value == "https://example.com/entity/cornish-pasty" + assert contexts[0].entity.iri == "https://example.com/entity/cornish-pasty" assert "Label: Cornish Pasty" in contexts[0].context def test_builds_context_from_definition(self, processor): """Test that entity context includes definitions.""" triples = [ Triple( - s=Value(value="https://example.com/entity/pasty", is_uri=True), - p=Value(value="http://www.w3.org/2004/02/skos/core#definition", is_uri=True), - o=Value(value="A baked pastry filled with savory ingredients", is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/pasty"), + p=Term(type=IRI, iri="http://www.w3.org/2004/02/skos/core#definition"), + o=Term(type=LITERAL, value="A baked pastry filled with savory ingredients") ) ] @@ -57,14 +57,14 @@ class TestEntityContextBuilding: """Test that label and definition are combined in context.""" triples = [ Triple( - s=Value(value="https://example.com/entity/recipe1", is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True), - o=Value(value="Pasty Recipe", is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/recipe1"), + p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"), + o=Term(type=LITERAL, value="Pasty Recipe") ), Triple( - s=Value(value="https://example.com/entity/recipe1", is_uri=True), - p=Value(value="http://www.w3.org/2004/02/skos/core#definition", is_uri=True), - o=Value(value="Traditional Cornish pastry recipe", is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/recipe1"), + p=Term(type=IRI, iri="http://www.w3.org/2004/02/skos/core#definition"), + o=Term(type=LITERAL, value="Traditional Cornish pastry recipe") ) ] @@ -80,14 +80,14 @@ class TestEntityContextBuilding: """Test that only the first label is used in context.""" triples = [ Triple( - s=Value(value="https://example.com/entity/food1", is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True), - o=Value(value="First Label", is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/food1"), + p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"), + o=Term(type=LITERAL, value="First Label") ), Triple( - s=Value(value="https://example.com/entity/food1", is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True), - o=Value(value="Second Label", is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/food1"), + p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"), + o=Term(type=LITERAL, value="Second Label") ) ] @@ -101,14 +101,14 @@ class TestEntityContextBuilding: """Test that all definitions are included in context.""" triples = [ Triple( - s=Value(value="https://example.com/entity/food1", is_uri=True), - p=Value(value="http://www.w3.org/2004/02/skos/core#definition", is_uri=True), - o=Value(value="First definition", is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/food1"), + p=Term(type=IRI, iri="http://www.w3.org/2004/02/skos/core#definition"), + o=Term(type=LITERAL, value="First definition") ), Triple( - s=Value(value="https://example.com/entity/food1", is_uri=True), - p=Value(value="http://www.w3.org/2004/02/skos/core#definition", is_uri=True), - o=Value(value="Second definition", is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/food1"), + p=Term(type=IRI, iri="http://www.w3.org/2004/02/skos/core#definition"), + o=Term(type=LITERAL, value="Second definition") ) ] @@ -123,9 +123,9 @@ class TestEntityContextBuilding: """Test that schema.org description is treated as definition.""" triples = [ Triple( - s=Value(value="https://example.com/entity/food1", is_uri=True), - p=Value(value="https://schema.org/description", is_uri=True), - o=Value(value="A delicious food item", is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/food1"), + p=Term(type=IRI, iri="https://schema.org/description"), + o=Term(type=LITERAL, value="A delicious food item") ) ] @@ -138,26 +138,26 @@ class TestEntityContextBuilding: """Test that contexts are created for multiple entities.""" triples = [ Triple( - s=Value(value="https://example.com/entity/entity1", is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True), - o=Value(value="Entity One", is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/entity1"), + p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"), + o=Term(type=LITERAL, value="Entity One") ), Triple( - s=Value(value="https://example.com/entity/entity2", is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True), - o=Value(value="Entity Two", is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/entity2"), + p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"), + o=Term(type=LITERAL, value="Entity Two") ), Triple( - s=Value(value="https://example.com/entity/entity3", is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True), - o=Value(value="Entity Three", is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/entity3"), + p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"), + o=Term(type=LITERAL, value="Entity Three") ) ] contexts = processor.build_entity_contexts(triples) assert len(contexts) == 3, "Should create context for each entity" - entity_uris = [ctx.entity.value for ctx in contexts] + entity_uris = [ctx.entity.iri for ctx in contexts] assert "https://example.com/entity/entity1" in entity_uris assert "https://example.com/entity/entity2" in entity_uris assert "https://example.com/entity/entity3" in entity_uris @@ -166,9 +166,9 @@ class TestEntityContextBuilding: """Test that URI objects are ignored (only literal labels/definitions).""" triples = [ Triple( - s=Value(value="https://example.com/entity/food1", is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True), - o=Value(value="https://example.com/some/uri", is_uri=True) # URI, not literal + s=Term(type=IRI, iri="https://example.com/entity/food1"), + p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"), + o=Term(type=IRI, iri="https://example.com/some/uri") # URI, not literal ) ] @@ -181,14 +181,14 @@ class TestEntityContextBuilding: """Test that other predicates are ignored.""" triples = [ Triple( - s=Value(value="https://example.com/entity/food1", is_uri=True), - p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True), - o=Value(value="http://example.com/Food", is_uri=True) + s=Term(type=IRI, iri="https://example.com/entity/food1"), + p=Term(type=IRI, iri="http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), + o=Term(type=IRI, iri="http://example.com/Food") ), Triple( - s=Value(value="https://example.com/entity/food1", is_uri=True), - p=Value(value="http://example.com/produces", is_uri=True), - o=Value(value="https://example.com/entity/food2", is_uri=True) + s=Term(type=IRI, iri="https://example.com/entity/food1"), + p=Term(type=IRI, iri="http://example.com/produces"), + o=Term(type=IRI, iri="https://example.com/entity/food2") ) ] @@ -205,29 +205,29 @@ class TestEntityContextBuilding: assert len(contexts) == 0, "Empty triple list should return empty contexts" - def test_entity_context_has_value_object(self, processor): - """Test that EntityContext.entity is a Value object.""" + def test_entity_context_has_term_object(self, processor): + """Test that EntityContext.entity is a Term object.""" triples = [ Triple( - s=Value(value="https://example.com/entity/test", is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True), - o=Value(value="Test Entity", is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/test"), + p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"), + o=Term(type=LITERAL, value="Test Entity") ) ] contexts = processor.build_entity_contexts(triples) assert len(contexts) == 1 - assert isinstance(contexts[0].entity, Value), "Entity should be Value object" - assert contexts[0].entity.is_uri, "Entity should be marked as URI" + assert isinstance(contexts[0].entity, Term), "Entity should be Term object" + assert contexts[0].entity.type == IRI, "Entity should be IRI type" def test_entity_context_text_is_string(self, processor): """Test that EntityContext.context is a string.""" triples = [ Triple( - s=Value(value="https://example.com/entity/test", is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True), - o=Value(value="Test Entity", is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/test"), + p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"), + o=Term(type=LITERAL, value="Test Entity") ) ] @@ -241,22 +241,22 @@ class TestEntityContextBuilding: triples = [ # Entity with label - should create context Triple( - s=Value(value="https://example.com/entity/entity1", is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True), - o=Value(value="Entity One", is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/entity1"), + p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"), + o=Term(type=LITERAL, value="Entity One") ), # Entity with only rdf:type - should NOT create context Triple( - s=Value(value="https://example.com/entity/entity2", is_uri=True), - p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True), - o=Value(value="http://example.com/Food", is_uri=True) + s=Term(type=IRI, iri="https://example.com/entity/entity2"), + p=Term(type=IRI, iri="http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), + o=Term(type=IRI, iri="http://example.com/Food") ) ] contexts = processor.build_entity_contexts(triples) assert len(contexts) == 1, "Should only create context for entity with label/definition" - assert contexts[0].entity.value == "https://example.com/entity/entity1" + assert contexts[0].entity.iri == "https://example.com/entity/entity1" class TestEntityContextEdgeCases: @@ -266,9 +266,9 @@ class TestEntityContextEdgeCases: """Test handling of unicode characters in labels.""" triples = [ Triple( - s=Value(value="https://example.com/entity/café", is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True), - o=Value(value="Café Spécial", is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/café"), + p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"), + o=Term(type=LITERAL, value="Café Spécial") ) ] @@ -282,9 +282,9 @@ class TestEntityContextEdgeCases: long_def = "This is a very long definition " * 50 triples = [ Triple( - s=Value(value="https://example.com/entity/test", is_uri=True), - p=Value(value="http://www.w3.org/2004/02/skos/core#definition", is_uri=True), - o=Value(value=long_def, is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/test"), + p=Term(type=IRI, iri="http://www.w3.org/2004/02/skos/core#definition"), + o=Term(type=LITERAL, value=long_def) ) ] @@ -297,9 +297,9 @@ class TestEntityContextEdgeCases: """Test handling of special characters in context text.""" triples = [ Triple( - s=Value(value="https://example.com/entity/test", is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True), - o=Value(value="Test & Entity \"quotes\"", is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/test"), + p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"), + o=Term(type=LITERAL, value="Test & Entity \"quotes\"") ) ] @@ -313,27 +313,27 @@ class TestEntityContextEdgeCases: triples = [ # Label - relevant Triple( - s=Value(value="https://example.com/entity/recipe1", is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True), - o=Value(value="Cornish Pasty Recipe", is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/recipe1"), + p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"), + o=Term(type=LITERAL, value="Cornish Pasty Recipe") ), # Type - irrelevant Triple( - s=Value(value="https://example.com/entity/recipe1", is_uri=True), - p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True), - o=Value(value="http://example.com/Recipe", is_uri=True) + s=Term(type=IRI, iri="https://example.com/entity/recipe1"), + p=Term(type=IRI, iri="http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), + o=Term(type=IRI, iri="http://example.com/Recipe") ), # Property - irrelevant Triple( - s=Value(value="https://example.com/entity/recipe1", is_uri=True), - p=Value(value="http://example.com/produces", is_uri=True), - o=Value(value="https://example.com/entity/pasty", is_uri=True) + s=Term(type=IRI, iri="https://example.com/entity/recipe1"), + p=Term(type=IRI, iri="http://example.com/produces"), + o=Term(type=IRI, iri="https://example.com/entity/pasty") ), # Definition - relevant Triple( - s=Value(value="https://example.com/entity/recipe1", is_uri=True), - p=Value(value="http://www.w3.org/2004/02/skos/core#definition", is_uri=True), - o=Value(value="Traditional British pastry recipe", is_uri=False) + s=Term(type=IRI, iri="https://example.com/entity/recipe1"), + p=Term(type=IRI, iri="http://www.w3.org/2004/02/skos/core#definition"), + o=Term(type=LITERAL, value="Traditional British pastry recipe") ) ] diff --git a/tests/unit/test_extract/test_ontology/test_ontology_triples.py b/tests/unit/test_extract/test_ontology/test_ontology_triples.py index 70ade79d..50e2ef3b 100644 --- a/tests/unit/test_extract/test_ontology/test_ontology_triples.py +++ b/tests/unit/test_extract/test_ontology/test_ontology_triples.py @@ -9,7 +9,7 @@ the knowledge graph. import pytest from trustgraph.extract.kg.ontology.extract import Processor from trustgraph.extract.kg.ontology.ontology_selector import OntologySubset -from trustgraph.schema.core.primitives import Triple, Value +from trustgraph.schema.core.primitives import Triple, Term, IRI, LITERAL @pytest.fixture @@ -92,12 +92,12 @@ class TestOntologyTripleGeneration: # Find type triples for Recipe class recipe_type_triples = [ t for t in triples - if t.s.value == "http://purl.org/ontology/fo/Recipe" - and t.p.value == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" + if t.s.iri == "http://purl.org/ontology/fo/Recipe" + and t.p.iri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" ] assert len(recipe_type_triples) == 1, "Should generate exactly one type triple per class" - assert recipe_type_triples[0].o.value == "http://www.w3.org/2002/07/owl#Class", \ + assert recipe_type_triples[0].o.iri == "http://www.w3.org/2002/07/owl#Class", \ "Class type should be owl:Class" def test_generates_class_labels(self, extractor, sample_ontology_subset): @@ -107,14 +107,14 @@ class TestOntologyTripleGeneration: # Find label triples for Recipe class recipe_label_triples = [ t for t in triples - if t.s.value == "http://purl.org/ontology/fo/Recipe" - and t.p.value == "http://www.w3.org/2000/01/rdf-schema#label" + if t.s.iri == "http://purl.org/ontology/fo/Recipe" + and t.p.iri == "http://www.w3.org/2000/01/rdf-schema#label" ] assert len(recipe_label_triples) == 1, "Should generate label triple for class" assert recipe_label_triples[0].o.value == "Recipe", \ "Label should match class label from ontology" - assert not recipe_label_triples[0].o.is_uri, \ + assert recipe_label_triples[0].o.type == LITERAL, \ "Label should be a literal, not URI" def test_generates_class_comments(self, extractor, sample_ontology_subset): @@ -124,8 +124,8 @@ class TestOntologyTripleGeneration: # Find comment triples for Recipe class recipe_comment_triples = [ t for t in triples - if t.s.value == "http://purl.org/ontology/fo/Recipe" - and t.p.value == "http://www.w3.org/2000/01/rdf-schema#comment" + if t.s.iri == "http://purl.org/ontology/fo/Recipe" + and t.p.iri == "http://www.w3.org/2000/01/rdf-schema#comment" ] assert len(recipe_comment_triples) == 1, "Should generate comment triple for class" @@ -139,13 +139,13 @@ class TestOntologyTripleGeneration: # Find type triples for ingredients property ingredients_type_triples = [ t for t in triples - if t.s.value == "http://purl.org/ontology/fo/ingredients" - and t.p.value == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" + if t.s.iri == "http://purl.org/ontology/fo/ingredients" + and t.p.iri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" ] assert len(ingredients_type_triples) == 1, \ "Should generate exactly one type triple per object property" - assert ingredients_type_triples[0].o.value == "http://www.w3.org/2002/07/owl#ObjectProperty", \ + assert ingredients_type_triples[0].o.iri == "http://www.w3.org/2002/07/owl#ObjectProperty", \ "Object property type should be owl:ObjectProperty" def test_generates_object_property_labels(self, extractor, sample_ontology_subset): @@ -155,8 +155,8 @@ class TestOntologyTripleGeneration: # Find label triples for ingredients property ingredients_label_triples = [ t for t in triples - if t.s.value == "http://purl.org/ontology/fo/ingredients" - and t.p.value == "http://www.w3.org/2000/01/rdf-schema#label" + if t.s.iri == "http://purl.org/ontology/fo/ingredients" + and t.p.iri == "http://www.w3.org/2000/01/rdf-schema#label" ] assert len(ingredients_label_triples) == 1, \ @@ -171,15 +171,15 @@ class TestOntologyTripleGeneration: # Find domain triples for ingredients property ingredients_domain_triples = [ t for t in triples - if t.s.value == "http://purl.org/ontology/fo/ingredients" - and t.p.value == "http://www.w3.org/2000/01/rdf-schema#domain" + if t.s.iri == "http://purl.org/ontology/fo/ingredients" + and t.p.iri == "http://www.w3.org/2000/01/rdf-schema#domain" ] assert len(ingredients_domain_triples) == 1, \ "Should generate domain triple for object property" - assert ingredients_domain_triples[0].o.value == "http://purl.org/ontology/fo/Recipe", \ + assert ingredients_domain_triples[0].o.iri == "http://purl.org/ontology/fo/Recipe", \ "Domain should be Recipe class URI" - assert ingredients_domain_triples[0].o.is_uri, \ + assert ingredients_domain_triples[0].o.type == IRI, \ "Domain should be a URI reference" def test_generates_object_property_range(self, extractor, sample_ontology_subset): @@ -189,13 +189,13 @@ class TestOntologyTripleGeneration: # Find range triples for produces property produces_range_triples = [ t for t in triples - if t.s.value == "http://purl.org/ontology/fo/produces" - and t.p.value == "http://www.w3.org/2000/01/rdf-schema#range" + if t.s.iri == "http://purl.org/ontology/fo/produces" + and t.p.iri == "http://www.w3.org/2000/01/rdf-schema#range" ] assert len(produces_range_triples) == 1, \ "Should generate range triple for object property" - assert produces_range_triples[0].o.value == "http://purl.org/ontology/fo/Food", \ + assert produces_range_triples[0].o.iri == "http://purl.org/ontology/fo/Food", \ "Range should be Food class URI" def test_generates_datatype_property_type_triples(self, extractor, sample_ontology_subset): @@ -205,13 +205,13 @@ class TestOntologyTripleGeneration: # Find type triples for serves property serves_type_triples = [ t for t in triples - if t.s.value == "http://purl.org/ontology/fo/serves" - and t.p.value == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" + if t.s.iri == "http://purl.org/ontology/fo/serves" + and t.p.iri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" ] assert len(serves_type_triples) == 1, \ "Should generate exactly one type triple per datatype property" - assert serves_type_triples[0].o.value == "http://www.w3.org/2002/07/owl#DatatypeProperty", \ + assert serves_type_triples[0].o.iri == "http://www.w3.org/2002/07/owl#DatatypeProperty", \ "Datatype property type should be owl:DatatypeProperty" def test_generates_datatype_property_range(self, extractor, sample_ontology_subset): @@ -221,13 +221,13 @@ class TestOntologyTripleGeneration: # Find range triples for serves property serves_range_triples = [ t for t in triples - if t.s.value == "http://purl.org/ontology/fo/serves" - and t.p.value == "http://www.w3.org/2000/01/rdf-schema#range" + if t.s.iri == "http://purl.org/ontology/fo/serves" + and t.p.iri == "http://www.w3.org/2000/01/rdf-schema#range" ] assert len(serves_range_triples) == 1, \ "Should generate range triple for datatype property" - assert serves_range_triples[0].o.value == "http://www.w3.org/2001/XMLSchema#string", \ + assert serves_range_triples[0].o.iri == "http://www.w3.org/2001/XMLSchema#string", \ "Range should be XSD type URI (xsd:string expanded)" def test_generates_triples_for_all_classes(self, extractor, sample_ontology_subset): @@ -236,9 +236,9 @@ class TestOntologyTripleGeneration: # Count unique class subjects class_subjects = set( - t.s.value for t in triples - if t.p.value == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" - and t.o.value == "http://www.w3.org/2002/07/owl#Class" + t.s.iri for t in triples + if t.p.iri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" + and t.o.iri == "http://www.w3.org/2002/07/owl#Class" ) assert len(class_subjects) == 3, \ @@ -250,9 +250,9 @@ class TestOntologyTripleGeneration: # Count unique property subjects (object + datatype properties) property_subjects = set( - t.s.value for t in triples - if t.p.value == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" - and ("ObjectProperty" in t.o.value or "DatatypeProperty" in t.o.value) + t.s.iri for t in triples + if t.p.iri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" + and ("ObjectProperty" in t.o.iri or "DatatypeProperty" in t.o.iri) ) assert len(property_subjects) == 3, \ @@ -276,7 +276,7 @@ class TestOntologyTripleGeneration: # Should still generate proper RDF triples despite dict field names label_triples = [ t for t in triples - if t.p.value == "http://www.w3.org/2000/01/rdf-schema#label" + if t.p.iri == "http://www.w3.org/2000/01/rdf-schema#label" ] assert len(label_triples) > 0, \ "Should generate rdfs:label triples from dict 'labels' field" diff --git a/tests/unit/test_extract/test_ontology/test_prompt_and_extraction.py b/tests/unit/test_extract/test_ontology/test_prompt_and_extraction.py index e6d5bf36..9f9c8551 100644 --- a/tests/unit/test_extract/test_ontology/test_prompt_and_extraction.py +++ b/tests/unit/test_extract/test_ontology/test_prompt_and_extraction.py @@ -8,7 +8,7 @@ and extracts/validates triples from LLM responses. import pytest from trustgraph.extract.kg.ontology.extract import Processor from trustgraph.extract.kg.ontology.ontology_selector import OntologySubset -from trustgraph.schema.core.primitives import Triple, Value +from trustgraph.schema.core.primitives import Triple, Term, IRI, LITERAL @pytest.fixture @@ -248,9 +248,9 @@ class TestTripleParsing: validated = extractor.parse_and_validate_triples(triples_response, sample_ontology_subset) assert len(validated) == 1, "Should parse one valid triple" - assert validated[0].s.value == "https://trustgraph.ai/food/cornish-pasty" - assert validated[0].p.value == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" - assert validated[0].o.value == "http://purl.org/ontology/fo/Recipe" + assert validated[0].s.iri == "https://trustgraph.ai/food/cornish-pasty" + assert validated[0].p.iri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" + assert validated[0].o.iri == "http://purl.org/ontology/fo/Recipe" def test_parse_multiple_triples(self, extractor, sample_ontology_subset): """Test parsing multiple triples.""" @@ -307,11 +307,11 @@ class TestTripleParsing: assert len(validated) == 1 # Subject should be expanded to entity URI - assert validated[0].s.value.startswith("https://trustgraph.ai/food/") + assert validated[0].s.iri.startswith("https://trustgraph.ai/food/") # Predicate should be expanded to ontology URI - assert validated[0].p.value == "http://purl.org/ontology/fo/produces" + assert validated[0].p.iri == "http://purl.org/ontology/fo/produces" # Object should be expanded to class URI - assert validated[0].o.value == "http://purl.org/ontology/fo/Food" + assert validated[0].o.iri == "http://purl.org/ontology/fo/Food" def test_creates_proper_triple_objects(self, extractor, sample_ontology_subset): """Test that Triple objects are properly created.""" @@ -324,12 +324,12 @@ class TestTripleParsing: assert len(validated) == 1 triple = validated[0] assert isinstance(triple, Triple), "Should create Triple objects" - assert isinstance(triple.s, Value), "Subject should be Value object" - assert isinstance(triple.p, Value), "Predicate should be Value object" - assert isinstance(triple.o, Value), "Object should be Value object" - assert triple.s.is_uri, "Subject should be marked as URI" - assert triple.p.is_uri, "Predicate should be marked as URI" - assert not triple.o.is_uri, "Object literal should not be marked as URI" + assert isinstance(triple.s, Term), "Subject should be Term object" + assert isinstance(triple.p, Term), "Predicate should be Term object" + assert isinstance(triple.o, Term), "Object should be Term object" + assert triple.s.type == IRI, "Subject should be IRI type" + assert triple.p.type == IRI, "Predicate should be IRI type" + assert triple.o.type == LITERAL, "Object literal should be LITERAL type" class TestURIExpansionInExtraction: @@ -343,8 +343,8 @@ class TestURIExpansionInExtraction: validated = extractor.parse_and_validate_triples(triples_response, sample_ontology_subset) - assert validated[0].o.value == "http://purl.org/ontology/fo/Recipe" - assert validated[0].o.is_uri, "Class reference should be URI" + assert validated[0].o.iri == "http://purl.org/ontology/fo/Recipe" + assert validated[0].o.type == IRI, "Class reference should be URI" def test_expands_property_names(self, extractor, sample_ontology_subset): """Test that property names are expanded to full URIs.""" @@ -354,7 +354,7 @@ class TestURIExpansionInExtraction: validated = extractor.parse_and_validate_triples(triples_response, sample_ontology_subset) - assert validated[0].p.value == "http://purl.org/ontology/fo/produces" + assert validated[0].p.iri == "http://purl.org/ontology/fo/produces" def test_expands_entity_instances(self, extractor, sample_ontology_subset): """Test that entity instances get constructed URIs.""" @@ -364,8 +364,8 @@ class TestURIExpansionInExtraction: validated = extractor.parse_and_validate_triples(triples_response, sample_ontology_subset) - assert validated[0].s.value.startswith("https://trustgraph.ai/food/") - assert "my-special-recipe" in validated[0].s.value + assert validated[0].s.iri.startswith("https://trustgraph.ai/food/") + assert "my-special-recipe" in validated[0].s.iri class TestEdgeCases: diff --git a/tests/unit/test_gateway/test_dispatch_serialize.py b/tests/unit/test_gateway/test_dispatch_serialize.py index e117629b..5d546adf 100644 --- a/tests/unit/test_gateway/test_dispatch_serialize.py +++ b/tests/unit/test_gateway/test_dispatch_serialize.py @@ -6,7 +6,7 @@ import pytest from unittest.mock import MagicMock from trustgraph.gateway.dispatch.serialize import to_value, to_subgraph, serialize_value -from trustgraph.schema import Value, Triple +from trustgraph.schema import Term, Triple, IRI, LITERAL class TestDispatchSerialize: @@ -14,55 +14,55 @@ class TestDispatchSerialize: def test_to_value_with_uri(self): """Test to_value function with URI""" - input_data = {"v": "http://example.com/resource", "e": True} - + input_data = {"t": "i", "i": "http://example.com/resource"} + result = to_value(input_data) - - assert isinstance(result, Value) - assert result.value == "http://example.com/resource" - assert result.is_uri is True + + assert isinstance(result, Term) + assert result.iri == "http://example.com/resource" + assert result.type == IRI def test_to_value_with_literal(self): """Test to_value function with literal value""" - input_data = {"v": "literal string", "e": False} - + input_data = {"t": "l", "v": "literal string"} + result = to_value(input_data) - - assert isinstance(result, Value) + + assert isinstance(result, Term) assert result.value == "literal string" - assert result.is_uri is False + assert result.type == LITERAL def test_to_subgraph_with_multiple_triples(self): """Test to_subgraph function with multiple triples""" input_data = [ { - "s": {"v": "subject1", "e": True}, - "p": {"v": "predicate1", "e": True}, - "o": {"v": "object1", "e": False} + "s": {"t": "i", "i": "subject1"}, + "p": {"t": "i", "i": "predicate1"}, + "o": {"t": "l", "v": "object1"} }, { - "s": {"v": "subject2", "e": False}, - "p": {"v": "predicate2", "e": True}, - "o": {"v": "object2", "e": True} + "s": {"t": "l", "v": "subject2"}, + "p": {"t": "i", "i": "predicate2"}, + "o": {"t": "i", "i": "object2"} } ] - + result = to_subgraph(input_data) - + assert len(result) == 2 assert all(isinstance(triple, Triple) for triple in result) - + # Check first triple - assert result[0].s.value == "subject1" - assert result[0].s.is_uri is True - assert result[0].p.value == "predicate1" - assert result[0].p.is_uri is True + assert result[0].s.iri == "subject1" + assert result[0].s.type == IRI + assert result[0].p.iri == "predicate1" + assert result[0].p.type == IRI assert result[0].o.value == "object1" - assert result[0].o.is_uri is False - + assert result[0].o.type == LITERAL + # Check second triple assert result[1].s.value == "subject2" - assert result[1].s.is_uri is False + assert result[1].s.type == LITERAL def test_to_subgraph_with_empty_list(self): """Test to_subgraph function with empty input""" @@ -74,16 +74,16 @@ class TestDispatchSerialize: def test_serialize_value_with_uri(self): """Test serialize_value function with URI value""" - value = Value(value="http://example.com/test", is_uri=True) - - result = serialize_value(value) - - assert result == {"v": "http://example.com/test", "e": True} + term = Term(type=IRI, iri="http://example.com/test") + + result = serialize_value(term) + + assert result == {"t": "i", "i": "http://example.com/test"} def test_serialize_value_with_literal(self): """Test serialize_value function with literal value""" - value = Value(value="test literal", is_uri=False) - - result = serialize_value(value) - - assert result == {"v": "test literal", "e": False} \ No newline at end of file + term = Term(type=LITERAL, value="test literal") + + result = serialize_value(term) + + assert result == {"t": "l", "v": "test literal"} \ No newline at end of file diff --git a/tests/unit/test_knowledge_graph/conftest.py b/tests/unit/test_knowledge_graph/conftest.py index d4a83054..e7f83b58 100644 --- a/tests/unit/test_knowledge_graph/conftest.py +++ b/tests/unit/test_knowledge_graph/conftest.py @@ -6,11 +6,21 @@ import pytest from unittest.mock import Mock, AsyncMock # Mock schema classes for testing -class Value: - def __init__(self, value, is_uri, type): - self.value = value - self.is_uri = is_uri +# Term type constants +IRI = "i" +LITERAL = "l" +BLANK = "b" +TRIPLE = "t" + +class Term: + def __init__(self, type, iri=None, value=None, id=None, datatype=None, language=None, triple=None): self.type = type + self.iri = iri + self.value = value + self.id = id + self.datatype = datatype + self.language = language + self.triple = triple class Triple: def __init__(self, s, p, o): @@ -66,32 +76,30 @@ def sample_relationships(): @pytest.fixture -def sample_value_uri(): - """Sample URI Value object""" - return Value( - value="http://example.com/person/john-smith", - is_uri=True, - type="" +def sample_term_uri(): + """Sample URI Term object""" + return Term( + type=IRI, + iri="http://example.com/person/john-smith" ) @pytest.fixture -def sample_value_literal(): - """Sample literal Value object""" - return Value( - value="John Smith", - is_uri=False, - type="string" +def sample_term_literal(): + """Sample literal Term object""" + return Term( + type=LITERAL, + value="John Smith" ) @pytest.fixture -def sample_triple(sample_value_uri, sample_value_literal): +def sample_triple(sample_term_uri, sample_term_literal): """Sample Triple object""" return Triple( - s=sample_value_uri, - p=Value(value="http://schema.org/name", is_uri=True, type=""), - o=sample_value_literal + s=sample_term_uri, + p=Term(type=IRI, iri="http://schema.org/name"), + o=sample_term_literal ) diff --git a/tests/unit/test_knowledge_graph/test_agent_extraction.py b/tests/unit/test_knowledge_graph/test_agent_extraction.py index 626eba42..a3a0f9a7 100644 --- a/tests/unit/test_knowledge_graph/test_agent_extraction.py +++ b/tests/unit/test_knowledge_graph/test_agent_extraction.py @@ -11,7 +11,7 @@ import json from unittest.mock import AsyncMock, MagicMock, patch from trustgraph.extract.kg.agent.extract import Processor as AgentKgExtractor -from trustgraph.schema import Chunk, Triple, Triples, Metadata, Value, Error +from trustgraph.schema import Chunk, Triple, Triples, Metadata, Term, Error, IRI, LITERAL from trustgraph.schema import EntityContext, EntityContexts from trustgraph.rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF from trustgraph.template.prompt_manager import PromptManager @@ -53,9 +53,9 @@ class TestAgentKgExtractor: id="doc123", metadata=[ Triple( - s=Value(value="doc123", is_uri=True), - p=Value(value="http://example.org/type", is_uri=True), - o=Value(value="document", is_uri=False) + s=Term(type=IRI, iri="doc123"), + p=Term(type=IRI, iri="http://example.org/type"), + o=Term(type=LITERAL, value="document") ) ] ) @@ -178,27 +178,27 @@ This is not JSON at all triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata) # Check entity label triple - label_triple = next((t for t in triples if t.p.value == RDF_LABEL and t.o.value == "Machine Learning"), None) + label_triple = next((t for t in triples if t.p.iri == RDF_LABEL and t.o.value == "Machine Learning"), None) assert label_triple is not None - assert label_triple.s.value == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning" - assert label_triple.s.is_uri == True - assert label_triple.o.is_uri == False - + assert label_triple.s.iri == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning" + assert label_triple.s.type == IRI + assert label_triple.o.type == LITERAL + # Check definition triple - def_triple = next((t for t in triples if t.p.value == DEFINITION), None) + def_triple = next((t for t in triples if t.p.iri == DEFINITION), None) assert def_triple is not None - assert def_triple.s.value == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning" + assert def_triple.s.iri == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning" assert def_triple.o.value == "A subset of AI that enables learning from data." - + # Check subject-of triple - subject_of_triple = next((t for t in triples if t.p.value == SUBJECT_OF), None) + subject_of_triple = next((t for t in triples if t.p.iri == SUBJECT_OF), None) assert subject_of_triple is not None - assert subject_of_triple.s.value == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning" - assert subject_of_triple.o.value == "doc123" - + assert subject_of_triple.s.iri == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning" + assert subject_of_triple.o.iri == "doc123" + # Check entity context assert len(entity_contexts) == 1 - assert entity_contexts[0].entity.value == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning" + assert entity_contexts[0].entity.iri == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning" assert entity_contexts[0].context == "A subset of AI that enables learning from data." def test_process_extraction_data_relationships(self, agent_extractor, sample_metadata): @@ -218,25 +218,25 @@ This is not JSON at all # Check that subject, predicate, and object labels are created subject_uri = f"{TRUSTGRAPH_ENTITIES}Machine%20Learning" predicate_uri = f"{TRUSTGRAPH_ENTITIES}is_subset_of" - + # Find label triples - subject_label = next((t for t in triples if t.s.value == subject_uri and t.p.value == RDF_LABEL), None) + subject_label = next((t for t in triples if t.s.iri == subject_uri and t.p.iri == RDF_LABEL), None) assert subject_label is not None assert subject_label.o.value == "Machine Learning" - - predicate_label = next((t for t in triples if t.s.value == predicate_uri and t.p.value == RDF_LABEL), None) + + predicate_label = next((t for t in triples if t.s.iri == predicate_uri and t.p.iri == RDF_LABEL), None) assert predicate_label is not None assert predicate_label.o.value == "is_subset_of" - + # Check main relationship triple object_uri = f"{TRUSTGRAPH_ENTITIES}Artificial%20Intelligence" - rel_triple = next((t for t in triples if t.s.value == subject_uri and t.p.value == predicate_uri), None) + rel_triple = next((t for t in triples if t.s.iri == subject_uri and t.p.iri == predicate_uri), None) assert rel_triple is not None - assert rel_triple.o.value == object_uri - assert rel_triple.o.is_uri == True - + assert rel_triple.o.iri == object_uri + assert rel_triple.o.type == IRI + # Check subject-of relationships - subject_of_triples = [t for t in triples if t.p.value == SUBJECT_OF and t.o.value == "doc123"] + subject_of_triples = [t for t in triples if t.p.iri == SUBJECT_OF and t.o.iri == "doc123"] assert len(subject_of_triples) >= 2 # At least subject and predicate should have subject-of relations def test_process_extraction_data_literal_object(self, agent_extractor, sample_metadata): @@ -254,7 +254,7 @@ This is not JSON at all triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata) # Check that object labels are not created for literal objects - object_labels = [t for t in triples if t.p.value == RDF_LABEL and t.o.value == "95%"] + object_labels = [t for t in triples if t.p.iri == RDF_LABEL and t.o.value == "95%"] # Based on the code logic, it should not create object labels for non-entity objects # But there might be a bug in the original implementation @@ -263,12 +263,12 @@ This is not JSON at all triples, entity_contexts = agent_extractor.process_extraction_data(sample_extraction_data, sample_metadata) # Check that we have both definition and relationship triples - definition_triples = [t for t in triples if t.p.value == DEFINITION] + definition_triples = [t for t in triples if t.p.iri == DEFINITION] assert len(definition_triples) == 2 # Two definitions - + # Check entity contexts are created for definitions assert len(entity_contexts) == 2 - entity_uris = [ec.entity.value for ec in entity_contexts] + entity_uris = [ec.entity.iri for ec in entity_contexts] assert f"{TRUSTGRAPH_ENTITIES}Machine%20Learning" in entity_uris assert f"{TRUSTGRAPH_ENTITIES}Neural%20Networks" in entity_uris @@ -282,7 +282,7 @@ This is not JSON at all triples, entity_contexts = agent_extractor.process_extraction_data(data, metadata) # Should not create subject-of relationships when no metadata ID - subject_of_triples = [t for t in triples if t.p.value == SUBJECT_OF] + subject_of_triples = [t for t in triples if t.p.iri == SUBJECT_OF] assert len(subject_of_triples) == 0 # Should still create entity contexts @@ -327,17 +327,17 @@ This is not JSON at all async def test_emit_triples(self, agent_extractor, sample_metadata): """Test emitting triples to publisher""" mock_publisher = AsyncMock() - + test_triples = [ Triple( - s=Value(value="test:subject", is_uri=True), - p=Value(value="test:predicate", is_uri=True), - o=Value(value="test object", is_uri=False) + s=Term(type=IRI, iri="test:subject"), + p=Term(type=IRI, iri="test:predicate"), + o=Term(type=LITERAL, value="test object") ) ] - + await agent_extractor.emit_triples(mock_publisher, sample_metadata, test_triples) - + mock_publisher.send.assert_called_once() sent_triples = mock_publisher.send.call_args[0][0] assert isinstance(sent_triples, Triples) @@ -348,22 +348,22 @@ This is not JSON at all # Note: metadata.metadata is now empty array in the new implementation assert sent_triples.metadata.metadata == [] assert len(sent_triples.triples) == 1 - assert sent_triples.triples[0].s.value == "test:subject" + assert sent_triples.triples[0].s.iri == "test:subject" @pytest.mark.asyncio async def test_emit_entity_contexts(self, agent_extractor, sample_metadata): """Test emitting entity contexts to publisher""" mock_publisher = AsyncMock() - + test_contexts = [ EntityContext( - entity=Value(value="test:entity", is_uri=True), + entity=Term(type=IRI, iri="test:entity"), context="Test context" ) ] - + await agent_extractor.emit_entity_contexts(mock_publisher, sample_metadata, test_contexts) - + mock_publisher.send.assert_called_once() sent_contexts = mock_publisher.send.call_args[0][0] assert isinstance(sent_contexts, EntityContexts) @@ -374,7 +374,7 @@ This is not JSON at all # Note: metadata.metadata is now empty array in the new implementation assert sent_contexts.metadata.metadata == [] assert len(sent_contexts.entities) == 1 - assert sent_contexts.entities[0].entity.value == "test:entity" + assert sent_contexts.entities[0].entity.iri == "test:entity" def test_agent_extractor_initialization_params(self): """Test agent extractor parameter validation""" diff --git a/tests/unit/test_knowledge_graph/test_agent_extraction_edge_cases.py b/tests/unit/test_knowledge_graph/test_agent_extraction_edge_cases.py index a5190d4d..f66e5da6 100644 --- a/tests/unit/test_knowledge_graph/test_agent_extraction_edge_cases.py +++ b/tests/unit/test_knowledge_graph/test_agent_extraction_edge_cases.py @@ -11,7 +11,7 @@ import urllib.parse from unittest.mock import AsyncMock, MagicMock from trustgraph.extract.kg.agent.extract import Processor as AgentKgExtractor -from trustgraph.schema import Chunk, Triple, Triples, Metadata, Value +from trustgraph.schema import Chunk, Triple, Triples, Metadata, Term, IRI, LITERAL from trustgraph.schema import EntityContext, EntityContexts from trustgraph.rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF @@ -188,7 +188,7 @@ class TestAgentKgExtractionEdgeCases: triples, contexts = agent_extractor.process_extraction_data(data, metadata) # Should not create subject-of triples when ID is empty string - subject_of_triples = [t for t in triples if t.p.value == SUBJECT_OF] + subject_of_triples = [t for t in triples if t.p.iri == SUBJECT_OF] assert len(subject_of_triples) == 0 def test_process_extraction_data_special_entity_names(self, agent_extractor): @@ -221,7 +221,7 @@ class TestAgentKgExtractionEdgeCases: # Verify URIs were properly encoded for i, entity in enumerate(special_entities): expected_uri = f"{TRUSTGRAPH_ENTITIES}{urllib.parse.quote(entity)}" - assert contexts[i].entity.value == expected_uri + assert contexts[i].entity.iri == expected_uri def test_process_extraction_data_very_long_definitions(self, agent_extractor): """Test processing with very long entity definitions""" @@ -241,7 +241,7 @@ class TestAgentKgExtractionEdgeCases: assert contexts[0].context == long_definition # Find definition triple - def_triple = next((t for t in triples if t.p.value == DEFINITION), None) + def_triple = next((t for t in triples if t.p.iri == DEFINITION), None) assert def_triple is not None assert def_triple.o.value == long_definition @@ -262,7 +262,7 @@ class TestAgentKgExtractionEdgeCases: assert len(contexts) == 4 # Check that both definitions for "Machine Learning" are present - ml_contexts = [ec for ec in contexts if "Machine%20Learning" in ec.entity.value] + ml_contexts = [ec for ec in contexts if "Machine%20Learning" in ec.entity.iri] assert len(ml_contexts) == 2 assert ml_contexts[0].context == "First definition" assert ml_contexts[1].context == "Second definition" @@ -286,7 +286,7 @@ class TestAgentKgExtractionEdgeCases: assert len(contexts) == 3 # Empty entity should create empty URI after encoding - empty_entity_context = next((ec for ec in contexts if ec.entity.value == TRUSTGRAPH_ENTITIES), None) + empty_entity_context = next((ec for ec in contexts if ec.entity.iri == TRUSTGRAPH_ENTITIES), None) assert empty_entity_context is not None def test_process_extraction_data_nested_json_in_strings(self, agent_extractor): @@ -338,7 +338,7 @@ class TestAgentKgExtractionEdgeCases: # Should process all relationships # Note: The current implementation has some logic issues that these tests document - assert len([t for t in triples if t.p.value != RDF_LABEL and t.p.value != SUBJECT_OF]) >= 7 + assert len([t for t in triples if t.p.iri != RDF_LABEL and t.p.iri != SUBJECT_OF]) >= 7 @pytest.mark.asyncio async def test_emit_empty_collections(self, agent_extractor): diff --git a/tests/unit/test_knowledge_graph/test_graph_validation.py b/tests/unit/test_knowledge_graph/test_graph_validation.py index fd6e12cf..e9e2750b 100644 --- a/tests/unit/test_knowledge_graph/test_graph_validation.py +++ b/tests/unit/test_knowledge_graph/test_graph_validation.py @@ -7,7 +7,7 @@ processing graph structures, and performing graph operations. import pytest from unittest.mock import Mock -from .conftest import Triple, Value, Metadata +from .conftest import Triple, Metadata from collections import defaultdict, deque diff --git a/tests/unit/test_knowledge_graph/test_triple_construction.py b/tests/unit/test_knowledge_graph/test_triple_construction.py index b1cf1274..10bae2e7 100644 --- a/tests/unit/test_knowledge_graph/test_triple_construction.py +++ b/tests/unit/test_knowledge_graph/test_triple_construction.py @@ -2,13 +2,13 @@ Unit tests for triple construction logic Tests the core business logic for constructing RDF triples from extracted -entities and relationships, including URI generation, Value object creation, +entities and relationships, including URI generation, Term object creation, and triple validation. """ import pytest from unittest.mock import Mock -from .conftest import Triple, Triples, Value, Metadata +from .conftest import Triple, Triples, Term, Metadata, IRI, LITERAL import re import hashlib @@ -48,80 +48,82 @@ class TestTripleConstructionLogic: generated_uri = generate_uri(text, entity_type) assert generated_uri == expected_uri, f"URI generation failed for '{text}'" - def test_value_object_creation(self): - """Test creation of Value objects for subjects, predicates, and objects""" + def test_term_object_creation(self): + """Test creation of Term objects for subjects, predicates, and objects""" # Arrange - def create_value_object(text, is_uri, value_type=""): - return Value( - value=text, - is_uri=is_uri, - type=value_type - ) - + def create_term_object(text, is_uri, datatype=""): + if is_uri: + return Term(type=IRI, iri=text) + else: + return Term(type=LITERAL, value=text, datatype=datatype if datatype else None) + test_cases = [ ("http://trustgraph.ai/kg/person/john-smith", True, ""), ("John Smith", False, "string"), ("42", False, "integer"), ("http://schema.org/worksFor", True, "") ] - + # Act & Assert - for value_text, is_uri, value_type in test_cases: - value_obj = create_value_object(value_text, is_uri, value_type) - - assert isinstance(value_obj, Value) - assert value_obj.value == value_text - assert value_obj.is_uri == is_uri - assert value_obj.type == value_type + for value_text, is_uri, datatype in test_cases: + term_obj = create_term_object(value_text, is_uri, datatype) + + assert isinstance(term_obj, Term) + if is_uri: + assert term_obj.type == IRI + assert term_obj.iri == value_text + else: + assert term_obj.type == LITERAL + assert term_obj.value == value_text def test_triple_construction_from_relationship(self): """Test constructing Triple objects from relationships""" # Arrange relationship = { "subject": "John Smith", - "predicate": "works_for", + "predicate": "works_for", "object": "OpenAI", "subject_type": "PERSON", "object_type": "ORG" } - + def construct_triple(relationship, uri_base="http://trustgraph.ai/kg"): # Generate URIs subject_uri = f"{uri_base}/person/{relationship['subject'].lower().replace(' ', '-')}" object_uri = f"{uri_base}/org/{relationship['object'].lower().replace(' ', '-')}" - + # Map predicate to schema.org URI predicate_mappings = { "works_for": "http://schema.org/worksFor", "located_in": "http://schema.org/location", "developed": "http://schema.org/creator" } - predicate_uri = predicate_mappings.get(relationship["predicate"], + predicate_uri = predicate_mappings.get(relationship["predicate"], f"{uri_base}/predicate/{relationship['predicate']}") - - # Create Value objects - subject_value = Value(value=subject_uri, is_uri=True, type="") - predicate_value = Value(value=predicate_uri, is_uri=True, type="") - object_value = Value(value=object_uri, is_uri=True, type="") - + + # Create Term objects + subject_term = Term(type=IRI, iri=subject_uri) + predicate_term = Term(type=IRI, iri=predicate_uri) + object_term = Term(type=IRI, iri=object_uri) + # Create Triple return Triple( - s=subject_value, - p=predicate_value, - o=object_value + s=subject_term, + p=predicate_term, + o=object_term ) - + # Act triple = construct_triple(relationship) - + # Assert assert isinstance(triple, Triple) - assert triple.s.value == "http://trustgraph.ai/kg/person/john-smith" - assert triple.s.is_uri is True - assert triple.p.value == "http://schema.org/worksFor" - assert triple.p.is_uri is True - assert triple.o.value == "http://trustgraph.ai/kg/org/openai" - assert triple.o.is_uri is True + assert triple.s.iri == "http://trustgraph.ai/kg/person/john-smith" + assert triple.s.type == IRI + assert triple.p.iri == "http://schema.org/worksFor" + assert triple.p.type == IRI + assert triple.o.iri == "http://trustgraph.ai/kg/org/openai" + assert triple.o.type == IRI def test_literal_value_handling(self): """Test handling of literal values vs URI values""" @@ -132,10 +134,10 @@ class TestTripleConstructionLogic: ("John Smith", "email", "john@example.com", False), # Literal email ("John Smith", "worksFor", "http://trustgraph.ai/kg/org/openai", True) # URI reference ] - + def create_triple_with_literal(subject_uri, predicate, object_value, object_is_uri): - subject_val = Value(value=subject_uri, is_uri=True, type="") - + subject_term = Term(type=IRI, iri=subject_uri) + # Determine predicate URI predicate_mappings = { "name": "http://schema.org/name", @@ -144,32 +146,37 @@ class TestTripleConstructionLogic: "worksFor": "http://schema.org/worksFor" } predicate_uri = predicate_mappings.get(predicate, f"http://trustgraph.ai/kg/predicate/{predicate}") - predicate_val = Value(value=predicate_uri, is_uri=True, type="") - - # Create object value with appropriate type - object_type = "" - if not object_is_uri: + predicate_term = Term(type=IRI, iri=predicate_uri) + + # Create object term with appropriate type + if object_is_uri: + object_term = Term(type=IRI, iri=object_value) + else: + datatype = None if predicate == "age": - object_type = "integer" + datatype = "integer" elif predicate in ["name", "email"]: - object_type = "string" - - object_val = Value(value=object_value, is_uri=object_is_uri, type=object_type) - - return Triple(s=subject_val, p=predicate_val, o=object_val) - + datatype = "string" + object_term = Term(type=LITERAL, value=object_value, datatype=datatype) + + return Triple(s=subject_term, p=predicate_term, o=object_term) + # Act & Assert for subject_uri, predicate, object_value, object_is_uri in test_data: subject_full_uri = "http://trustgraph.ai/kg/person/john-smith" triple = create_triple_with_literal(subject_full_uri, predicate, object_value, object_is_uri) - - assert triple.o.is_uri == object_is_uri - assert triple.o.value == object_value - + + if object_is_uri: + assert triple.o.type == IRI + assert triple.o.iri == object_value + else: + assert triple.o.type == LITERAL + assert triple.o.value == object_value + if predicate == "age": - assert triple.o.type == "integer" + assert triple.o.datatype == "integer" elif predicate in ["name", "email"]: - assert triple.o.type == "string" + assert triple.o.datatype == "string" def test_namespace_management(self): """Test namespace prefix management and expansion""" @@ -216,63 +223,74 @@ class TestTripleConstructionLogic: def test_triple_validation(self): """Test triple validation rules""" # Arrange + def get_term_value(term): + """Extract value from a Term""" + if term.type == IRI: + return term.iri + else: + return term.value + def validate_triple(triple): errors = [] - + # Check required components - if not triple.s or not triple.s.value: + s_val = get_term_value(triple.s) if triple.s else None + p_val = get_term_value(triple.p) if triple.p else None + o_val = get_term_value(triple.o) if triple.o else None + + if not triple.s or not s_val: errors.append("Missing or empty subject") - - if not triple.p or not triple.p.value: + + if not triple.p or not p_val: errors.append("Missing or empty predicate") - - if not triple.o or not triple.o.value: + + if not triple.o or not o_val: errors.append("Missing or empty object") - + # Check URI validity for URI values uri_pattern = r'^https?://[^\s/$.?#].[^\s]*$' - - if triple.s.is_uri and not re.match(uri_pattern, triple.s.value): + + if triple.s.type == IRI and not re.match(uri_pattern, triple.s.iri or ""): errors.append("Invalid subject URI format") - - if triple.p.is_uri and not re.match(uri_pattern, triple.p.value): + + if triple.p.type == IRI and not re.match(uri_pattern, triple.p.iri or ""): errors.append("Invalid predicate URI format") - - if triple.o.is_uri and not re.match(uri_pattern, triple.o.value): + + if triple.o.type == IRI and not re.match(uri_pattern, triple.o.iri or ""): errors.append("Invalid object URI format") - + # Predicates should typically be URIs - if not triple.p.is_uri: + if triple.p.type != IRI: errors.append("Predicate should be a URI") - + return len(errors) == 0, errors - + # Test valid triple valid_triple = Triple( - s=Value(value="http://trustgraph.ai/kg/person/john", is_uri=True, type=""), - p=Value(value="http://schema.org/name", is_uri=True, type=""), - o=Value(value="John Smith", is_uri=False, type="string") + s=Term(type=IRI, iri="http://trustgraph.ai/kg/person/john"), + p=Term(type=IRI, iri="http://schema.org/name"), + o=Term(type=LITERAL, value="John Smith", datatype="string") ) - + # Test invalid triples invalid_triples = [ - Triple(s=Value(value="", is_uri=True, type=""), - p=Value(value="http://schema.org/name", is_uri=True, type=""), - o=Value(value="John", is_uri=False, type="")), # Empty subject - - Triple(s=Value(value="http://trustgraph.ai/kg/person/john", is_uri=True, type=""), - p=Value(value="name", is_uri=False, type=""), # Non-URI predicate - o=Value(value="John", is_uri=False, type="")), - - Triple(s=Value(value="invalid-uri", is_uri=True, type=""), - p=Value(value="http://schema.org/name", is_uri=True, type=""), - o=Value(value="John", is_uri=False, type="")) # Invalid URI format + Triple(s=Term(type=IRI, iri=""), + p=Term(type=IRI, iri="http://schema.org/name"), + o=Term(type=LITERAL, value="John")), # Empty subject + + Triple(s=Term(type=IRI, iri="http://trustgraph.ai/kg/person/john"), + p=Term(type=LITERAL, value="name"), # Non-URI predicate + o=Term(type=LITERAL, value="John")), + + Triple(s=Term(type=IRI, iri="invalid-uri"), + p=Term(type=IRI, iri="http://schema.org/name"), + o=Term(type=LITERAL, value="John")) # Invalid URI format ] - + # Act & Assert is_valid, errors = validate_triple(valid_triple) assert is_valid, f"Valid triple failed validation: {errors}" - + for invalid_triple in invalid_triples: is_valid, errors = validate_triple(invalid_triple) assert not is_valid, f"Invalid triple passed validation: {invalid_triple}" @@ -286,97 +304,97 @@ class TestTripleConstructionLogic: {"text": "OpenAI", "type": "ORG"}, {"text": "San Francisco", "type": "PLACE"} ] - + relationships = [ {"subject": "John Smith", "predicate": "works_for", "object": "OpenAI"}, {"subject": "OpenAI", "predicate": "located_in", "object": "San Francisco"} ] - + def construct_triple_batch(entities, relationships, document_id="doc-1"): triples = [] - + # Create type triples for entities for entity in entities: entity_uri = f"http://trustgraph.ai/kg/{entity['type'].lower()}/{entity['text'].lower().replace(' ', '-')}" type_uri = f"http://trustgraph.ai/kg/type/{entity['type']}" - + type_triple = Triple( - s=Value(value=entity_uri, is_uri=True, type=""), - p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True, type=""), - o=Value(value=type_uri, is_uri=True, type="") + s=Term(type=IRI, iri=entity_uri), + p=Term(type=IRI, iri="http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), + o=Term(type=IRI, iri=type_uri) ) triples.append(type_triple) - + # Create relationship triples for rel in relationships: subject_uri = f"http://trustgraph.ai/kg/entity/{rel['subject'].lower().replace(' ', '-')}" object_uri = f"http://trustgraph.ai/kg/entity/{rel['object'].lower().replace(' ', '-')}" predicate_uri = f"http://schema.org/{rel['predicate'].replace('_', '')}" - + rel_triple = Triple( - s=Value(value=subject_uri, is_uri=True, type=""), - p=Value(value=predicate_uri, is_uri=True, type=""), - o=Value(value=object_uri, is_uri=True, type="") + s=Term(type=IRI, iri=subject_uri), + p=Term(type=IRI, iri=predicate_uri), + o=Term(type=IRI, iri=object_uri) ) triples.append(rel_triple) - + return triples - + # Act triples = construct_triple_batch(entities, relationships) - + # Assert assert len(triples) == len(entities) + len(relationships) # Type triples + relationship triples - + # Check that all triples are valid Triple objects for triple in triples: assert isinstance(triple, Triple) - assert triple.s.value != "" - assert triple.p.value != "" - assert triple.o.value != "" + assert triple.s.iri != "" + assert triple.p.iri != "" + assert triple.o.iri != "" def test_triples_batch_object_creation(self): """Test creating Triples batch objects with metadata""" # Arrange sample_triples = [ Triple( - s=Value(value="http://trustgraph.ai/kg/person/john", is_uri=True, type=""), - p=Value(value="http://schema.org/name", is_uri=True, type=""), - o=Value(value="John Smith", is_uri=False, type="string") + s=Term(type=IRI, iri="http://trustgraph.ai/kg/person/john"), + p=Term(type=IRI, iri="http://schema.org/name"), + o=Term(type=LITERAL, value="John Smith", datatype="string") ), Triple( - s=Value(value="http://trustgraph.ai/kg/person/john", is_uri=True, type=""), - p=Value(value="http://schema.org/worksFor", is_uri=True, type=""), - o=Value(value="http://trustgraph.ai/kg/org/openai", is_uri=True, type="") + s=Term(type=IRI, iri="http://trustgraph.ai/kg/person/john"), + p=Term(type=IRI, iri="http://schema.org/worksFor"), + o=Term(type=IRI, iri="http://trustgraph.ai/kg/org/openai") ) ] - + metadata = Metadata( id="test-doc-123", - user="test_user", + user="test_user", collection="test_collection", metadata=[] ) - + # Act triples_batch = Triples( metadata=metadata, triples=sample_triples ) - + # Assert assert isinstance(triples_batch, Triples) assert triples_batch.metadata.id == "test-doc-123" assert triples_batch.metadata.user == "test_user" assert triples_batch.metadata.collection == "test_collection" assert len(triples_batch.triples) == 2 - + # Check that triples are properly embedded for triple in triples_batch.triples: assert isinstance(triple, Triple) - assert isinstance(triple.s, Value) - assert isinstance(triple.p, Value) - assert isinstance(triple.o, Value) + assert isinstance(triple.s, Term) + assert isinstance(triple.p, Term) + assert isinstance(triple.o, Term) def test_uri_collision_handling(self): """Test handling of URI collisions and duplicate detection""" diff --git a/tests/unit/test_query/test_graph_embeddings_milvus_query.py b/tests/unit/test_query/test_graph_embeddings_milvus_query.py index ebacfaaf..21b6e1bf 100644 --- a/tests/unit/test_query/test_graph_embeddings_milvus_query.py +++ b/tests/unit/test_query/test_graph_embeddings_milvus_query.py @@ -6,7 +6,7 @@ import pytest from unittest.mock import MagicMock, patch from trustgraph.query.graph_embeddings.milvus.service import Processor -from trustgraph.schema import Value, GraphEmbeddingsRequest +from trustgraph.schema import Term, GraphEmbeddingsRequest, IRI, LITERAL class TestMilvusGraphEmbeddingsQueryProcessor: @@ -68,50 +68,50 @@ class TestMilvusGraphEmbeddingsQueryProcessor: def test_create_value_with_http_uri(self, processor): """Test create_value with HTTP URI""" result = processor.create_value("http://example.com/resource") - - assert isinstance(result, Value) - assert result.value == "http://example.com/resource" - assert result.is_uri is True + + assert isinstance(result, Term) + assert result.iri == "http://example.com/resource" + assert result.type == IRI def test_create_value_with_https_uri(self, processor): """Test create_value with HTTPS URI""" result = processor.create_value("https://example.com/resource") - - assert isinstance(result, Value) - assert result.value == "https://example.com/resource" - assert result.is_uri is True + + assert isinstance(result, Term) + assert result.iri == "https://example.com/resource" + assert result.type == IRI def test_create_value_with_literal(self, processor): """Test create_value with literal value""" result = processor.create_value("just a literal string") - assert isinstance(result, Value) + assert isinstance(result, Term) assert result.value == "just a literal string" - assert result.is_uri is False + assert result.type == LITERAL def test_create_value_with_empty_string(self, processor): """Test create_value with empty string""" result = processor.create_value("") - assert isinstance(result, Value) + assert isinstance(result, Term) assert result.value == "" - assert result.is_uri is False + assert result.type == LITERAL def test_create_value_with_partial_uri(self, processor): """Test create_value with string that looks like URI but isn't complete""" result = processor.create_value("http") - assert isinstance(result, Value) + assert isinstance(result, Term) assert result.value == "http" - assert result.is_uri is False + assert result.type == LITERAL def test_create_value_with_ftp_uri(self, processor): """Test create_value with FTP URI (should not be detected as URI)""" result = processor.create_value("ftp://example.com/file") - assert isinstance(result, Value) + assert isinstance(result, Term) assert result.value == "ftp://example.com/file" - assert result.is_uri is False + assert result.type == LITERAL @pytest.mark.asyncio async def test_query_graph_embeddings_single_vector(self, processor): @@ -138,17 +138,17 @@ class TestMilvusGraphEmbeddingsQueryProcessor: [0.1, 0.2, 0.3], 'test_user', 'test_collection', limit=10 ) - # Verify results are converted to Value objects + # Verify results are converted to Term objects assert len(result) == 3 - assert isinstance(result[0], Value) - assert result[0].value == "http://example.com/entity1" - assert result[0].is_uri is True - assert isinstance(result[1], Value) - assert result[1].value == "http://example.com/entity2" - assert result[1].is_uri is True - assert isinstance(result[2], Value) + assert isinstance(result[0], Term) + assert result[0].iri == "http://example.com/entity1" + assert result[0].type == IRI + assert isinstance(result[1], Term) + assert result[1].iri == "http://example.com/entity2" + assert result[1].type == IRI + assert isinstance(result[2], Term) assert result[2].value == "literal entity" - assert result[2].is_uri is False + assert result[2].type == LITERAL @pytest.mark.asyncio async def test_query_graph_embeddings_multiple_vectors(self, processor): @@ -186,7 +186,7 @@ class TestMilvusGraphEmbeddingsQueryProcessor: # Verify results are deduplicated and limited assert len(result) == 3 - entity_values = [r.value for r in result] + entity_values = [r.iri if r.type == IRI else r.value for r in result] assert "http://example.com/entity1" in entity_values assert "http://example.com/entity2" in entity_values assert "http://example.com/entity3" in entity_values @@ -246,7 +246,7 @@ class TestMilvusGraphEmbeddingsQueryProcessor: # Verify duplicates are removed assert len(result) == 3 - entity_values = [r.value for r in result] + entity_values = [r.iri if r.type == IRI else r.value for r in result] assert len(set(entity_values)) == 3 # All unique assert "http://example.com/entity1" in entity_values assert "http://example.com/entity2" in entity_values @@ -346,14 +346,14 @@ class TestMilvusGraphEmbeddingsQueryProcessor: assert len(result) == 4 # Check URI entities - uri_results = [r for r in result if r.is_uri] + uri_results = [r for r in result if r.type == IRI] assert len(uri_results) == 2 - uri_values = [r.value for r in uri_results] + uri_values = [r.iri for r in uri_results] assert "http://example.com/uri_entity" in uri_values assert "https://example.com/another_uri" in uri_values # Check literal entities - literal_results = [r for r in result if not r.is_uri] + literal_results = [r for r in result if not r.type == IRI] assert len(literal_results) == 2 literal_values = [r.value for r in literal_results] assert "literal entity text" in literal_values @@ -486,7 +486,7 @@ class TestMilvusGraphEmbeddingsQueryProcessor: # Verify results from all dimensions assert len(result) == 3 - entity_values = [r.value for r in result] + entity_values = [r.iri if r.type == IRI else r.value for r in result] assert "entity_2d" in entity_values assert "entity_4d" in entity_values assert "entity_3d" in entity_values \ No newline at end of file diff --git a/tests/unit/test_query/test_graph_embeddings_pinecone_query.py b/tests/unit/test_query/test_graph_embeddings_pinecone_query.py index 0c13e9c9..1b243113 100644 --- a/tests/unit/test_query/test_graph_embeddings_pinecone_query.py +++ b/tests/unit/test_query/test_graph_embeddings_pinecone_query.py @@ -9,7 +9,7 @@ from unittest.mock import MagicMock, patch pytest.skip("Pinecone library missing protoc_gen_openapiv2 dependency", allow_module_level=True) from trustgraph.query.graph_embeddings.pinecone.service import Processor -from trustgraph.schema import Value +from trustgraph.schema import Term, IRI, LITERAL class TestPineconeGraphEmbeddingsQueryProcessor: @@ -105,27 +105,27 @@ class TestPineconeGraphEmbeddingsQueryProcessor: uri_entity = "http://example.org/entity" value = processor.create_value(uri_entity) - assert isinstance(value, Value) + assert isinstance(value, Term) assert value.value == uri_entity - assert value.is_uri == True + assert value.type == IRI def test_create_value_https_uri(self, processor): """Test create_value method for HTTPS URI entities""" uri_entity = "https://example.org/entity" value = processor.create_value(uri_entity) - assert isinstance(value, Value) + assert isinstance(value, Term) assert value.value == uri_entity - assert value.is_uri == True + assert value.type == IRI def test_create_value_literal(self, processor): """Test create_value method for literal entities""" literal_entity = "literal_entity" value = processor.create_value(literal_entity) - assert isinstance(value, Value) + assert isinstance(value, Term) assert value.value == literal_entity - assert value.is_uri == False + assert value.type == LITERAL @pytest.mark.asyncio async def test_query_graph_embeddings_single_vector(self, processor): @@ -165,11 +165,11 @@ class TestPineconeGraphEmbeddingsQueryProcessor: # Verify results assert len(entities) == 3 assert entities[0].value == 'http://example.org/entity1' - assert entities[0].is_uri == True + assert entities[0].type == IRI assert entities[1].value == 'entity2' - assert entities[1].is_uri == False + assert entities[1].type == LITERAL assert entities[2].value == 'http://example.org/entity3' - assert entities[2].is_uri == True + assert entities[2].type == IRI @pytest.mark.asyncio async def test_query_graph_embeddings_multiple_vectors(self, processor, mock_query_message): diff --git a/tests/unit/test_query/test_graph_embeddings_qdrant_query.py b/tests/unit/test_query/test_graph_embeddings_qdrant_query.py index ab22c9df..1760c4c1 100644 --- a/tests/unit/test_query/test_graph_embeddings_qdrant_query.py +++ b/tests/unit/test_query/test_graph_embeddings_qdrant_query.py @@ -9,6 +9,7 @@ from unittest import IsolatedAsyncioTestCase # Import the service under test from trustgraph.query.graph_embeddings.qdrant.service import Processor +from trustgraph.schema import IRI, LITERAL class TestQdrantGraphEmbeddingsQuery(IsolatedAsyncioTestCase): @@ -85,10 +86,10 @@ class TestQdrantGraphEmbeddingsQuery(IsolatedAsyncioTestCase): value = processor.create_value('http://example.com/entity') # Assert - assert hasattr(value, 'value') - assert value.value == 'http://example.com/entity' - assert hasattr(value, 'is_uri') - assert value.is_uri == True + assert hasattr(value, 'iri') + assert value.iri == 'http://example.com/entity' + assert hasattr(value, 'type') + assert value.type == IRI @patch('trustgraph.query.graph_embeddings.qdrant.service.QdrantClient') @patch('trustgraph.base.GraphEmbeddingsQueryService.__init__') @@ -109,10 +110,10 @@ class TestQdrantGraphEmbeddingsQuery(IsolatedAsyncioTestCase): value = processor.create_value('https://secure.example.com/entity') # Assert - assert hasattr(value, 'value') - assert value.value == 'https://secure.example.com/entity' - assert hasattr(value, 'is_uri') - assert value.is_uri == True + assert hasattr(value, 'iri') + assert value.iri == 'https://secure.example.com/entity' + assert hasattr(value, 'type') + assert value.type == IRI @patch('trustgraph.query.graph_embeddings.qdrant.service.QdrantClient') @patch('trustgraph.base.GraphEmbeddingsQueryService.__init__') @@ -135,8 +136,8 @@ class TestQdrantGraphEmbeddingsQuery(IsolatedAsyncioTestCase): # Assert assert hasattr(value, 'value') assert value.value == 'regular entity name' - assert hasattr(value, 'is_uri') - assert value.is_uri == False + assert hasattr(value, 'type') + assert value.type == LITERAL @patch('trustgraph.query.graph_embeddings.qdrant.service.QdrantClient') @patch('trustgraph.base.GraphEmbeddingsQueryService.__init__') @@ -428,14 +429,14 @@ class TestQdrantGraphEmbeddingsQuery(IsolatedAsyncioTestCase): assert len(result) == 3 # Check URI entities - uri_entities = [entity for entity in result if hasattr(entity, 'is_uri') and entity.is_uri] + uri_entities = [entity for entity in result if entity.type == IRI] assert len(uri_entities) == 2 - uri_values = [entity.value for entity in uri_entities] + uri_values = [entity.iri for entity in uri_entities] assert 'http://example.com/entity1' in uri_values assert 'https://secure.example.com/entity2' in uri_values - + # Check regular entities - regular_entities = [entity for entity in result if hasattr(entity, 'is_uri') and not entity.is_uri] + regular_entities = [entity for entity in result if entity.type == LITERAL] assert len(regular_entities) == 1 assert regular_entities[0].value == 'regular entity' diff --git a/tests/unit/test_query/test_memgraph_user_collection_query.py b/tests/unit/test_query/test_memgraph_user_collection_query.py index 772d4f84..038fb438 100644 --- a/tests/unit/test_query/test_memgraph_user_collection_query.py +++ b/tests/unit/test_query/test_memgraph_user_collection_query.py @@ -6,7 +6,7 @@ import pytest from unittest.mock import MagicMock, patch from trustgraph.query.triples.memgraph.service import Processor -from trustgraph.schema import TriplesQueryRequest, Value +from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL class TestMemgraphQueryUserCollectionIsolation: @@ -24,9 +24,9 @@ class TestMemgraphQueryUserCollectionIsolation: query = TriplesQueryRequest( user="test_user", collection="test_collection", - s=Value(value="http://example.com/s", is_uri=True), - p=Value(value="http://example.com/p", is_uri=True), - o=Value(value="test_object", is_uri=False), + s=Term(type=IRI, iri="http://example.com/s"), + p=Term(type=IRI, iri="http://example.com/p"), + o=Term(type=LITERAL, value="test_object"), limit=1000 ) @@ -65,8 +65,8 @@ class TestMemgraphQueryUserCollectionIsolation: query = TriplesQueryRequest( user="test_user", collection="test_collection", - s=Value(value="http://example.com/s", is_uri=True), - p=Value(value="http://example.com/p", is_uri=True), + s=Term(type=IRI, iri="http://example.com/s"), + p=Term(type=IRI, iri="http://example.com/p"), o=None, limit=1000 ) @@ -105,9 +105,9 @@ class TestMemgraphQueryUserCollectionIsolation: query = TriplesQueryRequest( user="test_user", collection="test_collection", - s=Value(value="http://example.com/s", is_uri=True), + s=Term(type=IRI, iri="http://example.com/s"), p=None, - o=Value(value="http://example.com/o", is_uri=True), + o=Term(type=IRI, iri="http://example.com/o"), limit=1000 ) @@ -145,7 +145,7 @@ class TestMemgraphQueryUserCollectionIsolation: query = TriplesQueryRequest( user="test_user", collection="test_collection", - s=Value(value="http://example.com/s", is_uri=True), + s=Term(type=IRI, iri="http://example.com/s"), p=None, o=None, limit=1000 @@ -185,8 +185,8 @@ class TestMemgraphQueryUserCollectionIsolation: user="test_user", collection="test_collection", s=None, - p=Value(value="http://example.com/p", is_uri=True), - o=Value(value="literal", is_uri=False), + p=Term(type=IRI, iri="http://example.com/p"), + o=Term(type=LITERAL, value="literal"), limit=1000 ) @@ -225,7 +225,7 @@ class TestMemgraphQueryUserCollectionIsolation: user="test_user", collection="test_collection", s=None, - p=Value(value="http://example.com/p", is_uri=True), + p=Term(type=IRI, iri="http://example.com/p"), o=None, limit=1000 ) @@ -265,7 +265,7 @@ class TestMemgraphQueryUserCollectionIsolation: collection="test_collection", s=None, p=None, - o=Value(value="test_value", is_uri=False), + o=Term(type=LITERAL, value="test_value"), limit=1000 ) @@ -355,7 +355,7 @@ class TestMemgraphQueryUserCollectionIsolation: # Query without user/collection fields query = TriplesQueryRequest( - s=Value(value="http://example.com/s", is_uri=True), + s=Term(type=IRI, iri="http://example.com/s"), p=None, o=None, limit=1000 @@ -385,7 +385,7 @@ class TestMemgraphQueryUserCollectionIsolation: query = TriplesQueryRequest( user="test_user", collection="test_collection", - s=Value(value="http://example.com/s", is_uri=True), + s=Term(type=IRI, iri="http://example.com/s"), p=None, o=None, limit=1000 @@ -416,17 +416,17 @@ class TestMemgraphQueryUserCollectionIsolation: assert len(result) == 2 # First triple (literal object) - assert result[0].s.value == "http://example.com/s" - assert result[0].s.is_uri == True - assert result[0].p.value == "http://example.com/p1" - assert result[0].p.is_uri == True + assert result[0].s.iri == "http://example.com/s" + assert result[0].s.type == IRI + assert result[0].p.iri == "http://example.com/p1" + assert result[0].p.type == IRI assert result[0].o.value == "literal_value" - assert result[0].o.is_uri == False - + assert result[0].o.type == LITERAL + # Second triple (URI object) - assert result[1].s.value == "http://example.com/s" - assert result[1].s.is_uri == True - assert result[1].p.value == "http://example.com/p2" - assert result[1].p.is_uri == True - assert result[1].o.value == "http://example.com/o" - assert result[1].o.is_uri == True \ No newline at end of file + assert result[1].s.iri == "http://example.com/s" + assert result[1].s.type == IRI + assert result[1].p.iri == "http://example.com/p2" + assert result[1].p.type == IRI + assert result[1].o.iri == "http://example.com/o" + assert result[1].o.type == IRI \ No newline at end of file diff --git a/tests/unit/test_query/test_neo4j_user_collection_query.py b/tests/unit/test_query/test_neo4j_user_collection_query.py index bbcaeb9a..d9cf1eb4 100644 --- a/tests/unit/test_query/test_neo4j_user_collection_query.py +++ b/tests/unit/test_query/test_neo4j_user_collection_query.py @@ -6,7 +6,7 @@ import pytest from unittest.mock import MagicMock, patch from trustgraph.query.triples.neo4j.service import Processor -from trustgraph.schema import TriplesQueryRequest, Value +from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL class TestNeo4jQueryUserCollectionIsolation: @@ -24,9 +24,9 @@ class TestNeo4jQueryUserCollectionIsolation: query = TriplesQueryRequest( user="test_user", collection="test_collection", - s=Value(value="http://example.com/s", is_uri=True), - p=Value(value="http://example.com/p", is_uri=True), - o=Value(value="test_object", is_uri=False), + s=Term(type=IRI, iri="http://example.com/s"), + p=Term(type=IRI, iri="http://example.com/p"), + o=Term(type=LITERAL, value="test_object"), limit=10 ) @@ -65,8 +65,8 @@ class TestNeo4jQueryUserCollectionIsolation: query = TriplesQueryRequest( user="test_user", collection="test_collection", - s=Value(value="http://example.com/s", is_uri=True), - p=Value(value="http://example.com/p", is_uri=True), + s=Term(type=IRI, iri="http://example.com/s"), + p=Term(type=IRI, iri="http://example.com/p"), o=None, limit=10 ) @@ -123,9 +123,9 @@ class TestNeo4jQueryUserCollectionIsolation: query = TriplesQueryRequest( user="test_user", collection="test_collection", - s=Value(value="http://example.com/s", is_uri=True), + s=Term(type=IRI, iri="http://example.com/s"), p=None, - o=Value(value="http://example.com/o", is_uri=True), + o=Term(type=IRI, iri="http://example.com/o"), limit=10 ) @@ -163,7 +163,7 @@ class TestNeo4jQueryUserCollectionIsolation: query = TriplesQueryRequest( user="test_user", collection="test_collection", - s=Value(value="http://example.com/s", is_uri=True), + s=Term(type=IRI, iri="http://example.com/s"), p=None, o=None, limit=10 @@ -203,8 +203,8 @@ class TestNeo4jQueryUserCollectionIsolation: user="test_user", collection="test_collection", s=None, - p=Value(value="http://example.com/p", is_uri=True), - o=Value(value="literal", is_uri=False), + p=Term(type=IRI, iri="http://example.com/p"), + o=Term(type=LITERAL, value="literal"), limit=10 ) @@ -243,7 +243,7 @@ class TestNeo4jQueryUserCollectionIsolation: user="test_user", collection="test_collection", s=None, - p=Value(value="http://example.com/p", is_uri=True), + p=Term(type=IRI, iri="http://example.com/p"), o=None, limit=10 ) @@ -283,7 +283,7 @@ class TestNeo4jQueryUserCollectionIsolation: collection="test_collection", s=None, p=None, - o=Value(value="test_value", is_uri=False), + o=Term(type=LITERAL, value="test_value"), limit=10 ) @@ -373,7 +373,7 @@ class TestNeo4jQueryUserCollectionIsolation: # Query without user/collection fields query = TriplesQueryRequest( - s=Value(value="http://example.com/s", is_uri=True), + s=Term(type=IRI, iri="http://example.com/s"), p=None, o=None, limit=10 @@ -403,7 +403,7 @@ class TestNeo4jQueryUserCollectionIsolation: query = TriplesQueryRequest( user="test_user", collection="test_collection", - s=Value(value="http://example.com/s", is_uri=True), + s=Term(type=IRI, iri="http://example.com/s"), p=None, o=None, limit=10 @@ -434,17 +434,17 @@ class TestNeo4jQueryUserCollectionIsolation: assert len(result) == 2 # First triple (literal object) - assert result[0].s.value == "http://example.com/s" - assert result[0].s.is_uri == True - assert result[0].p.value == "http://example.com/p1" - assert result[0].p.is_uri == True + assert result[0].s.iri == "http://example.com/s" + assert result[0].s.type == IRI + assert result[0].p.iri == "http://example.com/p1" + assert result[0].p.type == IRI assert result[0].o.value == "literal_value" - assert result[0].o.is_uri == False - + assert result[0].o.type == LITERAL + # Second triple (URI object) - assert result[1].s.value == "http://example.com/s" - assert result[1].s.is_uri == True - assert result[1].p.value == "http://example.com/p2" - assert result[1].p.is_uri == True - assert result[1].o.value == "http://example.com/o" - assert result[1].o.is_uri == True \ No newline at end of file + assert result[1].s.iri == "http://example.com/s" + assert result[1].s.type == IRI + assert result[1].p.iri == "http://example.com/p2" + assert result[1].p.type == IRI + assert result[1].o.iri == "http://example.com/o" + assert result[1].o.type == IRI \ No newline at end of file diff --git a/tests/unit/test_query/test_triples_cassandra_query.py b/tests/unit/test_query/test_triples_cassandra_query.py index f5be4961..0c5dc29c 100644 --- a/tests/unit/test_query/test_triples_cassandra_query.py +++ b/tests/unit/test_query/test_triples_cassandra_query.py @@ -5,8 +5,8 @@ Tests for Cassandra triples query service import pytest from unittest.mock import MagicMock, patch -from trustgraph.query.triples.cassandra.service import Processor -from trustgraph.schema import Value +from trustgraph.query.triples.cassandra.service import Processor, create_term +from trustgraph.schema import Term, IRI, LITERAL class TestCassandraQueryProcessor: @@ -21,64 +21,67 @@ class TestCassandraQueryProcessor: graph_host='localhost' ) - def test_create_value_with_http_uri(self, processor): - """Test create_value with HTTP URI""" - result = processor.create_value("http://example.com/resource") - - assert isinstance(result, Value) - assert result.value == "http://example.com/resource" - assert result.is_uri is True + def test_create_term_with_http_uri(self, processor): + """Test create_term with HTTP URI""" + result = create_term("http://example.com/resource") - def test_create_value_with_https_uri(self, processor): - """Test create_value with HTTPS URI""" - result = processor.create_value("https://example.com/resource") - - assert isinstance(result, Value) - assert result.value == "https://example.com/resource" - assert result.is_uri is True + assert isinstance(result, Term) + assert result.iri == "http://example.com/resource" + assert result.type == IRI - def test_create_value_with_literal(self, processor): - """Test create_value with literal value""" - result = processor.create_value("just a literal string") - - assert isinstance(result, Value) + def test_create_term_with_https_uri(self, processor): + """Test create_term with HTTPS URI""" + result = create_term("https://example.com/resource") + + assert isinstance(result, Term) + assert result.iri == "https://example.com/resource" + assert result.type == IRI + + def test_create_term_with_literal(self, processor): + """Test create_term with literal value""" + result = create_term("just a literal string") + + assert isinstance(result, Term) assert result.value == "just a literal string" - assert result.is_uri is False + assert result.type == LITERAL - def test_create_value_with_empty_string(self, processor): - """Test create_value with empty string""" - result = processor.create_value("") - - assert isinstance(result, Value) + def test_create_term_with_empty_string(self, processor): + """Test create_term with empty string""" + result = create_term("") + + assert isinstance(result, Term) assert result.value == "" - assert result.is_uri is False + assert result.type == LITERAL - def test_create_value_with_partial_uri(self, processor): - """Test create_value with string that looks like URI but isn't complete""" - result = processor.create_value("http") - - assert isinstance(result, Value) + def test_create_term_with_partial_uri(self, processor): + """Test create_term with string that looks like URI but isn't complete""" + result = create_term("http") + + assert isinstance(result, Term) assert result.value == "http" - assert result.is_uri is False + assert result.type == LITERAL - def test_create_value_with_ftp_uri(self, processor): - """Test create_value with FTP URI (should not be detected as URI)""" - result = processor.create_value("ftp://example.com/file") - - assert isinstance(result, Value) + def test_create_term_with_ftp_uri(self, processor): + """Test create_term with FTP URI (should not be detected as URI)""" + result = create_term("ftp://example.com/file") + + assert isinstance(result, Term) assert result.value == "ftp://example.com/file" - assert result.is_uri is False + assert result.type == LITERAL @pytest.mark.asyncio @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph') async def test_query_triples_spo_query(self, mock_trustgraph): """Test querying triples with subject, predicate, and object specified""" - from trustgraph.schema import TriplesQueryRequest, Value + from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL # Setup mock TrustGraph mock_tg_instance = MagicMock() mock_trustgraph.return_value = mock_tg_instance - mock_tg_instance.get_spo.return_value = None # SPO query returns None if found + # SPO query returns a list of results (with mock graph attribute) + mock_result = MagicMock() + mock_result.g = None + mock_tg_instance.get_spo.return_value = [mock_result] processor = Processor( taskgroup=MagicMock(), @@ -90,9 +93,9 @@ class TestCassandraQueryProcessor: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value='test_subject', is_uri=False), - p=Value(value='test_predicate', is_uri=False), - o=Value(value='test_object', is_uri=False), + s=Term(type=LITERAL, value='test_subject'), + p=Term(type=LITERAL, value='test_predicate'), + o=Term(type=LITERAL, value='test_object'), limit=100 ) @@ -106,7 +109,7 @@ class TestCassandraQueryProcessor: # Verify get_spo was called with correct parameters mock_tg_instance.get_spo.assert_called_once_with( - 'test_collection', 'test_subject', 'test_predicate', 'test_object', limit=100 + 'test_collection', 'test_subject', 'test_predicate', 'test_object', g=None, limit=100 ) # Verify result contains the queried triple @@ -146,7 +149,7 @@ class TestCassandraQueryProcessor: @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph') async def test_query_triples_sp_pattern(self, mock_trustgraph): """Test SP query pattern (subject and predicate, no object)""" - from trustgraph.schema import TriplesQueryRequest, Value + from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL # Setup mock TrustGraph and response mock_tg_instance = MagicMock() @@ -161,15 +164,15 @@ class TestCassandraQueryProcessor: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value='test_subject', is_uri=False), - p=Value(value='test_predicate', is_uri=False), + s=Term(type=LITERAL, value='test_subject'), + p=Term(type=LITERAL, value='test_predicate'), o=None, limit=50 ) result = await processor.query_triples(query) - mock_tg_instance.get_sp.assert_called_once_with('test_collection', 'test_subject', 'test_predicate', limit=50) + mock_tg_instance.get_sp.assert_called_once_with('test_collection', 'test_subject', 'test_predicate', g=None, limit=50) assert len(result) == 1 assert result[0].s.value == 'test_subject' assert result[0].p.value == 'test_predicate' @@ -179,7 +182,7 @@ class TestCassandraQueryProcessor: @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph') async def test_query_triples_s_pattern(self, mock_trustgraph): """Test S query pattern (subject only)""" - from trustgraph.schema import TriplesQueryRequest, Value + from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL mock_tg_instance = MagicMock() mock_trustgraph.return_value = mock_tg_instance @@ -194,7 +197,7 @@ class TestCassandraQueryProcessor: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value='test_subject', is_uri=False), + s=Term(type=LITERAL, value='test_subject'), p=None, o=None, limit=25 @@ -202,7 +205,7 @@ class TestCassandraQueryProcessor: result = await processor.query_triples(query) - mock_tg_instance.get_s.assert_called_once_with('test_collection', 'test_subject', limit=25) + mock_tg_instance.get_s.assert_called_once_with('test_collection', 'test_subject', g=None, limit=25) assert len(result) == 1 assert result[0].s.value == 'test_subject' assert result[0].p.value == 'result_predicate' @@ -212,7 +215,7 @@ class TestCassandraQueryProcessor: @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph') async def test_query_triples_p_pattern(self, mock_trustgraph): """Test P query pattern (predicate only)""" - from trustgraph.schema import TriplesQueryRequest, Value + from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL mock_tg_instance = MagicMock() mock_trustgraph.return_value = mock_tg_instance @@ -228,14 +231,14 @@ class TestCassandraQueryProcessor: user='test_user', collection='test_collection', s=None, - p=Value(value='test_predicate', is_uri=False), + p=Term(type=LITERAL, value='test_predicate'), o=None, limit=10 ) result = await processor.query_triples(query) - mock_tg_instance.get_p.assert_called_once_with('test_collection', 'test_predicate', limit=10) + mock_tg_instance.get_p.assert_called_once_with('test_collection', 'test_predicate', g=None, limit=10) assert len(result) == 1 assert result[0].s.value == 'result_subject' assert result[0].p.value == 'test_predicate' @@ -245,7 +248,7 @@ class TestCassandraQueryProcessor: @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph') async def test_query_triples_o_pattern(self, mock_trustgraph): """Test O query pattern (object only)""" - from trustgraph.schema import TriplesQueryRequest, Value + from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL mock_tg_instance = MagicMock() mock_trustgraph.return_value = mock_tg_instance @@ -262,13 +265,13 @@ class TestCassandraQueryProcessor: collection='test_collection', s=None, p=None, - o=Value(value='test_object', is_uri=False), + o=Term(type=LITERAL, value='test_object'), limit=75 ) result = await processor.query_triples(query) - mock_tg_instance.get_o.assert_called_once_with('test_collection', 'test_object', limit=75) + mock_tg_instance.get_o.assert_called_once_with('test_collection', 'test_object', g=None, limit=75) assert len(result) == 1 assert result[0].s.value == 'result_subject' assert result[0].p.value == 'result_predicate' @@ -372,18 +375,21 @@ class TestCassandraQueryProcessor: run() - mock_launch.assert_called_once_with(default_ident, '\nTriples query service. Input is a (s, p, o) triple, some values may be\nnull. Output is a list of triples.\n') + mock_launch.assert_called_once_with(default_ident, '\nTriples query service. Input is a (s, p, o, g) quad pattern, some values may be\nnull. Output is a list of quads.\n') @pytest.mark.asyncio @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph') async def test_query_triples_with_authentication(self, mock_trustgraph): """Test querying with username and password authentication""" - from trustgraph.schema import TriplesQueryRequest, Value + from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL mock_tg_instance = MagicMock() mock_trustgraph.return_value = mock_tg_instance - mock_tg_instance.get_spo.return_value = None - + # SPO query returns a list of results + mock_result = MagicMock() + mock_result.g = None + mock_tg_instance.get_spo.return_value = [mock_result] + processor = Processor( taskgroup=MagicMock(), cassandra_username='authuser', @@ -393,9 +399,9 @@ class TestCassandraQueryProcessor: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value='test_subject', is_uri=False), - p=Value(value='test_predicate', is_uri=False), - o=Value(value='test_object', is_uri=False), + s=Term(type=LITERAL, value='test_subject'), + p=Term(type=LITERAL, value='test_predicate'), + o=Term(type=LITERAL, value='test_object'), limit=100 ) @@ -413,27 +419,30 @@ class TestCassandraQueryProcessor: @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph') async def test_query_triples_table_reuse(self, mock_trustgraph): """Test that TrustGraph is reused for same table""" - from trustgraph.schema import TriplesQueryRequest, Value + from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL mock_tg_instance = MagicMock() mock_trustgraph.return_value = mock_tg_instance - mock_tg_instance.get_spo.return_value = None - + # SPO query returns a list of results + mock_result = MagicMock() + mock_result.g = None + mock_tg_instance.get_spo.return_value = [mock_result] + processor = Processor(taskgroup=MagicMock()) - + query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value='test_subject', is_uri=False), - p=Value(value='test_predicate', is_uri=False), - o=Value(value='test_object', is_uri=False), + s=Term(type=LITERAL, value='test_subject'), + p=Term(type=LITERAL, value='test_predicate'), + o=Term(type=LITERAL, value='test_object'), limit=100 ) - + # First query should create TrustGraph await processor.query_triples(query) assert mock_trustgraph.call_count == 1 - + # Second query with same table should reuse TrustGraph await processor.query_triples(query) assert mock_trustgraph.call_count == 1 # Should not increase @@ -442,7 +451,7 @@ class TestCassandraQueryProcessor: @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph') async def test_query_triples_table_switching(self, mock_trustgraph): """Test table switching creates new TrustGraph""" - from trustgraph.schema import TriplesQueryRequest, Value + from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL mock_tg_instance1 = MagicMock() mock_tg_instance2 = MagicMock() @@ -454,7 +463,7 @@ class TestCassandraQueryProcessor: query1 = TriplesQueryRequest( user='user1', collection='collection1', - s=Value(value='test_subject', is_uri=False), + s=Term(type=LITERAL, value='test_subject'), p=None, o=None, limit=100 @@ -467,7 +476,7 @@ class TestCassandraQueryProcessor: query2 = TriplesQueryRequest( user='user2', collection='collection2', - s=Value(value='test_subject', is_uri=False), + s=Term(type=LITERAL, value='test_subject'), p=None, o=None, limit=100 @@ -483,7 +492,7 @@ class TestCassandraQueryProcessor: @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph') async def test_query_triples_exception_handling(self, mock_trustgraph): """Test exception handling during query execution""" - from trustgraph.schema import TriplesQueryRequest, Value + from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL mock_tg_instance = MagicMock() mock_trustgraph.return_value = mock_tg_instance @@ -494,9 +503,9 @@ class TestCassandraQueryProcessor: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value='test_subject', is_uri=False), - p=Value(value='test_predicate', is_uri=False), - o=Value(value='test_object', is_uri=False), + s=Term(type=LITERAL, value='test_subject'), + p=Term(type=LITERAL, value='test_predicate'), + o=Term(type=LITERAL, value='test_object'), limit=100 ) @@ -507,7 +516,7 @@ class TestCassandraQueryProcessor: @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph') async def test_query_triples_multiple_results(self, mock_trustgraph): """Test query returning multiple results""" - from trustgraph.schema import TriplesQueryRequest, Value + from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL mock_tg_instance = MagicMock() mock_trustgraph.return_value = mock_tg_instance @@ -524,8 +533,8 @@ class TestCassandraQueryProcessor: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value='test_subject', is_uri=False), - p=Value(value='test_predicate', is_uri=False), + s=Term(type=LITERAL, value='test_subject'), + p=Term(type=LITERAL, value='test_predicate'), o=None, limit=100 ) @@ -544,7 +553,7 @@ class TestCassandraQueryPerformanceOptimizations: @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph') async def test_get_po_query_optimization(self, mock_trustgraph): """Test that get_po queries use optimized table (no ALLOW FILTERING)""" - from trustgraph.schema import TriplesQueryRequest, Value + from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL mock_tg_instance = MagicMock() mock_trustgraph.return_value = mock_tg_instance @@ -560,8 +569,8 @@ class TestCassandraQueryPerformanceOptimizations: user='test_user', collection='test_collection', s=None, - p=Value(value='test_predicate', is_uri=False), - o=Value(value='test_object', is_uri=False), + p=Term(type=LITERAL, value='test_predicate'), + o=Term(type=LITERAL, value='test_object'), limit=50 ) @@ -569,7 +578,7 @@ class TestCassandraQueryPerformanceOptimizations: # Verify get_po was called (should use optimized po_table) mock_tg_instance.get_po.assert_called_once_with( - 'test_collection', 'test_predicate', 'test_object', limit=50 + 'test_collection', 'test_predicate', 'test_object', g=None, limit=50 ) assert len(result) == 1 @@ -581,7 +590,7 @@ class TestCassandraQueryPerformanceOptimizations: @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph') async def test_get_os_query_optimization(self, mock_trustgraph): """Test that get_os queries use optimized table (no ALLOW FILTERING)""" - from trustgraph.schema import TriplesQueryRequest, Value + from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL mock_tg_instance = MagicMock() mock_trustgraph.return_value = mock_tg_instance @@ -596,9 +605,9 @@ class TestCassandraQueryPerformanceOptimizations: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value='test_subject', is_uri=False), + s=Term(type=LITERAL, value='test_subject'), p=None, - o=Value(value='test_object', is_uri=False), + o=Term(type=LITERAL, value='test_object'), limit=25 ) @@ -606,7 +615,7 @@ class TestCassandraQueryPerformanceOptimizations: # Verify get_os was called (should use optimized subject_table with clustering) mock_tg_instance.get_os.assert_called_once_with( - 'test_collection', 'test_object', 'test_subject', limit=25 + 'test_collection', 'test_object', 'test_subject', g=None, limit=25 ) assert len(result) == 1 @@ -618,7 +627,7 @@ class TestCassandraQueryPerformanceOptimizations: @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph') async def test_all_query_patterns_use_correct_tables(self, mock_trustgraph): """Test that all query patterns route to their optimal tables""" - from trustgraph.schema import TriplesQueryRequest, Value + from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL mock_tg_instance = MagicMock() mock_trustgraph.return_value = mock_tg_instance @@ -655,9 +664,9 @@ class TestCassandraQueryPerformanceOptimizations: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value=s, is_uri=False) if s else None, - p=Value(value=p, is_uri=False) if p else None, - o=Value(value=o, is_uri=False) if o else None, + s=Term(type=LITERAL, value=s) if s else None, + p=Term(type=LITERAL, value=p) if p else None, + o=Term(type=LITERAL, value=o) if o else None, limit=10 ) @@ -690,7 +699,7 @@ class TestCassandraQueryPerformanceOptimizations: @patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph') async def test_performance_critical_po_query_no_filtering(self, mock_trustgraph): """Test the performance-critical PO query that eliminates ALLOW FILTERING""" - from trustgraph.schema import TriplesQueryRequest, Value + from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL mock_tg_instance = MagicMock() mock_trustgraph.return_value = mock_tg_instance @@ -711,8 +720,8 @@ class TestCassandraQueryPerformanceOptimizations: user='large_dataset_user', collection='massive_collection', s=None, - p=Value(value='http://www.w3.org/1999/02/22-rdf-syntax-ns#type', is_uri=True), - o=Value(value='http://example.com/Person', is_uri=True), + p=Term(type=IRI, iri='http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), + o=Term(type=IRI, iri='http://example.com/Person'), limit=1000 ) @@ -723,14 +732,15 @@ class TestCassandraQueryPerformanceOptimizations: 'massive_collection', 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'http://example.com/Person', + g=None, limit=1000 ) # Verify all results were returned assert len(result) == 5 for i, triple in enumerate(result): - assert triple.s.value == f'subject_{i}' - assert triple.p.value == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' - assert triple.p.is_uri is True - assert triple.o.value == 'http://example.com/Person' - assert triple.o.is_uri is True \ No newline at end of file + assert triple.s.value == f'subject_{i}' # Mock returns literal values + assert triple.p.iri == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' + assert triple.p.type == IRI + assert triple.o.iri == 'http://example.com/Person' # URIs use .iri + assert triple.o.type == IRI \ No newline at end of file diff --git a/tests/unit/test_query/test_triples_falkordb_query.py b/tests/unit/test_query/test_triples_falkordb_query.py index 3e7d07db..d5c047d7 100644 --- a/tests/unit/test_query/test_triples_falkordb_query.py +++ b/tests/unit/test_query/test_triples_falkordb_query.py @@ -6,7 +6,7 @@ import pytest from unittest.mock import MagicMock, patch from trustgraph.query.triples.falkordb.service import Processor -from trustgraph.schema import Value, TriplesQueryRequest +from trustgraph.schema import Term, TriplesQueryRequest, IRI, LITERAL class TestFalkorDBQueryProcessor: @@ -25,50 +25,50 @@ class TestFalkorDBQueryProcessor: def test_create_value_with_http_uri(self, processor): """Test create_value with HTTP URI""" result = processor.create_value("http://example.com/resource") - - assert isinstance(result, Value) - assert result.value == "http://example.com/resource" - assert result.is_uri is True + + assert isinstance(result, Term) + assert result.iri == "http://example.com/resource" + assert result.type == IRI def test_create_value_with_https_uri(self, processor): """Test create_value with HTTPS URI""" result = processor.create_value("https://example.com/resource") - - assert isinstance(result, Value) - assert result.value == "https://example.com/resource" - assert result.is_uri is True + + assert isinstance(result, Term) + assert result.iri == "https://example.com/resource" + assert result.type == IRI def test_create_value_with_literal(self, processor): """Test create_value with literal value""" result = processor.create_value("just a literal string") - - assert isinstance(result, Value) + + assert isinstance(result, Term) assert result.value == "just a literal string" - assert result.is_uri is False + assert result.type == LITERAL def test_create_value_with_empty_string(self, processor): """Test create_value with empty string""" result = processor.create_value("") - - assert isinstance(result, Value) + + assert isinstance(result, Term) assert result.value == "" - assert result.is_uri is False + assert result.type == LITERAL def test_create_value_with_partial_uri(self, processor): """Test create_value with string that looks like URI but isn't complete""" result = processor.create_value("http") - - assert isinstance(result, Value) + + assert isinstance(result, Term) assert result.value == "http" - assert result.is_uri is False + assert result.type == LITERAL def test_create_value_with_ftp_uri(self, processor): """Test create_value with FTP URI (should not be detected as URI)""" result = processor.create_value("ftp://example.com/file") - - assert isinstance(result, Value) + + assert isinstance(result, Term) assert result.value == "ftp://example.com/file" - assert result.is_uri is False + assert result.type == LITERAL @patch('trustgraph.query.triples.falkordb.service.FalkorDB') def test_processor_initialization_with_defaults(self, mock_falkordb): @@ -125,9 +125,9 @@ class TestFalkorDBQueryProcessor: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value="http://example.com/subject", is_uri=True), - p=Value(value="http://example.com/predicate", is_uri=True), - o=Value(value="literal object", is_uri=False), + s=Term(type=IRI, iri="http://example.com/subject"), + p=Term(type=IRI, iri="http://example.com/predicate"), + o=Term(type=LITERAL, value="literal object"), limit=100 ) @@ -138,8 +138,8 @@ class TestFalkorDBQueryProcessor: # Verify result contains the queried triple (appears twice - once from each query) assert len(result) == 2 - assert result[0].s.value == "http://example.com/subject" - assert result[0].p.value == "http://example.com/predicate" + assert result[0].s.iri == "http://example.com/subject" + assert result[0].p.iri == "http://example.com/predicate" assert result[0].o.value == "literal object" @patch('trustgraph.query.triples.falkordb.service.FalkorDB') @@ -166,8 +166,8 @@ class TestFalkorDBQueryProcessor: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value="http://example.com/subject", is_uri=True), - p=Value(value="http://example.com/predicate", is_uri=True), + s=Term(type=IRI, iri="http://example.com/subject"), + p=Term(type=IRI, iri="http://example.com/predicate"), o=None, limit=100 ) @@ -179,13 +179,13 @@ class TestFalkorDBQueryProcessor: # Verify results contain different objects assert len(result) == 2 - assert result[0].s.value == "http://example.com/subject" - assert result[0].p.value == "http://example.com/predicate" + assert result[0].s.iri == "http://example.com/subject" + assert result[0].p.iri == "http://example.com/predicate" assert result[0].o.value == "literal result" - assert result[1].s.value == "http://example.com/subject" - assert result[1].p.value == "http://example.com/predicate" - assert result[1].o.value == "http://example.com/uri_result" + assert result[1].s.iri == "http://example.com/subject" + assert result[1].p.iri == "http://example.com/predicate" + assert result[1].o.iri == "http://example.com/uri_result" @patch('trustgraph.query.triples.falkordb.service.FalkorDB') @pytest.mark.asyncio @@ -211,9 +211,9 @@ class TestFalkorDBQueryProcessor: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value="http://example.com/subject", is_uri=True), + s=Term(type=IRI, iri="http://example.com/subject"), p=None, - o=Value(value="literal object", is_uri=False), + o=Term(type=LITERAL, value="literal object"), limit=100 ) @@ -224,12 +224,12 @@ class TestFalkorDBQueryProcessor: # Verify results contain different predicates assert len(result) == 2 - assert result[0].s.value == "http://example.com/subject" - assert result[0].p.value == "http://example.com/pred1" + assert result[0].s.iri == "http://example.com/subject" + assert result[0].p.iri == "http://example.com/pred1" assert result[0].o.value == "literal object" - assert result[1].s.value == "http://example.com/subject" - assert result[1].p.value == "http://example.com/pred2" + assert result[1].s.iri == "http://example.com/subject" + assert result[1].p.iri == "http://example.com/pred2" assert result[1].o.value == "literal object" @patch('trustgraph.query.triples.falkordb.service.FalkorDB') @@ -256,7 +256,7 @@ class TestFalkorDBQueryProcessor: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value="http://example.com/subject", is_uri=True), + s=Term(type=IRI, iri="http://example.com/subject"), p=None, o=None, limit=100 @@ -269,13 +269,13 @@ class TestFalkorDBQueryProcessor: # Verify results contain different predicate-object pairs assert len(result) == 2 - assert result[0].s.value == "http://example.com/subject" - assert result[0].p.value == "http://example.com/pred1" + assert result[0].s.iri == "http://example.com/subject" + assert result[0].p.iri == "http://example.com/pred1" assert result[0].o.value == "literal1" - assert result[1].s.value == "http://example.com/subject" - assert result[1].p.value == "http://example.com/pred2" - assert result[1].o.value == "http://example.com/uri2" + assert result[1].s.iri == "http://example.com/subject" + assert result[1].p.iri == "http://example.com/pred2" + assert result[1].o.iri == "http://example.com/uri2" @patch('trustgraph.query.triples.falkordb.service.FalkorDB') @pytest.mark.asyncio @@ -302,8 +302,8 @@ class TestFalkorDBQueryProcessor: user='test_user', collection='test_collection', s=None, - p=Value(value="http://example.com/predicate", is_uri=True), - o=Value(value="literal object", is_uri=False), + p=Term(type=IRI, iri="http://example.com/predicate"), + o=Term(type=LITERAL, value="literal object"), limit=100 ) @@ -314,12 +314,12 @@ class TestFalkorDBQueryProcessor: # Verify results contain different subjects assert len(result) == 2 - assert result[0].s.value == "http://example.com/subj1" - assert result[0].p.value == "http://example.com/predicate" + assert result[0].s.iri == "http://example.com/subj1" + assert result[0].p.iri == "http://example.com/predicate" assert result[0].o.value == "literal object" - assert result[1].s.value == "http://example.com/subj2" - assert result[1].p.value == "http://example.com/predicate" + assert result[1].s.iri == "http://example.com/subj2" + assert result[1].p.iri == "http://example.com/predicate" assert result[1].o.value == "literal object" @patch('trustgraph.query.triples.falkordb.service.FalkorDB') @@ -347,7 +347,7 @@ class TestFalkorDBQueryProcessor: user='test_user', collection='test_collection', s=None, - p=Value(value="http://example.com/predicate", is_uri=True), + p=Term(type=IRI, iri="http://example.com/predicate"), o=None, limit=100 ) @@ -359,13 +359,13 @@ class TestFalkorDBQueryProcessor: # Verify results contain different subject-object pairs assert len(result) == 2 - assert result[0].s.value == "http://example.com/subj1" - assert result[0].p.value == "http://example.com/predicate" + assert result[0].s.iri == "http://example.com/subj1" + assert result[0].p.iri == "http://example.com/predicate" assert result[0].o.value == "literal1" - assert result[1].s.value == "http://example.com/subj2" - assert result[1].p.value == "http://example.com/predicate" - assert result[1].o.value == "http://example.com/uri2" + assert result[1].s.iri == "http://example.com/subj2" + assert result[1].p.iri == "http://example.com/predicate" + assert result[1].o.iri == "http://example.com/uri2" @patch('trustgraph.query.triples.falkordb.service.FalkorDB') @pytest.mark.asyncio @@ -393,7 +393,7 @@ class TestFalkorDBQueryProcessor: collection='test_collection', s=None, p=None, - o=Value(value="literal object", is_uri=False), + o=Term(type=LITERAL, value="literal object"), limit=100 ) @@ -404,12 +404,12 @@ class TestFalkorDBQueryProcessor: # Verify results contain different subject-predicate pairs assert len(result) == 2 - assert result[0].s.value == "http://example.com/subj1" - assert result[0].p.value == "http://example.com/pred1" + assert result[0].s.iri == "http://example.com/subj1" + assert result[0].p.iri == "http://example.com/pred1" assert result[0].o.value == "literal object" - assert result[1].s.value == "http://example.com/subj2" - assert result[1].p.value == "http://example.com/pred2" + assert result[1].s.iri == "http://example.com/subj2" + assert result[1].p.iri == "http://example.com/pred2" assert result[1].o.value == "literal object" @patch('trustgraph.query.triples.falkordb.service.FalkorDB') @@ -449,13 +449,13 @@ class TestFalkorDBQueryProcessor: # Verify results contain different triples assert len(result) == 2 - assert result[0].s.value == "http://example.com/s1" - assert result[0].p.value == "http://example.com/p1" + assert result[0].s.iri == "http://example.com/s1" + assert result[0].p.iri == "http://example.com/p1" assert result[0].o.value == "literal1" - assert result[1].s.value == "http://example.com/s2" - assert result[1].p.value == "http://example.com/p2" - assert result[1].o.value == "http://example.com/o2" + assert result[1].s.iri == "http://example.com/s2" + assert result[1].p.iri == "http://example.com/p2" + assert result[1].o.iri == "http://example.com/o2" @patch('trustgraph.query.triples.falkordb.service.FalkorDB') @pytest.mark.asyncio @@ -476,7 +476,7 @@ class TestFalkorDBQueryProcessor: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value="http://example.com/subject", is_uri=True), + s=Term(type=IRI, iri="http://example.com/subject"), p=None, o=None, limit=100 diff --git a/tests/unit/test_query/test_triples_memgraph_query.py b/tests/unit/test_query/test_triples_memgraph_query.py index bd394ae4..f4222af1 100644 --- a/tests/unit/test_query/test_triples_memgraph_query.py +++ b/tests/unit/test_query/test_triples_memgraph_query.py @@ -6,7 +6,7 @@ import pytest from unittest.mock import MagicMock, patch from trustgraph.query.triples.memgraph.service import Processor -from trustgraph.schema import Value, TriplesQueryRequest +from trustgraph.schema import Term, TriplesQueryRequest, IRI, LITERAL class TestMemgraphQueryProcessor: @@ -25,50 +25,50 @@ class TestMemgraphQueryProcessor: def test_create_value_with_http_uri(self, processor): """Test create_value with HTTP URI""" result = processor.create_value("http://example.com/resource") - - assert isinstance(result, Value) - assert result.value == "http://example.com/resource" - assert result.is_uri is True + + assert isinstance(result, Term) + assert result.iri == "http://example.com/resource" + assert result.type == IRI def test_create_value_with_https_uri(self, processor): """Test create_value with HTTPS URI""" result = processor.create_value("https://example.com/resource") - - assert isinstance(result, Value) - assert result.value == "https://example.com/resource" - assert result.is_uri is True + + assert isinstance(result, Term) + assert result.iri == "https://example.com/resource" + assert result.type == IRI def test_create_value_with_literal(self, processor): """Test create_value with literal value""" result = processor.create_value("just a literal string") - - assert isinstance(result, Value) + + assert isinstance(result, Term) assert result.value == "just a literal string" - assert result.is_uri is False + assert result.type == LITERAL def test_create_value_with_empty_string(self, processor): """Test create_value with empty string""" result = processor.create_value("") - - assert isinstance(result, Value) + + assert isinstance(result, Term) assert result.value == "" - assert result.is_uri is False + assert result.type == LITERAL def test_create_value_with_partial_uri(self, processor): """Test create_value with string that looks like URI but isn't complete""" result = processor.create_value("http") - - assert isinstance(result, Value) + + assert isinstance(result, Term) assert result.value == "http" - assert result.is_uri is False + assert result.type == LITERAL def test_create_value_with_ftp_uri(self, processor): """Test create_value with FTP URI (should not be detected as URI)""" result = processor.create_value("ftp://example.com/file") - - assert isinstance(result, Value) + + assert isinstance(result, Term) assert result.value == "ftp://example.com/file" - assert result.is_uri is False + assert result.type == LITERAL @patch('trustgraph.query.triples.memgraph.service.GraphDatabase') def test_processor_initialization_with_defaults(self, mock_graph_db): @@ -124,9 +124,9 @@ class TestMemgraphQueryProcessor: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value="http://example.com/subject", is_uri=True), - p=Value(value="http://example.com/predicate", is_uri=True), - o=Value(value="literal object", is_uri=False), + s=Term(type=IRI, iri="http://example.com/subject"), + p=Term(type=IRI, iri="http://example.com/predicate"), + o=Term(type=LITERAL, value="literal object"), limit=100 ) @@ -137,8 +137,8 @@ class TestMemgraphQueryProcessor: # Verify result contains the queried triple (appears twice - once from each query) assert len(result) == 2 - assert result[0].s.value == "http://example.com/subject" - assert result[0].p.value == "http://example.com/predicate" + assert result[0].s.iri == "http://example.com/subject" + assert result[0].p.iri == "http://example.com/predicate" assert result[0].o.value == "literal object" @patch('trustgraph.query.triples.memgraph.service.GraphDatabase') @@ -166,8 +166,8 @@ class TestMemgraphQueryProcessor: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value="http://example.com/subject", is_uri=True), - p=Value(value="http://example.com/predicate", is_uri=True), + s=Term(type=IRI, iri="http://example.com/subject"), + p=Term(type=IRI, iri="http://example.com/predicate"), o=None, limit=100 ) @@ -179,13 +179,13 @@ class TestMemgraphQueryProcessor: # Verify results contain different objects assert len(result) == 2 - assert result[0].s.value == "http://example.com/subject" - assert result[0].p.value == "http://example.com/predicate" + assert result[0].s.iri == "http://example.com/subject" + assert result[0].p.iri == "http://example.com/predicate" assert result[0].o.value == "literal result" - assert result[1].s.value == "http://example.com/subject" - assert result[1].p.value == "http://example.com/predicate" - assert result[1].o.value == "http://example.com/uri_result" + assert result[1].s.iri == "http://example.com/subject" + assert result[1].p.iri == "http://example.com/predicate" + assert result[1].o.iri == "http://example.com/uri_result" @patch('trustgraph.query.triples.memgraph.service.GraphDatabase') @pytest.mark.asyncio @@ -212,9 +212,9 @@ class TestMemgraphQueryProcessor: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value="http://example.com/subject", is_uri=True), + s=Term(type=IRI, iri="http://example.com/subject"), p=None, - o=Value(value="literal object", is_uri=False), + o=Term(type=LITERAL, value="literal object"), limit=100 ) @@ -225,12 +225,12 @@ class TestMemgraphQueryProcessor: # Verify results contain different predicates assert len(result) == 2 - assert result[0].s.value == "http://example.com/subject" - assert result[0].p.value == "http://example.com/pred1" + assert result[0].s.iri == "http://example.com/subject" + assert result[0].p.iri == "http://example.com/pred1" assert result[0].o.value == "literal object" - assert result[1].s.value == "http://example.com/subject" - assert result[1].p.value == "http://example.com/pred2" + assert result[1].s.iri == "http://example.com/subject" + assert result[1].p.iri == "http://example.com/pred2" assert result[1].o.value == "literal object" @patch('trustgraph.query.triples.memgraph.service.GraphDatabase') @@ -258,7 +258,7 @@ class TestMemgraphQueryProcessor: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value="http://example.com/subject", is_uri=True), + s=Term(type=IRI, iri="http://example.com/subject"), p=None, o=None, limit=100 @@ -271,13 +271,13 @@ class TestMemgraphQueryProcessor: # Verify results contain different predicate-object pairs assert len(result) == 2 - assert result[0].s.value == "http://example.com/subject" - assert result[0].p.value == "http://example.com/pred1" + assert result[0].s.iri == "http://example.com/subject" + assert result[0].p.iri == "http://example.com/pred1" assert result[0].o.value == "literal1" - assert result[1].s.value == "http://example.com/subject" - assert result[1].p.value == "http://example.com/pred2" - assert result[1].o.value == "http://example.com/uri2" + assert result[1].s.iri == "http://example.com/subject" + assert result[1].p.iri == "http://example.com/pred2" + assert result[1].o.iri == "http://example.com/uri2" @patch('trustgraph.query.triples.memgraph.service.GraphDatabase') @pytest.mark.asyncio @@ -305,8 +305,8 @@ class TestMemgraphQueryProcessor: user='test_user', collection='test_collection', s=None, - p=Value(value="http://example.com/predicate", is_uri=True), - o=Value(value="literal object", is_uri=False), + p=Term(type=IRI, iri="http://example.com/predicate"), + o=Term(type=LITERAL, value="literal object"), limit=100 ) @@ -317,12 +317,12 @@ class TestMemgraphQueryProcessor: # Verify results contain different subjects assert len(result) == 2 - assert result[0].s.value == "http://example.com/subj1" - assert result[0].p.value == "http://example.com/predicate" + assert result[0].s.iri == "http://example.com/subj1" + assert result[0].p.iri == "http://example.com/predicate" assert result[0].o.value == "literal object" - assert result[1].s.value == "http://example.com/subj2" - assert result[1].p.value == "http://example.com/predicate" + assert result[1].s.iri == "http://example.com/subj2" + assert result[1].p.iri == "http://example.com/predicate" assert result[1].o.value == "literal object" @patch('trustgraph.query.triples.memgraph.service.GraphDatabase') @@ -351,7 +351,7 @@ class TestMemgraphQueryProcessor: user='test_user', collection='test_collection', s=None, - p=Value(value="http://example.com/predicate", is_uri=True), + p=Term(type=IRI, iri="http://example.com/predicate"), o=None, limit=100 ) @@ -363,13 +363,13 @@ class TestMemgraphQueryProcessor: # Verify results contain different subject-object pairs assert len(result) == 2 - assert result[0].s.value == "http://example.com/subj1" - assert result[0].p.value == "http://example.com/predicate" + assert result[0].s.iri == "http://example.com/subj1" + assert result[0].p.iri == "http://example.com/predicate" assert result[0].o.value == "literal1" - assert result[1].s.value == "http://example.com/subj2" - assert result[1].p.value == "http://example.com/predicate" - assert result[1].o.value == "http://example.com/uri2" + assert result[1].s.iri == "http://example.com/subj2" + assert result[1].p.iri == "http://example.com/predicate" + assert result[1].o.iri == "http://example.com/uri2" @patch('trustgraph.query.triples.memgraph.service.GraphDatabase') @pytest.mark.asyncio @@ -398,7 +398,7 @@ class TestMemgraphQueryProcessor: collection='test_collection', s=None, p=None, - o=Value(value="literal object", is_uri=False), + o=Term(type=LITERAL, value="literal object"), limit=100 ) @@ -409,12 +409,12 @@ class TestMemgraphQueryProcessor: # Verify results contain different subject-predicate pairs assert len(result) == 2 - assert result[0].s.value == "http://example.com/subj1" - assert result[0].p.value == "http://example.com/pred1" + assert result[0].s.iri == "http://example.com/subj1" + assert result[0].p.iri == "http://example.com/pred1" assert result[0].o.value == "literal object" - assert result[1].s.value == "http://example.com/subj2" - assert result[1].p.value == "http://example.com/pred2" + assert result[1].s.iri == "http://example.com/subj2" + assert result[1].p.iri == "http://example.com/pred2" assert result[1].o.value == "literal object" @patch('trustgraph.query.triples.memgraph.service.GraphDatabase') @@ -455,13 +455,13 @@ class TestMemgraphQueryProcessor: # Verify results contain different triples assert len(result) == 2 - assert result[0].s.value == "http://example.com/s1" - assert result[0].p.value == "http://example.com/p1" + assert result[0].s.iri == "http://example.com/s1" + assert result[0].p.iri == "http://example.com/p1" assert result[0].o.value == "literal1" - assert result[1].s.value == "http://example.com/s2" - assert result[1].p.value == "http://example.com/p2" - assert result[1].o.value == "http://example.com/o2" + assert result[1].s.iri == "http://example.com/s2" + assert result[1].p.iri == "http://example.com/p2" + assert result[1].o.iri == "http://example.com/o2" @patch('trustgraph.query.triples.memgraph.service.GraphDatabase') @pytest.mark.asyncio @@ -480,7 +480,7 @@ class TestMemgraphQueryProcessor: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value="http://example.com/subject", is_uri=True), + s=Term(type=IRI, iri="http://example.com/subject"), p=None, o=None, limit=100 diff --git a/tests/unit/test_query/test_triples_neo4j_query.py b/tests/unit/test_query/test_triples_neo4j_query.py index 320aed54..e379ed21 100644 --- a/tests/unit/test_query/test_triples_neo4j_query.py +++ b/tests/unit/test_query/test_triples_neo4j_query.py @@ -6,7 +6,7 @@ import pytest from unittest.mock import MagicMock, patch from trustgraph.query.triples.neo4j.service import Processor -from trustgraph.schema import Value, TriplesQueryRequest +from trustgraph.schema import Term, TriplesQueryRequest, IRI, LITERAL class TestNeo4jQueryProcessor: @@ -25,50 +25,50 @@ class TestNeo4jQueryProcessor: def test_create_value_with_http_uri(self, processor): """Test create_value with HTTP URI""" result = processor.create_value("http://example.com/resource") - - assert isinstance(result, Value) - assert result.value == "http://example.com/resource" - assert result.is_uri is True + + assert isinstance(result, Term) + assert result.iri == "http://example.com/resource" + assert result.type == IRI def test_create_value_with_https_uri(self, processor): """Test create_value with HTTPS URI""" result = processor.create_value("https://example.com/resource") - - assert isinstance(result, Value) - assert result.value == "https://example.com/resource" - assert result.is_uri is True + + assert isinstance(result, Term) + assert result.iri == "https://example.com/resource" + assert result.type == IRI def test_create_value_with_literal(self, processor): """Test create_value with literal value""" result = processor.create_value("just a literal string") - - assert isinstance(result, Value) + + assert isinstance(result, Term) assert result.value == "just a literal string" - assert result.is_uri is False + assert result.type == LITERAL def test_create_value_with_empty_string(self, processor): """Test create_value with empty string""" result = processor.create_value("") - - assert isinstance(result, Value) + + assert isinstance(result, Term) assert result.value == "" - assert result.is_uri is False + assert result.type == LITERAL def test_create_value_with_partial_uri(self, processor): """Test create_value with string that looks like URI but isn't complete""" result = processor.create_value("http") - - assert isinstance(result, Value) + + assert isinstance(result, Term) assert result.value == "http" - assert result.is_uri is False + assert result.type == LITERAL def test_create_value_with_ftp_uri(self, processor): """Test create_value with FTP URI (should not be detected as URI)""" result = processor.create_value("ftp://example.com/file") - - assert isinstance(result, Value) + + assert isinstance(result, Term) assert result.value == "ftp://example.com/file" - assert result.is_uri is False + assert result.type == LITERAL @patch('trustgraph.query.triples.neo4j.service.GraphDatabase') def test_processor_initialization_with_defaults(self, mock_graph_db): @@ -124,9 +124,9 @@ class TestNeo4jQueryProcessor: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value="http://example.com/subject", is_uri=True), - p=Value(value="http://example.com/predicate", is_uri=True), - o=Value(value="literal object", is_uri=False), + s=Term(type=IRI, iri="http://example.com/subject"), + p=Term(type=IRI, iri="http://example.com/predicate"), + o=Term(type=LITERAL, value="literal object"), limit=100 ) @@ -137,8 +137,8 @@ class TestNeo4jQueryProcessor: # Verify result contains the queried triple (appears twice - once from each query) assert len(result) == 2 - assert result[0].s.value == "http://example.com/subject" - assert result[0].p.value == "http://example.com/predicate" + assert result[0].s.iri == "http://example.com/subject" + assert result[0].p.iri == "http://example.com/predicate" assert result[0].o.value == "literal object" @patch('trustgraph.query.triples.neo4j.service.GraphDatabase') @@ -166,8 +166,8 @@ class TestNeo4jQueryProcessor: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value="http://example.com/subject", is_uri=True), - p=Value(value="http://example.com/predicate", is_uri=True), + s=Term(type=IRI, iri="http://example.com/subject"), + p=Term(type=IRI, iri="http://example.com/predicate"), o=None, limit=100 ) @@ -179,13 +179,13 @@ class TestNeo4jQueryProcessor: # Verify results contain different objects assert len(result) == 2 - assert result[0].s.value == "http://example.com/subject" - assert result[0].p.value == "http://example.com/predicate" + assert result[0].s.iri == "http://example.com/subject" + assert result[0].p.iri == "http://example.com/predicate" assert result[0].o.value == "literal result" - - assert result[1].s.value == "http://example.com/subject" - assert result[1].p.value == "http://example.com/predicate" - assert result[1].o.value == "http://example.com/uri_result" + + assert result[1].s.iri == "http://example.com/subject" + assert result[1].p.iri == "http://example.com/predicate" + assert result[1].o.iri == "http://example.com/uri_result" @patch('trustgraph.query.triples.neo4j.service.GraphDatabase') @pytest.mark.asyncio @@ -225,13 +225,13 @@ class TestNeo4jQueryProcessor: # Verify results contain different triples assert len(result) == 2 - assert result[0].s.value == "http://example.com/s1" - assert result[0].p.value == "http://example.com/p1" + assert result[0].s.iri == "http://example.com/s1" + assert result[0].p.iri == "http://example.com/p1" assert result[0].o.value == "literal1" - - assert result[1].s.value == "http://example.com/s2" - assert result[1].p.value == "http://example.com/p2" - assert result[1].o.value == "http://example.com/o2" + + assert result[1].s.iri == "http://example.com/s2" + assert result[1].p.iri == "http://example.com/p2" + assert result[1].o.iri == "http://example.com/o2" @patch('trustgraph.query.triples.neo4j.service.GraphDatabase') @pytest.mark.asyncio @@ -250,12 +250,12 @@ class TestNeo4jQueryProcessor: query = TriplesQueryRequest( user='test_user', collection='test_collection', - s=Value(value="http://example.com/subject", is_uri=True), + s=Term(type=IRI, iri="http://example.com/subject"), p=None, o=None, limit=100 ) - + # Should raise the exception with pytest.raises(Exception, match="Database connection failed"): await processor.query_triples(query) diff --git a/tests/unit/test_storage/test_graph_embeddings_milvus_storage.py b/tests/unit/test_storage/test_graph_embeddings_milvus_storage.py index a22173ab..8a8e1090 100644 --- a/tests/unit/test_storage/test_graph_embeddings_milvus_storage.py +++ b/tests/unit/test_storage/test_graph_embeddings_milvus_storage.py @@ -6,7 +6,7 @@ import pytest from unittest.mock import MagicMock, patch from trustgraph.storage.graph_embeddings.milvus.write import Processor -from trustgraph.schema import Value, EntityEmbeddings +from trustgraph.schema import Term, EntityEmbeddings, IRI, LITERAL class TestMilvusGraphEmbeddingsStorageProcessor: @@ -22,11 +22,11 @@ class TestMilvusGraphEmbeddingsStorageProcessor: # Create test entities with embeddings entity1 = EntityEmbeddings( - entity=Value(value='http://example.com/entity1', is_uri=True), + entity=Term(type=IRI, iri='http://example.com/entity1'), vectors=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]] ) entity2 = EntityEmbeddings( - entity=Value(value='literal entity', is_uri=False), + entity=Term(type=LITERAL, value='literal entity'), vectors=[[0.7, 0.8, 0.9]] ) message.entities = [entity1, entity2] @@ -84,7 +84,7 @@ class TestMilvusGraphEmbeddingsStorageProcessor: message.metadata.collection = 'test_collection' entity = EntityEmbeddings( - entity=Value(value='http://example.com/entity', is_uri=True), + entity=Term(type=IRI, iri='http://example.com/entity'), vectors=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]] ) message.entities = [entity] @@ -136,7 +136,7 @@ class TestMilvusGraphEmbeddingsStorageProcessor: message.metadata.collection = 'test_collection' entity = EntityEmbeddings( - entity=Value(value='', is_uri=False), + entity=Term(type=LITERAL, value=''), vectors=[[0.1, 0.2, 0.3]] ) message.entities = [entity] @@ -155,7 +155,7 @@ class TestMilvusGraphEmbeddingsStorageProcessor: message.metadata.collection = 'test_collection' entity = EntityEmbeddings( - entity=Value(value=None, is_uri=False), + entity=Term(type=LITERAL, value=None), vectors=[[0.1, 0.2, 0.3]] ) message.entities = [entity] @@ -174,15 +174,15 @@ class TestMilvusGraphEmbeddingsStorageProcessor: message.metadata.collection = 'test_collection' valid_entity = EntityEmbeddings( - entity=Value(value='http://example.com/valid', is_uri=True), + entity=Term(type=IRI, iri='http://example.com/valid'), vectors=[[0.1, 0.2, 0.3]] ) empty_entity = EntityEmbeddings( - entity=Value(value='', is_uri=False), + entity=Term(type=LITERAL, value=''), vectors=[[0.4, 0.5, 0.6]] ) none_entity = EntityEmbeddings( - entity=Value(value=None, is_uri=False), + entity=Term(type=LITERAL, value=None), vectors=[[0.7, 0.8, 0.9]] ) message.entities = [valid_entity, empty_entity, none_entity] @@ -217,7 +217,7 @@ class TestMilvusGraphEmbeddingsStorageProcessor: message.metadata.collection = 'test_collection' entity = EntityEmbeddings( - entity=Value(value='http://example.com/entity', is_uri=True), + entity=Term(type=IRI, iri='http://example.com/entity'), vectors=[] ) message.entities = [entity] @@ -236,7 +236,7 @@ class TestMilvusGraphEmbeddingsStorageProcessor: message.metadata.collection = 'test_collection' entity = EntityEmbeddings( - entity=Value(value='http://example.com/entity', is_uri=True), + entity=Term(type=IRI, iri='http://example.com/entity'), vectors=[ [0.1, 0.2], # 2D vector [0.3, 0.4, 0.5, 0.6], # 4D vector @@ -269,11 +269,11 @@ class TestMilvusGraphEmbeddingsStorageProcessor: message.metadata.collection = 'test_collection' uri_entity = EntityEmbeddings( - entity=Value(value='http://example.com/uri_entity', is_uri=True), + entity=Term(type=IRI, iri='http://example.com/uri_entity'), vectors=[[0.1, 0.2, 0.3]] ) literal_entity = EntityEmbeddings( - entity=Value(value='literal entity text', is_uri=False), + entity=Term(type=LITERAL, value='literal entity text'), vectors=[[0.4, 0.5, 0.6]] ) message.entities = [uri_entity, literal_entity] diff --git a/tests/unit/test_storage/test_graph_embeddings_qdrant_storage.py b/tests/unit/test_storage/test_graph_embeddings_qdrant_storage.py index d240b892..8b1a710a 100644 --- a/tests/unit/test_storage/test_graph_embeddings_qdrant_storage.py +++ b/tests/unit/test_storage/test_graph_embeddings_qdrant_storage.py @@ -9,6 +9,7 @@ from unittest import IsolatedAsyncioTestCase # Import the service under test from trustgraph.storage.graph_embeddings.qdrant.write import Processor +from trustgraph.schema import IRI, LITERAL class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase): @@ -67,7 +68,8 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase): mock_message.metadata.collection = 'test_collection' mock_entity = MagicMock() - mock_entity.entity.value = 'test_entity' + mock_entity.entity.type = IRI + mock_entity.entity.iri = 'test_entity' mock_entity.vectors = [[0.1, 0.2, 0.3]] # Single vector with 3 dimensions mock_message.entities = [mock_entity] @@ -120,11 +122,13 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase): mock_message.metadata.collection = 'multi_collection' mock_entity1 = MagicMock() - mock_entity1.entity.value = 'entity_one' + mock_entity1.entity.type = IRI + mock_entity1.entity.iri = 'entity_one' mock_entity1.vectors = [[0.1, 0.2]] - + mock_entity2 = MagicMock() - mock_entity2.entity.value = 'entity_two' + mock_entity2.entity.type = IRI + mock_entity2.entity.iri = 'entity_two' mock_entity2.vectors = [[0.3, 0.4]] mock_message.entities = [mock_entity1, mock_entity2] @@ -179,7 +183,8 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase): mock_message.metadata.collection = 'vector_collection' mock_entity = MagicMock() - mock_entity.entity.value = 'multi_vector_entity' + mock_entity.entity.type = IRI + mock_entity.entity.iri = 'multi_vector_entity' mock_entity.vectors = [ [0.1, 0.2, 0.3], [0.4, 0.5, 0.6], @@ -231,11 +236,12 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase): mock_message.metadata.collection = 'empty_collection' mock_entity_empty = MagicMock() + mock_entity_empty.entity.type = LITERAL mock_entity_empty.entity.value = "" # Empty string mock_entity_empty.vectors = [[0.1, 0.2]] - + mock_entity_none = MagicMock() - mock_entity_none.entity.value = None # None value + mock_entity_none.entity = None # None entity mock_entity_none.vectors = [[0.3, 0.4]] mock_message.entities = [mock_entity_empty, mock_entity_none] diff --git a/tests/unit/test_storage/test_neo4j_user_collection_isolation.py b/tests/unit/test_storage/test_neo4j_user_collection_isolation.py index bc8bb03f..dce170a7 100644 --- a/tests/unit/test_storage/test_neo4j_user_collection_isolation.py +++ b/tests/unit/test_storage/test_neo4j_user_collection_isolation.py @@ -7,7 +7,7 @@ from unittest.mock import MagicMock, patch, call from trustgraph.storage.triples.neo4j.write import Processor as StorageProcessor from trustgraph.query.triples.neo4j.service import Processor as QueryProcessor -from trustgraph.schema import Triples, Triple, Value, Metadata +from trustgraph.schema import Triples, Triple, Term, Metadata, IRI, LITERAL from trustgraph.schema import TriplesQueryRequest @@ -60,9 +60,9 @@ class TestNeo4jUserCollectionIsolation: ) triple = Triple( - s=Value(value="http://example.com/subject", is_uri=True), - p=Value(value="http://example.com/predicate", is_uri=True), - o=Value(value="literal_value", is_uri=False) + s=Term(type=IRI, iri="http://example.com/subject"), + p=Term(type=IRI, iri="http://example.com/predicate"), + o=Term(type=LITERAL, value="literal_value") ) message = Triples( @@ -128,9 +128,9 @@ class TestNeo4jUserCollectionIsolation: metadata = Metadata(id="test-id") triple = Triple( - s=Value(value="http://example.com/subject", is_uri=True), - p=Value(value="http://example.com/predicate", is_uri=True), - o=Value(value="http://example.com/object", is_uri=True) + s=Term(type=IRI, iri="http://example.com/subject"), + p=Term(type=IRI, iri="http://example.com/predicate"), + o=Term(type=IRI, iri="http://example.com/object") ) message = Triples( @@ -170,8 +170,8 @@ class TestNeo4jUserCollectionIsolation: query = TriplesQueryRequest( user="test_user", collection="test_collection", - s=Value(value="http://example.com/subject", is_uri=True), - p=Value(value="http://example.com/predicate", is_uri=True), + s=Term(type=IRI, iri="http://example.com/subject"), + p=Term(type=IRI, iri="http://example.com/predicate"), o=None ) @@ -254,9 +254,9 @@ class TestNeo4jUserCollectionIsolation: metadata=Metadata(user="user1", collection="coll1"), triples=[ Triple( - s=Value(value="http://example.com/user1/subject", is_uri=True), - p=Value(value="http://example.com/predicate", is_uri=True), - o=Value(value="user1_data", is_uri=False) + s=Term(type=IRI, iri="http://example.com/user1/subject"), + p=Term(type=IRI, iri="http://example.com/predicate"), + o=Term(type=LITERAL, value="user1_data") ) ] ) @@ -265,9 +265,9 @@ class TestNeo4jUserCollectionIsolation: metadata=Metadata(user="user2", collection="coll2"), triples=[ Triple( - s=Value(value="http://example.com/user2/subject", is_uri=True), - p=Value(value="http://example.com/predicate", is_uri=True), - o=Value(value="user2_data", is_uri=False) + s=Term(type=IRI, iri="http://example.com/user2/subject"), + p=Term(type=IRI, iri="http://example.com/predicate"), + o=Term(type=LITERAL, value="user2_data") ) ] ) @@ -429,9 +429,9 @@ class TestNeo4jUserCollectionRegression: metadata=Metadata(user="user1", collection="coll1"), triples=[ Triple( - s=Value(value=shared_uri, is_uri=True), - p=Value(value="http://example.com/p", is_uri=True), - o=Value(value="user1_value", is_uri=False) + s=Term(type=IRI, iri=shared_uri), + p=Term(type=IRI, iri="http://example.com/p"), + o=Term(type=LITERAL, value="user1_value") ) ] ) @@ -440,9 +440,9 @@ class TestNeo4jUserCollectionRegression: metadata=Metadata(user="user2", collection="coll2"), triples=[ Triple( - s=Value(value=shared_uri, is_uri=True), - p=Value(value="http://example.com/p", is_uri=True), - o=Value(value="user2_value", is_uri=False) + s=Term(type=IRI, iri=shared_uri), + p=Term(type=IRI, iri="http://example.com/p"), + o=Term(type=LITERAL, value="user2_value") ) ] ) diff --git a/tests/unit/test_storage/test_triples_cassandra_storage.py b/tests/unit/test_storage/test_triples_cassandra_storage.py index 54ea1a95..3fdff6b9 100644 --- a/tests/unit/test_storage/test_triples_cassandra_storage.py +++ b/tests/unit/test_storage/test_triples_cassandra_storage.py @@ -6,7 +6,8 @@ import pytest from unittest.mock import MagicMock, patch, AsyncMock from trustgraph.storage.triples.cassandra.write import Processor -from trustgraph.schema import Value, Triple +from trustgraph.schema import Triple, LITERAL +from trustgraph.direct.cassandra_kg import DEFAULT_GRAPH class TestCassandraStorageProcessor: @@ -175,29 +176,37 @@ class TestCassandraStorageProcessor: processor = Processor(taskgroup=taskgroup_mock) - # Create mock triples + # Create mock triples with proper Term structure triple1 = MagicMock() + triple1.s.type = LITERAL triple1.s.value = 'subject1' + triple1.p.type = LITERAL triple1.p.value = 'predicate1' + triple1.o.type = LITERAL triple1.o.value = 'object1' - + triple1.g = None + triple2 = MagicMock() + triple2.s.type = LITERAL triple2.s.value = 'subject2' + triple2.p.type = LITERAL triple2.p.value = 'predicate2' + triple2.o.type = LITERAL triple2.o.value = 'object2' - + triple2.g = None + # Create mock message mock_message = MagicMock() mock_message.metadata.user = 'user1' mock_message.metadata.collection = 'collection1' mock_message.triples = [triple1, triple2] - + await processor.store_triples(mock_message) - - # Verify both triples were inserted + + # Verify both triples were inserted (with g= parameter) assert mock_tg_instance.insert.call_count == 2 - mock_tg_instance.insert.assert_any_call('collection1', 'subject1', 'predicate1', 'object1') - mock_tg_instance.insert.assert_any_call('collection1', 'subject2', 'predicate2', 'object2') + mock_tg_instance.insert.assert_any_call('collection1', 'subject1', 'predicate1', 'object1', g=DEFAULT_GRAPH) + mock_tg_instance.insert.assert_any_call('collection1', 'subject2', 'predicate2', 'object2', g=DEFAULT_GRAPH) @pytest.mark.asyncio @patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph') @@ -369,25 +378,30 @@ class TestCassandraStorageProcessor: processor = Processor(taskgroup=taskgroup_mock) - # Create triple with special characters + # Create triple with special characters and proper Term structure triple = MagicMock() + triple.s.type = LITERAL triple.s.value = 'subject with spaces & symbols' + triple.p.type = LITERAL triple.p.value = 'predicate:with/colons' + triple.o.type = LITERAL triple.o.value = 'object with "quotes" and unicode: ñáéíóú' - + triple.g = None + mock_message = MagicMock() mock_message.metadata.user = 'test_user' mock_message.metadata.collection = 'test_collection' mock_message.triples = [triple] - + await processor.store_triples(mock_message) - + # Verify the triple was inserted with special characters preserved mock_tg_instance.insert.assert_called_once_with( 'test_collection', 'subject with spaces & symbols', 'predicate:with/colons', - 'object with "quotes" and unicode: ñáéíóú' + 'object with "quotes" and unicode: ñáéíóú', + g=DEFAULT_GRAPH ) @pytest.mark.asyncio @@ -475,11 +489,15 @@ class TestCassandraPerformanceOptimizations: processor = Processor(taskgroup=taskgroup_mock) - # Create test triple + # Create test triple with proper Term structure triple = MagicMock() + triple.s.type = LITERAL triple.s.value = 'test_subject' + triple.p.type = LITERAL triple.p.value = 'test_predicate' + triple.o.type = LITERAL triple.o.value = 'test_object' + triple.g = None mock_message = MagicMock() mock_message.metadata.user = 'user1' @@ -490,7 +508,8 @@ class TestCassandraPerformanceOptimizations: # Verify insert was called for the triple (implementation details tested in KnowledgeGraph) mock_tg_instance.insert.assert_called_once_with( - 'collection1', 'test_subject', 'test_predicate', 'test_object' + 'collection1', 'test_subject', 'test_predicate', 'test_object', + g=DEFAULT_GRAPH ) def test_environment_variable_controls_mode(self): diff --git a/tests/unit/test_storage/test_triples_falkordb_storage.py b/tests/unit/test_storage/test_triples_falkordb_storage.py index 02d9cdd0..05dcb2e5 100644 --- a/tests/unit/test_storage/test_triples_falkordb_storage.py +++ b/tests/unit/test_storage/test_triples_falkordb_storage.py @@ -6,7 +6,7 @@ import pytest from unittest.mock import MagicMock, patch from trustgraph.storage.triples.falkordb.write import Processor -from trustgraph.schema import Value, Triple +from trustgraph.schema import Term, Triple, IRI, LITERAL class TestFalkorDBStorageProcessor: @@ -22,9 +22,9 @@ class TestFalkorDBStorageProcessor: # Create a test triple triple = Triple( - s=Value(value='http://example.com/subject', is_uri=True), - p=Value(value='http://example.com/predicate', is_uri=True), - o=Value(value='literal object', is_uri=False) + s=Term(type=IRI, iri='http://example.com/subject'), + p=Term(type=IRI, iri='http://example.com/predicate'), + o=Term(type=LITERAL, value='literal object') ) message.triples = [triple] @@ -183,9 +183,9 @@ class TestFalkorDBStorageProcessor: message.metadata.collection = 'test_collection' triple = Triple( - s=Value(value='http://example.com/subject', is_uri=True), - p=Value(value='http://example.com/predicate', is_uri=True), - o=Value(value='http://example.com/object', is_uri=True) + s=Term(type=IRI, iri='http://example.com/subject'), + p=Term(type=IRI, iri='http://example.com/predicate'), + o=Term(type=IRI, iri='http://example.com/object') ) message.triples = [triple] @@ -269,14 +269,14 @@ class TestFalkorDBStorageProcessor: message.metadata.collection = 'test_collection' triple1 = Triple( - s=Value(value='http://example.com/subject1', is_uri=True), - p=Value(value='http://example.com/predicate1', is_uri=True), - o=Value(value='literal object1', is_uri=False) + s=Term(type=IRI, iri='http://example.com/subject1'), + p=Term(type=IRI, iri='http://example.com/predicate1'), + o=Term(type=LITERAL, value='literal object1') ) triple2 = Triple( - s=Value(value='http://example.com/subject2', is_uri=True), - p=Value(value='http://example.com/predicate2', is_uri=True), - o=Value(value='http://example.com/object2', is_uri=True) + s=Term(type=IRI, iri='http://example.com/subject2'), + p=Term(type=IRI, iri='http://example.com/predicate2'), + o=Term(type=IRI, iri='http://example.com/object2') ) message.triples = [triple1, triple2] @@ -337,14 +337,14 @@ class TestFalkorDBStorageProcessor: message.metadata.collection = 'test_collection' triple1 = Triple( - s=Value(value='http://example.com/subject1', is_uri=True), - p=Value(value='http://example.com/predicate1', is_uri=True), - o=Value(value='literal object', is_uri=False) + s=Term(type=IRI, iri='http://example.com/subject1'), + p=Term(type=IRI, iri='http://example.com/predicate1'), + o=Term(type=LITERAL, value='literal object') ) triple2 = Triple( - s=Value(value='http://example.com/subject2', is_uri=True), - p=Value(value='http://example.com/predicate2', is_uri=True), - o=Value(value='http://example.com/object2', is_uri=True) + s=Term(type=IRI, iri='http://example.com/subject2'), + p=Term(type=IRI, iri='http://example.com/predicate2'), + o=Term(type=IRI, iri='http://example.com/object2') ) message.triples = [triple1, triple2] diff --git a/tests/unit/test_storage/test_triples_memgraph_storage.py b/tests/unit/test_storage/test_triples_memgraph_storage.py index b38f0759..162586d5 100644 --- a/tests/unit/test_storage/test_triples_memgraph_storage.py +++ b/tests/unit/test_storage/test_triples_memgraph_storage.py @@ -6,7 +6,7 @@ import pytest from unittest.mock import MagicMock, patch from trustgraph.storage.triples.memgraph.write import Processor -from trustgraph.schema import Value, Triple +from trustgraph.schema import Term, Triple, IRI, LITERAL class TestMemgraphStorageProcessor: @@ -22,9 +22,9 @@ class TestMemgraphStorageProcessor: # Create a test triple triple = Triple( - s=Value(value='http://example.com/subject', is_uri=True), - p=Value(value='http://example.com/predicate', is_uri=True), - o=Value(value='literal object', is_uri=False) + s=Term(type=IRI, iri='http://example.com/subject'), + p=Term(type=IRI, iri='http://example.com/predicate'), + o=Term(type=LITERAL, value='literal object') ) message.triples = [triple] @@ -231,9 +231,9 @@ class TestMemgraphStorageProcessor: mock_tx = MagicMock() triple = Triple( - s=Value(value='http://example.com/subject', is_uri=True), - p=Value(value='http://example.com/predicate', is_uri=True), - o=Value(value='http://example.com/object', is_uri=True) + s=Term(type=IRI, iri='http://example.com/subject'), + p=Term(type=IRI, iri='http://example.com/predicate'), + o=Term(type=IRI, iri='http://example.com/object') ) processor.create_triple(mock_tx, triple, "test_user", "test_collection") @@ -265,9 +265,9 @@ class TestMemgraphStorageProcessor: mock_tx = MagicMock() triple = Triple( - s=Value(value='http://example.com/subject', is_uri=True), - p=Value(value='http://example.com/predicate', is_uri=True), - o=Value(value='literal object', is_uri=False) + s=Term(type=IRI, iri='http://example.com/subject'), + p=Term(type=IRI, iri='http://example.com/predicate'), + o=Term(type=LITERAL, value='literal object') ) processor.create_triple(mock_tx, triple, "test_user", "test_collection") @@ -347,14 +347,14 @@ class TestMemgraphStorageProcessor: message.metadata.collection = 'test_collection' triple1 = Triple( - s=Value(value='http://example.com/subject1', is_uri=True), - p=Value(value='http://example.com/predicate1', is_uri=True), - o=Value(value='literal object1', is_uri=False) + s=Term(type=IRI, iri='http://example.com/subject1'), + p=Term(type=IRI, iri='http://example.com/predicate1'), + o=Term(type=LITERAL, value='literal object1') ) triple2 = Triple( - s=Value(value='http://example.com/subject2', is_uri=True), - p=Value(value='http://example.com/predicate2', is_uri=True), - o=Value(value='http://example.com/object2', is_uri=True) + s=Term(type=IRI, iri='http://example.com/subject2'), + p=Term(type=IRI, iri='http://example.com/predicate2'), + o=Term(type=IRI, iri='http://example.com/object2') ) message.triples = [triple1, triple2] diff --git a/tests/unit/test_storage/test_triples_neo4j_storage.py b/tests/unit/test_storage/test_triples_neo4j_storage.py index 2e307102..a5181ed9 100644 --- a/tests/unit/test_storage/test_triples_neo4j_storage.py +++ b/tests/unit/test_storage/test_triples_neo4j_storage.py @@ -6,6 +6,7 @@ import pytest from unittest.mock import MagicMock, patch, AsyncMock from trustgraph.storage.triples.neo4j.write import Processor +from trustgraph.schema import IRI, LITERAL class TestNeo4jStorageProcessor: @@ -257,10 +258,12 @@ class TestNeo4jStorageProcessor: # Create mock triple with URI object triple = MagicMock() - triple.s.value = "http://example.com/subject" - triple.p.value = "http://example.com/predicate" - triple.o.value = "http://example.com/object" - triple.o.is_uri = True + triple.s.type = IRI + triple.s.iri = "http://example.com/subject" + triple.p.type = IRI + triple.p.iri = "http://example.com/predicate" + triple.o.type = IRI + triple.o.iri = "http://example.com/object" # Create mock message with metadata mock_message = MagicMock() @@ -327,10 +330,12 @@ class TestNeo4jStorageProcessor: # Create mock triple with literal object triple = MagicMock() - triple.s.value = "http://example.com/subject" - triple.p.value = "http://example.com/predicate" + triple.s.type = IRI + triple.s.iri = "http://example.com/subject" + triple.p.type = IRI + triple.p.iri = "http://example.com/predicate" + triple.o.type = LITERAL triple.o.value = "literal value" - triple.o.is_uri = False # Create mock message with metadata mock_message = MagicMock() @@ -398,16 +403,20 @@ class TestNeo4jStorageProcessor: # Create mock triples triple1 = MagicMock() - triple1.s.value = "http://example.com/subject1" - triple1.p.value = "http://example.com/predicate1" - triple1.o.value = "http://example.com/object1" - triple1.o.is_uri = True - + triple1.s.type = IRI + triple1.s.iri = "http://example.com/subject1" + triple1.p.type = IRI + triple1.p.iri = "http://example.com/predicate1" + triple1.o.type = IRI + triple1.o.iri = "http://example.com/object1" + triple2 = MagicMock() - triple2.s.value = "http://example.com/subject2" - triple2.p.value = "http://example.com/predicate2" + triple2.s.type = IRI + triple2.s.iri = "http://example.com/subject2" + triple2.p.type = IRI + triple2.p.iri = "http://example.com/predicate2" + triple2.o.type = LITERAL triple2.o.value = "literal value" - triple2.o.is_uri = False # Create mock message with metadata mock_message = MagicMock() @@ -550,10 +559,12 @@ class TestNeo4jStorageProcessor: # Create triple with special characters triple = MagicMock() - triple.s.value = "http://example.com/subject with spaces" - triple.p.value = "http://example.com/predicate:with/symbols" + triple.s.type = IRI + triple.s.iri = "http://example.com/subject with spaces" + triple.p.type = IRI + triple.p.iri = "http://example.com/predicate:with/symbols" + triple.o.type = LITERAL triple.o.value = 'literal with "quotes" and unicode: ñáéíóú' - triple.o.is_uri = False mock_message = MagicMock() mock_message.triples = [triple] diff --git a/trustgraph-base/trustgraph/api/flow.py b/trustgraph-base/trustgraph/api/flow.py index d06a6327..6fd9c723 100644 --- a/trustgraph-base/trustgraph/api/flow.py +++ b/trustgraph-base/trustgraph/api/flow.py @@ -10,12 +10,27 @@ import json import base64 from .. knowledge import hash, Uri, Literal +from .. schema import IRI, LITERAL from . types import Triple from . exceptions import ProtocolException + def to_value(x): - if x["e"]: return Uri(x["v"]) - return Literal(x["v"]) + """Convert wire format to Uri or Literal.""" + if x.get("t") == IRI: + return Uri(x.get("i", "")) + elif x.get("t") == LITERAL: + return Literal(x.get("v", "")) + # Fallback for any other type + return Literal(x.get("v", x.get("i", ""))) + + +def from_value(v): + """Convert Uri or Literal to wire format.""" + if isinstance(v, Uri): + return {"t": IRI, "i": str(v)} + else: + return {"t": LITERAL, "v": str(v)} class Flow: """ @@ -751,17 +766,17 @@ class FlowInstance: if s: if not isinstance(s, Uri): raise RuntimeError("s must be Uri") - input["s"] = { "v": str(s), "e": isinstance(s, Uri), } - + input["s"] = from_value(s) + if p: if not isinstance(p, Uri): raise RuntimeError("p must be Uri") - input["p"] = { "v": str(p), "e": isinstance(p, Uri), } + input["p"] = from_value(p) if o: if not isinstance(o, Uri) and not isinstance(o, Literal): raise RuntimeError("o must be Uri or Literal") - input["o"] = { "v": str(o), "e": isinstance(o, Uri), } + input["o"] = from_value(o) object = self.request( "service/triples", @@ -834,9 +849,9 @@ class FlowInstance: if metadata: metadata.emit( lambda t: triples.append({ - "s": { "v": t["s"], "e": isinstance(t["s"], Uri) }, - "p": { "v": t["p"], "e": isinstance(t["p"], Uri) }, - "o": { "v": t["o"], "e": isinstance(t["o"], Uri) } + "s": from_value(t["s"]), + "p": from_value(t["p"]), + "o": from_value(t["o"]), }) ) @@ -913,9 +928,9 @@ class FlowInstance: if metadata: metadata.emit( lambda t: triples.append({ - "s": { "v": t["s"], "e": isinstance(t["s"], Uri) }, - "p": { "v": t["p"], "e": isinstance(t["p"], Uri) }, - "o": { "v": t["o"], "e": isinstance(t["o"], Uri) } + "s": from_value(t["s"]), + "p": from_value(t["p"]), + "o": from_value(t["o"]), }) ) diff --git a/trustgraph-base/trustgraph/api/knowledge.py b/trustgraph-base/trustgraph/api/knowledge.py index 23f6c9f2..1fae350c 100644 --- a/trustgraph-base/trustgraph/api/knowledge.py +++ b/trustgraph-base/trustgraph/api/knowledge.py @@ -10,11 +10,18 @@ import json import base64 from .. knowledge import hash, Uri, Literal +from .. schema import IRI, LITERAL from . types import Triple + def to_value(x): - if x["e"]: return Uri(x["v"]) - return Literal(x["v"]) + """Convert wire format to Uri or Literal.""" + if x.get("t") == IRI: + return Uri(x.get("i", "")) + elif x.get("t") == LITERAL: + return Literal(x.get("v", "")) + # Fallback for any other type + return Literal(x.get("v", x.get("i", ""))) class Knowledge: """ diff --git a/trustgraph-base/trustgraph/api/library.py b/trustgraph-base/trustgraph/api/library.py index b068f627..e50dc0aa 100644 --- a/trustgraph-base/trustgraph/api/library.py +++ b/trustgraph-base/trustgraph/api/library.py @@ -12,13 +12,28 @@ import logging from . types import DocumentMetadata, ProcessingMetadata, Triple from .. knowledge import hash, Uri, Literal +from .. schema import IRI, LITERAL from . exceptions import * logger = logging.getLogger(__name__) + def to_value(x): - if x["e"]: return Uri(x["v"]) - return Literal(x["v"]) + """Convert wire format to Uri or Literal.""" + if x.get("t") == IRI: + return Uri(x.get("i", "")) + elif x.get("t") == LITERAL: + return Literal(x.get("v", "")) + # Fallback for any other type + return Literal(x.get("v", x.get("i", ""))) + + +def from_value(v): + """Convert Uri or Literal to wire format.""" + if isinstance(v, Uri): + return {"t": IRI, "i": str(v)} + else: + return {"t": LITERAL, "v": str(v)} class Library: """ @@ -118,18 +133,18 @@ class Library: if isinstance(metadata, list): triples = [ { - "s": { "v": t.s, "e": isinstance(t.s, Uri) }, - "p": { "v": t.p, "e": isinstance(t.p, Uri) }, - "o": { "v": t.o, "e": isinstance(t.o, Uri) } + "s": from_value(t.s), + "p": from_value(t.p), + "o": from_value(t.o), } for t in metadata ] elif hasattr(metadata, "emit"): metadata.emit( lambda t: triples.append({ - "s": { "v": t["s"], "e": isinstance(t["s"], Uri) }, - "p": { "v": t["p"], "e": isinstance(t["p"], Uri) }, - "o": { "v": t["o"], "e": isinstance(t["o"], Uri) } + "s": from_value(t["s"]), + "p": from_value(t["p"]), + "o": from_value(t["o"]), }) ) else: @@ -315,9 +330,9 @@ class Library: "comments": metadata.comments, "metadata": [ { - "s": { "v": t["s"], "e": isinstance(t["s"], Uri) }, - "p": { "v": t["p"], "e": isinstance(t["p"], Uri) }, - "o": { "v": t["o"], "e": isinstance(t["o"], Uri) } + "s": from_value(t["s"]), + "p": from_value(t["p"]), + "o": from_value(t["o"]), } for t in metadata.metadata ], diff --git a/trustgraph-base/trustgraph/base/document_embeddings_query_service.py b/trustgraph-base/trustgraph/base/document_embeddings_query_service.py index bca915e0..486ccd59 100644 --- a/trustgraph-base/trustgraph/base/document_embeddings_query_service.py +++ b/trustgraph-base/trustgraph/base/document_embeddings_query_service.py @@ -7,7 +7,7 @@ embeddings. import logging from .. schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse -from .. schema import Error, Value +from .. schema import Error, Term from . flow_processor import FlowProcessor from . consumer_spec import ConsumerSpec diff --git a/trustgraph-base/trustgraph/base/graph_embeddings_client.py b/trustgraph-base/trustgraph/base/graph_embeddings_client.py index e25d76c7..07eb2bc7 100644 --- a/trustgraph-base/trustgraph/base/graph_embeddings_client.py +++ b/trustgraph-base/trustgraph/base/graph_embeddings_client.py @@ -2,15 +2,21 @@ import logging from . request_response_spec import RequestResponse, RequestResponseSpec -from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse +from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse, IRI, LITERAL from .. knowledge import Uri, Literal # Module logger logger = logging.getLogger(__name__) + def to_value(x): - if x.is_uri: return Uri(x.value) - return Literal(x.value) + """Convert schema Term to Uri or Literal.""" + if x.type == IRI: + return Uri(x.iri) + elif x.type == LITERAL: + return Literal(x.value) + # Fallback + return Literal(x.value or x.iri) class GraphEmbeddingsClient(RequestResponse): async def query(self, vectors, limit=20, user="trustgraph", diff --git a/trustgraph-base/trustgraph/base/graph_embeddings_query_service.py b/trustgraph-base/trustgraph/base/graph_embeddings_query_service.py index f3afdba2..874b080d 100644 --- a/trustgraph-base/trustgraph/base/graph_embeddings_query_service.py +++ b/trustgraph-base/trustgraph/base/graph_embeddings_query_service.py @@ -7,7 +7,7 @@ embeddings. import logging from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse -from .. schema import Error, Value +from .. schema import Error, Term from . flow_processor import FlowProcessor from . consumer_spec import ConsumerSpec diff --git a/trustgraph-base/trustgraph/base/triples_client.py b/trustgraph-base/trustgraph/base/triples_client.py index c9f747b5..7258d3ca 100644 --- a/trustgraph-base/trustgraph/base/triples_client.py +++ b/trustgraph-base/trustgraph/base/triples_client.py @@ -1,24 +1,34 @@ from . request_response_spec import RequestResponse, RequestResponseSpec -from .. schema import TriplesQueryRequest, TriplesQueryResponse, Value +from .. schema import TriplesQueryRequest, TriplesQueryResponse, Term, IRI, LITERAL from .. knowledge import Uri, Literal + class Triple: def __init__(self, s, p, o): self.s = s self.p = p self.o = o + def to_value(x): - if x.is_uri: return Uri(x.value) - return Literal(x.value) + """Convert schema Term to Uri or Literal.""" + if x.type == IRI: + return Uri(x.iri) + elif x.type == LITERAL: + return Literal(x.value) + # Fallback + return Literal(x.value or x.iri) + def from_value(x): - if x is None: return None + """Convert Uri or Literal to schema Term.""" + if x is None: + return None if isinstance(x, Uri): - return Value(value=str(x), is_uri=True) + return Term(type=IRI, iri=str(x)) else: - return Value(value=str(x), is_uri=False) + return Term(type=LITERAL, value=str(x)) class TriplesClient(RequestResponse): async def query(self, s=None, p=None, o=None, limit=20, diff --git a/trustgraph-base/trustgraph/base/triples_query_service.py b/trustgraph-base/trustgraph/base/triples_query_service.py index 0d8affcb..b156ef55 100644 --- a/trustgraph-base/trustgraph/base/triples_query_service.py +++ b/trustgraph-base/trustgraph/base/triples_query_service.py @@ -7,7 +7,7 @@ null. Output is a list of triples. import logging from .. schema import TriplesQueryRequest, TriplesQueryResponse, Error -from .. schema import Value, Triple +from .. schema import Term, Triple from . flow_processor import FlowProcessor from . consumer_spec import ConsumerSpec diff --git a/trustgraph-base/trustgraph/clients/triples_query_client.py b/trustgraph-base/trustgraph/clients/triples_query_client.py index 8ed2ebb7..401aaf0b 100644 --- a/trustgraph-base/trustgraph/clients/triples_query_client.py +++ b/trustgraph-base/trustgraph/clients/triples_query_client.py @@ -2,7 +2,7 @@ import _pulsar -from .. schema import TriplesQueryRequest, TriplesQueryResponse, Value +from .. schema import TriplesQueryRequest, TriplesQueryResponse, Term, IRI, LITERAL from .. schema import triples_request_queue from .. schema import triples_response_queue from . base import BaseClient @@ -46,9 +46,9 @@ class TriplesQueryClient(BaseClient): if ent == None: return None if ent.startswith("http://") or ent.startswith("https://"): - return Value(value=ent, is_uri=True) + return Term(type=IRI, iri=ent) - return Value(value=ent, is_uri=False) + return Term(type=LITERAL, value=ent) def request( self, diff --git a/trustgraph-base/trustgraph/messaging/translators/__init__.py b/trustgraph-base/trustgraph/messaging/translators/__init__.py index 9ce2730e..5849f4ce 100644 --- a/trustgraph-base/trustgraph/messaging/translators/__init__.py +++ b/trustgraph-base/trustgraph/messaging/translators/__init__.py @@ -1,5 +1,5 @@ from .base import Translator, MessageTranslator -from .primitives import ValueTranslator, TripleTranslator, SubgraphTranslator, RowSchemaTranslator, FieldTranslator, row_schema_translator, field_translator +from .primitives import TermTranslator, ValueTranslator, TripleTranslator, SubgraphTranslator, RowSchemaTranslator, FieldTranslator, row_schema_translator, field_translator from .metadata import DocumentMetadataTranslator, ProcessingMetadataTranslator from .agent import AgentRequestTranslator, AgentResponseTranslator from .embeddings import EmbeddingsRequestTranslator, EmbeddingsResponseTranslator diff --git a/trustgraph-base/trustgraph/messaging/translators/primitives.py b/trustgraph-base/trustgraph/messaging/translators/primitives.py index 42db4151..790ae8f7 100644 --- a/trustgraph-base/trustgraph/messaging/translators/primitives.py +++ b/trustgraph-base/trustgraph/messaging/translators/primitives.py @@ -1,37 +1,133 @@ from typing import Dict, Any, List -from ...schema import Value, Triple, RowSchema, Field +from ...schema import Term, Triple, RowSchema, Field, IRI, BLANK, LITERAL, TRIPLE from .base import Translator -class ValueTranslator(Translator): - """Translator for Value schema objects""" - - def to_pulsar(self, data: Dict[str, Any]) -> Value: - return Value(value=data["v"], is_uri=data["e"]) - - def from_pulsar(self, obj: Value) -> Dict[str, Any]: - return {"v": obj.value, "e": obj.is_uri} +class TermTranslator(Translator): + """ + Translator for Term schema objects. + + Wire format (compact keys): + - "t": type (i/b/l/t) + - "i": iri (for IRI type) + - "d": id (for BLANK type) + - "v": value (for LITERAL type) + - "dt": datatype (for LITERAL type) + - "ln": language (for LITERAL type) + - "tr": triple (for TRIPLE type, nested) + """ + + def to_pulsar(self, data: Dict[str, Any]) -> Term: + term_type = data.get("t", "") + + if term_type == IRI: + return Term(type=IRI, iri=data.get("i", "")) + + elif term_type == BLANK: + return Term(type=BLANK, id=data.get("d", "")) + + elif term_type == LITERAL: + return Term( + type=LITERAL, + value=data.get("v", ""), + datatype=data.get("dt", ""), + language=data.get("ln", ""), + ) + + elif term_type == TRIPLE: + # Nested triple - use TripleTranslator + triple_data = data.get("tr") + if triple_data: + triple = _triple_translator_to_pulsar(triple_data) + else: + triple = None + return Term(type=TRIPLE, triple=triple) + + else: + # Unknown or empty type + return Term(type=term_type) + + def from_pulsar(self, obj: Term) -> Dict[str, Any]: + result: Dict[str, Any] = {"t": obj.type} + + if obj.type == IRI: + result["i"] = obj.iri + + elif obj.type == BLANK: + result["d"] = obj.id + + elif obj.type == LITERAL: + result["v"] = obj.value + if obj.datatype: + result["dt"] = obj.datatype + if obj.language: + result["ln"] = obj.language + + elif obj.type == TRIPLE: + if obj.triple: + result["tr"] = _triple_translator_from_pulsar(obj.triple) + + return result + + +# Module-level helper functions to avoid circular instantiation +def _triple_translator_to_pulsar(data: Dict[str, Any]) -> Triple: + term_translator = TermTranslator() + return Triple( + s=term_translator.to_pulsar(data["s"]) if data.get("s") else None, + p=term_translator.to_pulsar(data["p"]) if data.get("p") else None, + o=term_translator.to_pulsar(data["o"]) if data.get("o") else None, + g=data.get("g"), + ) + + +def _triple_translator_from_pulsar(obj: Triple) -> Dict[str, Any]: + term_translator = TermTranslator() + result: Dict[str, Any] = {} + + if obj.s: + result["s"] = term_translator.from_pulsar(obj.s) + if obj.p: + result["p"] = term_translator.from_pulsar(obj.p) + if obj.o: + result["o"] = term_translator.from_pulsar(obj.o) + if obj.g: + result["g"] = obj.g + + return result class TripleTranslator(Translator): - """Translator for Triple schema objects""" - + """Translator for Triple schema objects (quads with optional graph)""" + def __init__(self): - self.value_translator = ValueTranslator() - + self.term_translator = TermTranslator() + def to_pulsar(self, data: Dict[str, Any]) -> Triple: return Triple( - s=self.value_translator.to_pulsar(data["s"]), - p=self.value_translator.to_pulsar(data["p"]), - o=self.value_translator.to_pulsar(data["o"]) + s=self.term_translator.to_pulsar(data["s"]) if data.get("s") else None, + p=self.term_translator.to_pulsar(data["p"]) if data.get("p") else None, + o=self.term_translator.to_pulsar(data["o"]) if data.get("o") else None, + g=data.get("g"), ) - + def from_pulsar(self, obj: Triple) -> Dict[str, Any]: - return { - "s": self.value_translator.from_pulsar(obj.s), - "p": self.value_translator.from_pulsar(obj.p), - "o": self.value_translator.from_pulsar(obj.o) - } + result: Dict[str, Any] = {} + + if obj.s: + result["s"] = self.term_translator.from_pulsar(obj.s) + if obj.p: + result["p"] = self.term_translator.from_pulsar(obj.p) + if obj.o: + result["o"] = self.term_translator.from_pulsar(obj.o) + if obj.g: + result["g"] = obj.g + + return result + + +# Backward compatibility alias +ValueTranslator = TermTranslator class SubgraphTranslator(Translator): diff --git a/trustgraph-base/trustgraph/messaging/translators/triples.py b/trustgraph-base/trustgraph/messaging/translators/triples.py index 1c08625b..2b01b1bc 100644 --- a/trustgraph-base/trustgraph/messaging/translators/triples.py +++ b/trustgraph-base/trustgraph/messaging/translators/triples.py @@ -14,11 +14,13 @@ class TriplesQueryRequestTranslator(MessageTranslator): s = self.value_translator.to_pulsar(data["s"]) if "s" in data else None p = self.value_translator.to_pulsar(data["p"]) if "p" in data else None o = self.value_translator.to_pulsar(data["o"]) if "o" in data else None - + g = data.get("g") # None=default graph, "*"=all graphs + return TriplesQueryRequest( s=s, p=p, o=o, + g=g, limit=int(data.get("limit", 10000)), user=data.get("user", "trustgraph"), collection=data.get("collection", "default") @@ -30,14 +32,16 @@ class TriplesQueryRequestTranslator(MessageTranslator): "user": obj.user, "collection": obj.collection } - + if obj.s: result["s"] = self.value_translator.from_pulsar(obj.s) if obj.p: result["p"] = self.value_translator.from_pulsar(obj.p) if obj.o: result["o"] = self.value_translator.from_pulsar(obj.o) - + if obj.g is not None: + result["g"] = obj.g + return result diff --git a/trustgraph-base/trustgraph/schema/core/primitives.py b/trustgraph-base/trustgraph/schema/core/primitives.py index 02517614..78676eb0 100644 --- a/trustgraph-base/trustgraph/schema/core/primitives.py +++ b/trustgraph-base/trustgraph/schema/core/primitives.py @@ -1,22 +1,57 @@ - from dataclasses import dataclass, field +# Term type constants +IRI = "i" # IRI/URI node +BLANK = "b" # Blank node +LITERAL = "l" # Literal value +TRIPLE = "t" # Quoted triple (RDF-star) + + @dataclass class Error: type: str = "" message: str = "" + @dataclass -class Value: +class Term: + """ + RDF Term - can represent an IRI, blank node, literal, or quoted triple. + + The 'type' field determines which other fields are relevant: + - IRI: use 'iri' field + - BLANK: use 'id' field + - LITERAL: use 'value', 'datatype', 'language' fields + - TRIPLE: use 'triple' field + """ + type: str = "" # One of: IRI, BLANK, LITERAL, TRIPLE + + # For IRI terms (type == IRI) + iri: str = "" + + # For blank nodes (type == BLANK) + id: str = "" + + # For literals (type == LITERAL) value: str = "" - is_uri: bool = False - type: str = "" + datatype: str = "" # XSD datatype URI (mutually exclusive with language) + language: str = "" # Language tag (mutually exclusive with datatype) + + # For quoted triples (type == TRIPLE) + triple: "Triple | None" = None + @dataclass class Triple: - s: Value | None = None - p: Value | None = None - o: Value | None = None + """ + RDF Triple / Quad. + + The optional 'g' field specifies the named graph (None = default graph). + """ + s: Term | None = None # Subject + p: Term | None = None # Predicate + o: Term | None = None # Object + g: str | None = None # Graph name (IRI), None = default graph @dataclass class Field: diff --git a/trustgraph-base/trustgraph/schema/knowledge/embeddings.py b/trustgraph-base/trustgraph/schema/knowledge/embeddings.py index a3e5b394..473ec3a4 100644 --- a/trustgraph-base/trustgraph/schema/knowledge/embeddings.py +++ b/trustgraph-base/trustgraph/schema/knowledge/embeddings.py @@ -1,7 +1,7 @@ from dataclasses import dataclass, field from ..core.metadata import Metadata -from ..core.primitives import Value, RowSchema +from ..core.primitives import Term, RowSchema from ..core.topic import topic ############################################################################ @@ -10,7 +10,7 @@ from ..core.topic import topic @dataclass class EntityEmbeddings: - entity: Value | None = None + entity: Term | None = None vectors: list[list[float]] = field(default_factory=list) # This is a 'batching' mechanism for the above data diff --git a/trustgraph-base/trustgraph/schema/knowledge/graph.py b/trustgraph-base/trustgraph/schema/knowledge/graph.py index 9040c25e..4ee8d2c0 100644 --- a/trustgraph-base/trustgraph/schema/knowledge/graph.py +++ b/trustgraph-base/trustgraph/schema/knowledge/graph.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field -from ..core.primitives import Value, Triple +from ..core.primitives import Term, Triple from ..core.metadata import Metadata from ..core.topic import topic @@ -10,7 +10,7 @@ from ..core.topic import topic @dataclass class EntityContext: - entity: Value | None = None + entity: Term | None = None context: str = "" # This is a 'batching' mechanism for the above data diff --git a/trustgraph-base/trustgraph/schema/services/lookup.py b/trustgraph-base/trustgraph/schema/services/lookup.py index bdeac636..d944fb89 100644 --- a/trustgraph-base/trustgraph/schema/services/lookup.py +++ b/trustgraph-base/trustgraph/schema/services/lookup.py @@ -1,6 +1,6 @@ from dataclasses import dataclass -from ..core.primitives import Error, Value, Triple +from ..core.primitives import Error, Term, Triple from ..core.topic import topic from ..core.metadata import Metadata diff --git a/trustgraph-base/trustgraph/schema/services/query.py b/trustgraph-base/trustgraph/schema/services/query.py index 31d0852d..dc33febe 100644 --- a/trustgraph-base/trustgraph/schema/services/query.py +++ b/trustgraph-base/trustgraph/schema/services/query.py @@ -1,6 +1,6 @@ from dataclasses import dataclass, field -from ..core.primitives import Error, Value, Triple +from ..core.primitives import Error, Term, Triple from ..core.topic import topic ############################################################################ @@ -17,7 +17,7 @@ class GraphEmbeddingsRequest: @dataclass class GraphEmbeddingsResponse: error: Error | None = None - entities: list[Value] = field(default_factory=list) + entities: list[Term] = field(default_factory=list) ############################################################################ @@ -27,9 +27,10 @@ class GraphEmbeddingsResponse: class TriplesQueryRequest: user: str = "" collection: str = "" - s: Value | None = None - p: Value | None = None - o: Value | None = None + s: Term | None = None + p: Term | None = None + o: Term | None = None + g: str | None = None # Graph IRI. None=default graph, "*"=all graphs limit: int = 0 @dataclass diff --git a/trustgraph-base/trustgraph/schema/services/retrieval.py b/trustgraph-base/trustgraph/schema/services/retrieval.py index 72085ae8..4337cb9b 100644 --- a/trustgraph-base/trustgraph/schema/services/retrieval.py +++ b/trustgraph-base/trustgraph/schema/services/retrieval.py @@ -1,6 +1,6 @@ from dataclasses import dataclass from ..core.topic import topic -from ..core.primitives import Error, Value +from ..core.primitives import Error, Term ############################################################################ diff --git a/trustgraph-bedrock/pyproject.toml b/trustgraph-bedrock/pyproject.toml index 81d5eaca..4e093953 100644 --- a/trustgraph-bedrock/pyproject.toml +++ b/trustgraph-bedrock/pyproject.toml @@ -10,7 +10,7 @@ description = "TrustGraph provides a means to run a pipeline of flexible AI proc readme = "README.md" requires-python = ">=3.8" dependencies = [ - "trustgraph-base>=1.9,<1.10", + "trustgraph-base>=2.0,<2.1", "pulsar-client", "prometheus-client", "boto3", diff --git a/trustgraph-cli/pyproject.toml b/trustgraph-cli/pyproject.toml index e952880a..ab7980ac 100644 --- a/trustgraph-cli/pyproject.toml +++ b/trustgraph-cli/pyproject.toml @@ -10,7 +10,7 @@ description = "TrustGraph provides a means to run a pipeline of flexible AI proc readme = "README.md" requires-python = ">=3.8" dependencies = [ - "trustgraph-base>=1.9,<1.10", + "trustgraph-base>=2.0,<2.1", "requests", "pulsar-client", "aiohttp", diff --git a/trustgraph-embeddings-hf/pyproject.toml b/trustgraph-embeddings-hf/pyproject.toml index 1a74db2b..79e14540 100644 --- a/trustgraph-embeddings-hf/pyproject.toml +++ b/trustgraph-embeddings-hf/pyproject.toml @@ -10,8 +10,8 @@ description = "HuggingFace embeddings support for TrustGraph." readme = "README.md" requires-python = ">=3.8" dependencies = [ - "trustgraph-base>=1.9,<1.10", - "trustgraph-flow>=1.9,<1.10", + "trustgraph-base>=2.0,<2.1", + "trustgraph-flow>=2.0,<2.1", "torch", "urllib3", "transformers", diff --git a/trustgraph-flow/pyproject.toml b/trustgraph-flow/pyproject.toml index 49f359aa..381aa778 100644 --- a/trustgraph-flow/pyproject.toml +++ b/trustgraph-flow/pyproject.toml @@ -10,7 +10,7 @@ description = "TrustGraph provides a means to run a pipeline of flexible AI proc readme = "README.md" requires-python = ">=3.8" dependencies = [ - "trustgraph-base>=1.9,<1.10", + "trustgraph-base>=2.0,<2.1", "aiohttp", "anthropic", "scylla-driver", diff --git a/trustgraph-flow/trustgraph/direct/cassandra_kg.py b/trustgraph-flow/trustgraph/direct/cassandra_kg.py index 116abe02..f8a20041 100644 --- a/trustgraph-flow/trustgraph/direct/cassandra_kg.py +++ b/trustgraph-flow/trustgraph/direct/cassandra_kg.py @@ -11,7 +11,24 @@ _active_clusters = [] logger = logging.getLogger(__name__) +# Sentinel value for wildcard graph queries +GRAPH_WILDCARD = "*" + +# Default graph stored as empty string +DEFAULT_GRAPH = "" + + class KnowledgeGraph: + """ + Cassandra-backed knowledge graph supporting quads (s, p, o, g). + + Uses 7 tables to support all 16 query patterns efficiently: + - Family A (g-wildcard): SPOG, POSG, OSPG + - Family B (g-specified): GSPO, GPOS, GOSP + - Collection table: COLL (for iteration/deletion) + + Plus a metadata table for tracking collections. + """ def __init__( self, hosts=None, @@ -24,12 +41,22 @@ class KnowledgeGraph: self.keyspace = keyspace self.username = username - # Optimized multi-table schema with collection deletion support - self.subject_table = "triples_s" - self.po_table = "triples_p" - self.object_table = "triples_o" - self.collection_table = "triples_collection" # For SPO queries and deletion - self.collection_metadata_table = "collection_metadata" # For tracking which collections exist + # 7-table schema for quads with full query pattern support + # Family A: g-wildcard queries (g in clustering columns) + self.spog_table = "quads_spog" # partition (collection, s), cluster (p, o, g) + self.posg_table = "quads_posg" # partition (collection, p), cluster (o, s, g) + self.ospg_table = "quads_ospg" # partition (collection, o), cluster (s, p, g) + + # Family B: g-specified queries (g in partition key) + self.gspo_table = "quads_gspo" # partition (collection, g, s), cluster (p, o) + self.gpos_table = "quads_gpos" # partition (collection, g, p), cluster (o, s) + self.gosp_table = "quads_gosp" # partition (collection, g, o), cluster (s, p) + + # Collection table for iteration and bulk deletion + self.coll_table = "quads_coll" # partition (collection), cluster (g, s, p, o) + + # Collection metadata tracking + self.collection_metadata_table = "collection_metadata" if username and password: ssl_context = SSLContext(PROTOCOL_TLSv1_2) @@ -46,237 +73,376 @@ class KnowledgeGraph: self.prepare_statements() def clear(self): - self.session.execute(f""" drop keyspace if exists {self.keyspace}; - """); - + """) self.init() def init(self): - self.session.execute(f""" create keyspace if not exists {self.keyspace} with replication = {{ 'class' : 'SimpleStrategy', 'replication_factor' : 1 }}; - """); + """) self.session.set_keyspace(self.keyspace) - self.init_optimized_schema() + self.init_quad_schema() + def init_quad_schema(self): + """Initialize 7-table schema for quads with full query pattern support""" - def init_optimized_schema(self): - """Initialize optimized multi-table schema for performance""" - # Table 1: Subject-centric queries (get_s, get_sp, get_os) - # Compound partition key for optimal data distribution + # Family A: g-wildcard queries (g in clustering columns) + + # SPOG: partition (collection, s), cluster (p, o, g) + # Supports: (?, s, ?, ?), (?, s, p, ?), (?, s, p, o) self.session.execute(f""" - CREATE TABLE IF NOT EXISTS {self.subject_table} ( + CREATE TABLE IF NOT EXISTS {self.spog_table} ( collection text, s text, p text, o text, - PRIMARY KEY ((collection, s), p, o) + g text, + PRIMARY KEY ((collection, s), p, o, g) ); - """); + """) - # Table 2: Predicate-Object queries (get_p, get_po) - eliminates ALLOW FILTERING! - # Compound partition key for optimal data distribution + # POSG: partition (collection, p), cluster (o, s, g) + # Supports: (?, ?, p, ?), (?, ?, p, o) self.session.execute(f""" - CREATE TABLE IF NOT EXISTS {self.po_table} ( + CREATE TABLE IF NOT EXISTS {self.posg_table} ( collection text, p text, o text, s text, - PRIMARY KEY ((collection, p), o, s) + g text, + PRIMARY KEY ((collection, p), o, s, g) ); - """); + """) - # Table 3: Object-centric queries (get_o) - # Compound partition key for optimal data distribution + # OSPG: partition (collection, o), cluster (s, p, g) + # Supports: (?, ?, ?, o), (?, s, ?, o) self.session.execute(f""" - CREATE TABLE IF NOT EXISTS {self.object_table} ( + CREATE TABLE IF NOT EXISTS {self.ospg_table} ( collection text, o text, s text, p text, - PRIMARY KEY ((collection, o), s, p) + g text, + PRIMARY KEY ((collection, o), s, p, g) ); - """); + """) - # Table 4: Collection management and SPO queries (get_spo) - # Simple partition key enables efficient collection deletion + # Family B: g-specified queries (g in partition key) + + # GSPO: partition (collection, g, s), cluster (p, o) + # Supports: (g, s, ?, ?), (g, s, p, ?), (g, s, p, o) self.session.execute(f""" - CREATE TABLE IF NOT EXISTS {self.collection_table} ( + CREATE TABLE IF NOT EXISTS {self.gspo_table} ( collection text, + g text, s text, p text, o text, - PRIMARY KEY (collection, s, p, o) + PRIMARY KEY ((collection, g, s), p, o) ); - """); + """) - # Table 5: Collection metadata tracking - # Tracks which collections exist without polluting triple data + # GPOS: partition (collection, g, p), cluster (o, s) + # Supports: (g, ?, p, ?), (g, ?, p, o) + self.session.execute(f""" + CREATE TABLE IF NOT EXISTS {self.gpos_table} ( + collection text, + g text, + p text, + o text, + s text, + PRIMARY KEY ((collection, g, p), o, s) + ); + """) + + # GOSP: partition (collection, g, o), cluster (s, p) + # Supports: (g, ?, ?, o), (g, s, ?, o) + self.session.execute(f""" + CREATE TABLE IF NOT EXISTS {self.gosp_table} ( + collection text, + g text, + o text, + s text, + p text, + PRIMARY KEY ((collection, g, o), s, p) + ); + """) + + # Collection table for iteration and bulk deletion + # COLL: partition (collection), cluster (g, s, p, o) + self.session.execute(f""" + CREATE TABLE IF NOT EXISTS {self.coll_table} ( + collection text, + g text, + s text, + p text, + o text, + PRIMARY KEY (collection, g, s, p, o) + ); + """) + + # Collection metadata tracking self.session.execute(f""" CREATE TABLE IF NOT EXISTS {self.collection_metadata_table} ( collection text, created_at timestamp, PRIMARY KEY (collection) ); - """); + """) - logger.info("Optimized multi-table schema initialized (5 tables)") + logger.info("Quad schema initialized (7 tables + metadata)") def prepare_statements(self): - """Prepare statements for optimal performance""" - # Insert statements for batch operations - self.insert_subject_stmt = self.session.prepare( - f"INSERT INTO {self.subject_table} (collection, s, p, o) VALUES (?, ?, ?, ?)" + """Prepare statements for all 7 tables""" + + # Insert statements + self.insert_spog_stmt = self.session.prepare( + f"INSERT INTO {self.spog_table} (collection, s, p, o, g) VALUES (?, ?, ?, ?, ?)" + ) + self.insert_posg_stmt = self.session.prepare( + f"INSERT INTO {self.posg_table} (collection, p, o, s, g) VALUES (?, ?, ?, ?, ?)" + ) + self.insert_ospg_stmt = self.session.prepare( + f"INSERT INTO {self.ospg_table} (collection, o, s, p, g) VALUES (?, ?, ?, ?, ?)" + ) + self.insert_gspo_stmt = self.session.prepare( + f"INSERT INTO {self.gspo_table} (collection, g, s, p, o) VALUES (?, ?, ?, ?, ?)" + ) + self.insert_gpos_stmt = self.session.prepare( + f"INSERT INTO {self.gpos_table} (collection, g, p, o, s) VALUES (?, ?, ?, ?, ?)" + ) + self.insert_gosp_stmt = self.session.prepare( + f"INSERT INTO {self.gosp_table} (collection, g, o, s, p) VALUES (?, ?, ?, ?, ?)" + ) + self.insert_coll_stmt = self.session.prepare( + f"INSERT INTO {self.coll_table} (collection, g, s, p, o) VALUES (?, ?, ?, ?, ?)" ) - self.insert_po_stmt = self.session.prepare( - f"INSERT INTO {self.po_table} (collection, p, o, s) VALUES (?, ?, ?, ?)" + # Delete statements (for single quad deletion) + self.delete_spog_stmt = self.session.prepare( + f"DELETE FROM {self.spog_table} WHERE collection = ? AND s = ? AND p = ? AND o = ? AND g = ?" + ) + self.delete_posg_stmt = self.session.prepare( + f"DELETE FROM {self.posg_table} WHERE collection = ? AND p = ? AND o = ? AND s = ? AND g = ?" + ) + self.delete_ospg_stmt = self.session.prepare( + f"DELETE FROM {self.ospg_table} WHERE collection = ? AND o = ? AND s = ? AND p = ? AND g = ?" + ) + self.delete_gspo_stmt = self.session.prepare( + f"DELETE FROM {self.gspo_table} WHERE collection = ? AND g = ? AND s = ? AND p = ? AND o = ?" + ) + self.delete_gpos_stmt = self.session.prepare( + f"DELETE FROM {self.gpos_table} WHERE collection = ? AND g = ? AND p = ? AND o = ? AND s = ?" + ) + self.delete_gosp_stmt = self.session.prepare( + f"DELETE FROM {self.gosp_table} WHERE collection = ? AND g = ? AND o = ? AND s = ? AND p = ?" + ) + self.delete_coll_stmt = self.session.prepare( + f"DELETE FROM {self.coll_table} WHERE collection = ? AND g = ? AND s = ? AND p = ? AND o = ?" ) - self.insert_object_stmt = self.session.prepare( - f"INSERT INTO {self.object_table} (collection, o, s, p) VALUES (?, ?, ?, ?)" + # Query statements - Family A (g-wildcard, g in clustering) + + # SPOG table queries + self.get_s_wildcard_stmt = self.session.prepare( + f"SELECT p, o, g FROM {self.spog_table} WHERE collection = ? AND s = ? LIMIT ?" + ) + self.get_sp_wildcard_stmt = self.session.prepare( + f"SELECT o, g FROM {self.spog_table} WHERE collection = ? AND s = ? AND p = ? LIMIT ?" + ) + self.get_spo_wildcard_stmt = self.session.prepare( + f"SELECT g FROM {self.spog_table} WHERE collection = ? AND s = ? AND p = ? AND o = ? LIMIT ?" ) - self.insert_collection_stmt = self.session.prepare( - f"INSERT INTO {self.collection_table} (collection, s, p, o) VALUES (?, ?, ?, ?)" + # POSG table queries + self.get_p_wildcard_stmt = self.session.prepare( + f"SELECT o, s, g FROM {self.posg_table} WHERE collection = ? AND p = ? LIMIT ?" + ) + self.get_po_wildcard_stmt = self.session.prepare( + f"SELECT s, g FROM {self.posg_table} WHERE collection = ? AND p = ? AND o = ? LIMIT ?" ) - # Query statements for optimized access + # OSPG table queries + self.get_o_wildcard_stmt = self.session.prepare( + f"SELECT s, p, g FROM {self.ospg_table} WHERE collection = ? AND o = ? LIMIT ?" + ) + self.get_os_wildcard_stmt = self.session.prepare( + f"SELECT p, g FROM {self.ospg_table} WHERE collection = ? AND o = ? AND s = ? LIMIT ?" + ) + + # Query statements - Family B (g-specified, g in partition) + + # GSPO table queries + self.get_gs_stmt = self.session.prepare( + f"SELECT p, o FROM {self.gspo_table} WHERE collection = ? AND g = ? AND s = ? LIMIT ?" + ) + self.get_gsp_stmt = self.session.prepare( + f"SELECT o FROM {self.gspo_table} WHERE collection = ? AND g = ? AND s = ? AND p = ? LIMIT ?" + ) + self.get_gspo_stmt = self.session.prepare( + f"SELECT s FROM {self.gspo_table} WHERE collection = ? AND g = ? AND s = ? AND p = ? AND o = ? LIMIT ?" + ) + + # GPOS table queries + self.get_gp_stmt = self.session.prepare( + f"SELECT o, s FROM {self.gpos_table} WHERE collection = ? AND g = ? AND p = ? LIMIT ?" + ) + self.get_gpo_stmt = self.session.prepare( + f"SELECT s FROM {self.gpos_table} WHERE collection = ? AND g = ? AND p = ? AND o = ? LIMIT ?" + ) + + # GOSP table queries + self.get_go_stmt = self.session.prepare( + f"SELECT s, p FROM {self.gosp_table} WHERE collection = ? AND g = ? AND o = ? LIMIT ?" + ) + self.get_gos_stmt = self.session.prepare( + f"SELECT p FROM {self.gosp_table} WHERE collection = ? AND g = ? AND o = ? AND s = ? LIMIT ?" + ) + + # Collection table query (for get_all and iteration) self.get_all_stmt = self.session.prepare( - f"SELECT s, p, o FROM {self.subject_table} WHERE collection = ? LIMIT ? ALLOW FILTERING" + f"SELECT g, s, p, o FROM {self.coll_table} WHERE collection = ? LIMIT ?" + ) + self.get_g_stmt = self.session.prepare( + f"SELECT s, p, o FROM {self.coll_table} WHERE collection = ? AND g = ? LIMIT ?" ) - self.get_s_stmt = self.session.prepare( - f"SELECT p, o FROM {self.subject_table} WHERE collection = ? AND s = ? LIMIT ?" - ) + logger.info("Prepared statements initialized for quad schema (7 tables)") - self.get_p_stmt = self.session.prepare( - f"SELECT s, o FROM {self.po_table} WHERE collection = ? AND p = ? LIMIT ?" - ) + def insert(self, collection, s, p, o, g=None): + """Insert a quad into all 7 tables""" + # Default graph stored as empty string + if g is None: + g = DEFAULT_GRAPH - self.get_o_stmt = self.session.prepare( - f"SELECT s, p FROM {self.object_table} WHERE collection = ? AND o = ? LIMIT ?" - ) - - self.get_sp_stmt = self.session.prepare( - f"SELECT o FROM {self.subject_table} WHERE collection = ? AND s = ? AND p = ? LIMIT ?" - ) - - # The critical optimization: get_po without ALLOW FILTERING! - self.get_po_stmt = self.session.prepare( - f"SELECT s FROM {self.po_table} WHERE collection = ? AND p = ? AND o = ? LIMIT ?" - ) - - self.get_os_stmt = self.session.prepare( - f"SELECT p FROM {self.object_table} WHERE collection = ? AND o = ? AND s = ? LIMIT ?" - ) - - self.get_spo_stmt = self.session.prepare( - f"SELECT s as x FROM {self.collection_table} WHERE collection = ? AND s = ? AND p = ? AND o = ? LIMIT ?" - ) - - # Delete statements for collection deletion - self.delete_subject_stmt = self.session.prepare( - f"DELETE FROM {self.subject_table} WHERE collection = ? AND s = ? AND p = ? AND o = ?" - ) - - self.delete_po_stmt = self.session.prepare( - f"DELETE FROM {self.po_table} WHERE collection = ? AND p = ? AND o = ? AND s = ?" - ) - - self.delete_object_stmt = self.session.prepare( - f"DELETE FROM {self.object_table} WHERE collection = ? AND o = ? AND s = ? AND p = ?" - ) - - self.delete_collection_stmt = self.session.prepare( - f"DELETE FROM {self.collection_table} WHERE collection = ? AND s = ? AND p = ? AND o = ?" - ) - - logger.info("Prepared statements initialized for optimal performance (4 tables)") - - def insert(self, collection, s, p, o): - # Batch write to all four tables for consistency batch = BatchStatement() - # Insert into subject table - batch.add(self.insert_subject_stmt, (collection, s, p, o)) + # Family A tables + batch.add(self.insert_spog_stmt, (collection, s, p, o, g)) + batch.add(self.insert_posg_stmt, (collection, p, o, s, g)) + batch.add(self.insert_ospg_stmt, (collection, o, s, p, g)) - # Insert into predicate-object table (column order: collection, p, o, s) - batch.add(self.insert_po_stmt, (collection, p, o, s)) + # Family B tables + batch.add(self.insert_gspo_stmt, (collection, g, s, p, o)) + batch.add(self.insert_gpos_stmt, (collection, g, p, o, s)) + batch.add(self.insert_gosp_stmt, (collection, g, o, s, p)) - # Insert into object table (column order: collection, o, s, p) - batch.add(self.insert_object_stmt, (collection, o, s, p)) - - # Insert into collection table for SPO queries and deletion tracking - batch.add(self.insert_collection_stmt, (collection, s, p, o)) + # Collection table + batch.add(self.insert_coll_stmt, (collection, g, s, p, o)) self.session.execute(batch) + def delete_quad(self, collection, s, p, o, g=None): + """Delete a single quad from all 7 tables""" + if g is None: + g = DEFAULT_GRAPH + + batch = BatchStatement() + + batch.add(self.delete_spog_stmt, (collection, s, p, o, g)) + batch.add(self.delete_posg_stmt, (collection, p, o, s, g)) + batch.add(self.delete_ospg_stmt, (collection, o, s, p, g)) + batch.add(self.delete_gspo_stmt, (collection, g, s, p, o)) + batch.add(self.delete_gpos_stmt, (collection, g, p, o, s)) + batch.add(self.delete_gosp_stmt, (collection, g, o, s, p)) + batch.add(self.delete_coll_stmt, (collection, g, s, p, o)) + + self.session.execute(batch) + + # ======================================================================== + # Query methods + # g=None means default graph, g="*" means all graphs + # ======================================================================== + def get_all(self, collection, limit=50): - # Use subject table for get_all queries - return self.session.execute( - self.get_all_stmt, - (collection, limit) - ) + """Get all quads in collection""" + return self.session.execute(self.get_all_stmt, (collection, limit)) - def get_s(self, collection, s, limit=10): - # Optimized: Direct partition access with (collection, s) - return self.session.execute( - self.get_s_stmt, - (collection, s, limit) - ) + def get_s(self, collection, s, g=None, limit=10): + """Query by subject. g=None: default graph, g='*': all graphs""" + if g is None or g == DEFAULT_GRAPH: + # Default graph - use GSPO table + return self.session.execute(self.get_gs_stmt, (collection, DEFAULT_GRAPH, s, limit)) + elif g == GRAPH_WILDCARD: + # All graphs - use SPOG table + return self.session.execute(self.get_s_wildcard_stmt, (collection, s, limit)) + else: + # Specific graph - use GSPO table + return self.session.execute(self.get_gs_stmt, (collection, g, s, limit)) - def get_p(self, collection, p, limit=10): - # Optimized: Use po_table for direct partition access - return self.session.execute( - self.get_p_stmt, - (collection, p, limit) - ) + def get_p(self, collection, p, g=None, limit=10): + """Query by predicate""" + if g is None or g == DEFAULT_GRAPH: + return self.session.execute(self.get_gp_stmt, (collection, DEFAULT_GRAPH, p, limit)) + elif g == GRAPH_WILDCARD: + return self.session.execute(self.get_p_wildcard_stmt, (collection, p, limit)) + else: + return self.session.execute(self.get_gp_stmt, (collection, g, p, limit)) - def get_o(self, collection, o, limit=10): - # Optimized: Use object_table for direct partition access - return self.session.execute( - self.get_o_stmt, - (collection, o, limit) - ) + def get_o(self, collection, o, g=None, limit=10): + """Query by object""" + if g is None or g == DEFAULT_GRAPH: + return self.session.execute(self.get_go_stmt, (collection, DEFAULT_GRAPH, o, limit)) + elif g == GRAPH_WILDCARD: + return self.session.execute(self.get_o_wildcard_stmt, (collection, o, limit)) + else: + return self.session.execute(self.get_go_stmt, (collection, g, o, limit)) - def get_sp(self, collection, s, p, limit=10): - # Optimized: Use subject_table with clustering key access - return self.session.execute( - self.get_sp_stmt, - (collection, s, p, limit) - ) + def get_sp(self, collection, s, p, g=None, limit=10): + """Query by subject and predicate""" + if g is None or g == DEFAULT_GRAPH: + return self.session.execute(self.get_gsp_stmt, (collection, DEFAULT_GRAPH, s, p, limit)) + elif g == GRAPH_WILDCARD: + return self.session.execute(self.get_sp_wildcard_stmt, (collection, s, p, limit)) + else: + return self.session.execute(self.get_gsp_stmt, (collection, g, s, p, limit)) - def get_po(self, collection, p, o, limit=10): - # CRITICAL OPTIMIZATION: Use po_table - NO MORE ALLOW FILTERING! - return self.session.execute( - self.get_po_stmt, - (collection, p, o, limit) - ) + def get_po(self, collection, p, o, g=None, limit=10): + """Query by predicate and object""" + if g is None or g == DEFAULT_GRAPH: + return self.session.execute(self.get_gpo_stmt, (collection, DEFAULT_GRAPH, p, o, limit)) + elif g == GRAPH_WILDCARD: + return self.session.execute(self.get_po_wildcard_stmt, (collection, p, o, limit)) + else: + return self.session.execute(self.get_gpo_stmt, (collection, g, p, o, limit)) - def get_os(self, collection, o, s, limit=10): - # Optimized: Use subject_table with clustering access (no more ALLOW FILTERING) - return self.session.execute( - self.get_os_stmt, - (collection, s, o, limit) - ) + def get_os(self, collection, o, s, g=None, limit=10): + """Query by object and subject""" + if g is None or g == DEFAULT_GRAPH: + return self.session.execute(self.get_gos_stmt, (collection, DEFAULT_GRAPH, o, s, limit)) + elif g == GRAPH_WILDCARD: + return self.session.execute(self.get_os_wildcard_stmt, (collection, o, s, limit)) + else: + return self.session.execute(self.get_gos_stmt, (collection, g, o, s, limit)) - def get_spo(self, collection, s, p, o, limit=10): - # Optimized: Use collection_table for exact key lookup - return self.session.execute( - self.get_spo_stmt, - (collection, s, p, o, limit) - ) + def get_spo(self, collection, s, p, o, g=None, limit=10): + """Query by subject, predicate, object (find which graphs)""" + if g is None or g == DEFAULT_GRAPH: + return self.session.execute(self.get_gspo_stmt, (collection, DEFAULT_GRAPH, s, p, o, limit)) + elif g == GRAPH_WILDCARD: + return self.session.execute(self.get_spo_wildcard_stmt, (collection, s, p, o, limit)) + else: + return self.session.execute(self.get_gspo_stmt, (collection, g, s, p, o, limit)) + + def get_g(self, collection, g, limit=50): + """Get all quads in a specific graph""" + if g is None: + g = DEFAULT_GRAPH + return self.session.execute(self.get_g_stmt, (collection, g, limit)) + + # ======================================================================== + # Collection management + # ======================================================================== def collection_exists(self, collection): - """Check if collection exists by querying collection_metadata table""" + """Check if collection exists""" try: result = self.session.execute( f"SELECT collection FROM {self.collection_metadata_table} WHERE collection = %s LIMIT 1", @@ -301,63 +467,52 @@ class KnowledgeGraph: raise e def delete_collection(self, collection): - """Delete all triples for a specific collection - - Uses collection_table to enumerate all triples, then deletes from all 4 tables - using full partition keys for optimal performance with compound keys. - """ - # Step 1: Read all triples from collection_table (single partition read) + """Delete all quads for a collection from all 7 tables""" + # Read all quads from collection table rows = self.session.execute( - f"SELECT s, p, o FROM {self.collection_table} WHERE collection = %s", + f"SELECT g, s, p, o FROM {self.coll_table} WHERE collection = %s", (collection,) ) - # Step 2: Delete each triple from all 4 tables using full partition keys - # Batch deletions for efficiency batch = BatchStatement() count = 0 for row in rows: - s, p, o = row.s, row.p, row.o + g, s, p, o = row.g, row.s, row.p, row.o - # Delete from subject table (partition key: collection, s) - batch.add(self.delete_subject_stmt, (collection, s, p, o)) - - # Delete from predicate-object table (partition key: collection, p) - batch.add(self.delete_po_stmt, (collection, p, o, s)) - - # Delete from object table (partition key: collection, o) - batch.add(self.delete_object_stmt, (collection, o, s, p)) - - # Delete from collection table (partition key: collection only) - batch.add(self.delete_collection_stmt, (collection, s, p, o)) + # Delete from all 7 tables + batch.add(self.delete_spog_stmt, (collection, s, p, o, g)) + batch.add(self.delete_posg_stmt, (collection, p, o, s, g)) + batch.add(self.delete_ospg_stmt, (collection, o, s, p, g)) + batch.add(self.delete_gspo_stmt, (collection, g, s, p, o)) + batch.add(self.delete_gpos_stmt, (collection, g, p, o, s)) + batch.add(self.delete_gosp_stmt, (collection, g, o, s, p)) + batch.add(self.delete_coll_stmt, (collection, g, s, p, o)) count += 1 - # Execute batch every 25 triples to avoid oversized batches - # (Each triple adds ~4 statements, so 25 triples = ~100 statements) - if count % 25 == 0: + # Execute batch every 15 quads (7 deletes each = 105 statements) + if count % 15 == 0: self.session.execute(batch) batch = BatchStatement() - # Execute remaining deletions - if count % 25 != 0: + # Execute remaining + if count % 15 != 0: self.session.execute(batch) - # Step 3: Delete collection metadata + # Delete collection metadata self.session.execute( f"DELETE FROM {self.collection_metadata_table} WHERE collection = %s", (collection,) ) - logger.info(f"Deleted {count} triples from collection {collection}") + logger.info(f"Deleted {count} quads from collection {collection}") def close(self): - """Close the Cassandra session and cluster connections properly""" + """Close connections""" if hasattr(self, 'session') and self.session: self.session.shutdown() if hasattr(self, 'cluster') and self.cluster: self.cluster.shutdown() - # Remove from global tracking if self.cluster in _active_clusters: _active_clusters.remove(self.cluster) diff --git a/trustgraph-flow/trustgraph/extract/kg/agent/extract.py b/trustgraph-flow/trustgraph/extract/kg/agent/extract.py index 58230a41..d9057909 100644 --- a/trustgraph-flow/trustgraph/extract/kg/agent/extract.py +++ b/trustgraph-flow/trustgraph/extract/kg/agent/extract.py @@ -3,7 +3,7 @@ import json import urllib.parse import logging -from ....schema import Chunk, Triple, Triples, Metadata, Value +from ....schema import Chunk, Triple, Triples, Metadata, Term, IRI, LITERAL from ....schema import EntityContext, EntityContexts from ....rdf import TRUSTGRAPH_ENTITIES, RDF_LABEL, SUBJECT_OF, DEFINITION @@ -253,32 +253,32 @@ class Processor(FlowProcessor): for defn in definitions: entity_uri = self.to_uri(defn["entity"]) - + # Add entity label triples.append(Triple( - s = Value(value=entity_uri, is_uri=True), - p = Value(value=RDF_LABEL, is_uri=True), - o = Value(value=defn["entity"], is_uri=False), + s = Term(type=IRI, iri=entity_uri), + p = Term(type=IRI, iri=RDF_LABEL), + o = Term(type=LITERAL, value=defn["entity"]), )) - + # Add definition triples.append(Triple( - s = Value(value=entity_uri, is_uri=True), - p = Value(value=DEFINITION, is_uri=True), - o = Value(value=defn["definition"], is_uri=False), + s = Term(type=IRI, iri=entity_uri), + p = Term(type=IRI, iri=DEFINITION), + o = Term(type=LITERAL, value=defn["definition"]), )) - + # Add subject-of relationship to document if metadata.id: triples.append(Triple( - s = Value(value=entity_uri, is_uri=True), - p = Value(value=SUBJECT_OF, is_uri=True), - o = Value(value=metadata.id, is_uri=True), + s = Term(type=IRI, iri=entity_uri), + p = Term(type=IRI, iri=SUBJECT_OF), + o = Term(type=IRI, iri=metadata.id), )) - + # Create entity context for embeddings entity_contexts.append(EntityContext( - entity=Value(value=entity_uri, is_uri=True), + entity=Term(type=IRI, iri=entity_uri), context=defn["definition"] )) @@ -288,61 +288,61 @@ class Processor(FlowProcessor): subject_uri = self.to_uri(rel["subject"]) predicate_uri = self.to_uri(rel["predicate"]) - subject_value = Value(value=subject_uri, is_uri=True) - predicate_value = Value(value=predicate_uri, is_uri=True) + subject_value = Term(type=IRI, iri=subject_uri) + predicate_value = Term(type=IRI, iri=predicate_uri) if rel.get("object-entity", True): object_uri = self.to_uri(rel["object"]) - object_value = Value(value=object_uri, is_uri=True) + object_value = Term(type=IRI, iri=object_uri) else: - object_value = Value(value=rel["object"], is_uri=False) - + object_value = Term(type=LITERAL, value=rel["object"]) + # Add subject and predicate labels triples.append(Triple( s = subject_value, - p = Value(value=RDF_LABEL, is_uri=True), - o = Value(value=rel["subject"], is_uri=False), + p = Term(type=IRI, iri=RDF_LABEL), + o = Term(type=LITERAL, value=rel["subject"]), )) - + triples.append(Triple( s = predicate_value, - p = Value(value=RDF_LABEL, is_uri=True), - o = Value(value=rel["predicate"], is_uri=False), + p = Term(type=IRI, iri=RDF_LABEL), + o = Term(type=LITERAL, value=rel["predicate"]), )) - + # Handle object (entity vs literal) if rel.get("object-entity", True): triples.append(Triple( s = object_value, - p = Value(value=RDF_LABEL, is_uri=True), - o = Value(value=rel["object"], is_uri=True), + p = Term(type=IRI, iri=RDF_LABEL), + o = Term(type=LITERAL, value=rel["object"]), )) - + # Add the main relationship triple triples.append(Triple( s = subject_value, p = predicate_value, o = object_value )) - + # Add subject-of relationships to document if metadata.id: triples.append(Triple( s = subject_value, - p = Value(value=SUBJECT_OF, is_uri=True), - o = Value(value=metadata.id, is_uri=True), + p = Term(type=IRI, iri=SUBJECT_OF), + o = Term(type=IRI, iri=metadata.id), )) - + triples.append(Triple( s = predicate_value, - p = Value(value=SUBJECT_OF, is_uri=True), - o = Value(value=metadata.id, is_uri=True), + p = Term(type=IRI, iri=SUBJECT_OF), + o = Term(type=IRI, iri=metadata.id), )) - + if rel.get("object-entity", True): triples.append(Triple( s = object_value, - p = Value(value=SUBJECT_OF, is_uri=True), - o = Value(value=metadata.id, is_uri=True), + p = Term(type=IRI, iri=SUBJECT_OF), + o = Term(type=IRI, iri=metadata.id), )) return triples, entity_contexts diff --git a/trustgraph-flow/trustgraph/extract/kg/definitions/extract.py b/trustgraph-flow/trustgraph/extract/kg/definitions/extract.py index 1d414b7e..8c278c25 100755 --- a/trustgraph-flow/trustgraph/extract/kg/definitions/extract.py +++ b/trustgraph-flow/trustgraph/extract/kg/definitions/extract.py @@ -9,7 +9,7 @@ import json import urllib.parse import logging -from .... schema import Chunk, Triple, Triples, Metadata, Value +from .... schema import Chunk, Triple, Triples, Metadata, Term, IRI, LITERAL # Module logger logger = logging.getLogger(__name__) @@ -20,9 +20,9 @@ from .... rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF from .... base import FlowProcessor, ConsumerSpec, ProducerSpec from .... base import PromptClientSpec -DEFINITION_VALUE = Value(value=DEFINITION, is_uri=True) -RDF_LABEL_VALUE = Value(value=RDF_LABEL, is_uri=True) -SUBJECT_OF_VALUE = Value(value=SUBJECT_OF, is_uri=True) +DEFINITION_VALUE = Term(type=IRI, iri=DEFINITION) +RDF_LABEL_VALUE = Term(type=IRI, iri=RDF_LABEL) +SUBJECT_OF_VALUE = Term(type=IRI, iri=SUBJECT_OF) default_ident = "kg-extract-definitions" default_concurrency = 1 @@ -142,13 +142,13 @@ class Processor(FlowProcessor): s_uri = self.to_uri(s) - s_value = Value(value=str(s_uri), is_uri=True) - o_value = Value(value=str(o), is_uri=False) + s_value = Term(type=IRI, iri=str(s_uri)) + o_value = Term(type=LITERAL, value=str(o)) triples.append(Triple( s=s_value, p=RDF_LABEL_VALUE, - o=Value(value=s, is_uri=False), + o=Term(type=LITERAL, value=s), )) triples.append(Triple( @@ -158,7 +158,7 @@ class Processor(FlowProcessor): triples.append(Triple( s=s_value, p=SUBJECT_OF_VALUE, - o=Value(value=v.metadata.id, is_uri=True) + o=Term(type=IRI, iri=v.metadata.id) )) ec = EntityContext( diff --git a/trustgraph-flow/trustgraph/extract/kg/ontology/extract.py b/trustgraph-flow/trustgraph/extract/kg/ontology/extract.py index 335f07d2..adcbe0a8 100644 --- a/trustgraph-flow/trustgraph/extract/kg/ontology/extract.py +++ b/trustgraph-flow/trustgraph/extract/kg/ontology/extract.py @@ -8,7 +8,7 @@ import logging import asyncio from typing import List, Dict, Any, Optional -from .... schema import Chunk, Triple, Triples, Metadata, Value +from .... schema import Chunk, Triple, Triples, Metadata, Term, IRI, LITERAL from .... schema import EntityContext, EntityContexts from .... schema import PromptRequest, PromptResponse from .... rdf import TRUSTGRAPH_ENTITIES, RDF_TYPE, RDF_LABEL, DEFINITION @@ -39,6 +39,14 @@ URI_PREFIXES = { } +def make_term(v, is_uri): + """Helper to create Term from value and is_uri flag.""" + if is_uri: + return Term(type=IRI, iri=v) + else: + return Term(type=LITERAL, value=v) + + class Processor(FlowProcessor): """Main OntoRAG extraction processor.""" @@ -446,9 +454,9 @@ class Processor(FlowProcessor): is_object_uri = False # Create Triple object with expanded URIs - s_value = Value(value=subject_uri, is_uri=True) - p_value = Value(value=predicate_uri, is_uri=True) - o_value = Value(value=object_uri, is_uri=is_object_uri) + s_value = make_term(subject_uri, is_uri=True) + p_value = make_term(predicate_uri, is_uri=True) + o_value = make_term(object_uri, is_uri=is_object_uri) validated_triples.append(Triple( s=s_value, @@ -609,9 +617,9 @@ class Processor(FlowProcessor): # rdf:type owl:Class ontology_triples.append(Triple( - s=Value(value=class_uri, is_uri=True), - p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True), - o=Value(value="http://www.w3.org/2002/07/owl#Class", is_uri=True) + s=make_term(class_uri, is_uri=True), + p=make_term("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True), + o=make_term("http://www.w3.org/2002/07/owl#Class", is_uri=True) )) # rdfs:label (stored as 'labels' in OntologyClass.__dict__) @@ -620,18 +628,18 @@ class Processor(FlowProcessor): if isinstance(labels, list) and labels: label_val = labels[0].get('value', class_id) if isinstance(labels[0], dict) else str(labels[0]) ontology_triples.append(Triple( - s=Value(value=class_uri, is_uri=True), - p=Value(value=RDF_LABEL, is_uri=True), - o=Value(value=label_val, is_uri=False) + s=make_term(class_uri, is_uri=True), + p=make_term(RDF_LABEL, is_uri=True), + o=make_term(label_val, is_uri=False) )) # rdfs:comment (stored as 'comment' in OntologyClass.__dict__) if isinstance(class_def, dict) and 'comment' in class_def and class_def['comment']: comment = class_def['comment'] ontology_triples.append(Triple( - s=Value(value=class_uri, is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True), - o=Value(value=comment, is_uri=False) + s=make_term(class_uri, is_uri=True), + p=make_term("http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True), + o=make_term(comment, is_uri=False) )) # rdfs:subClassOf (stored as 'subclass_of' in OntologyClass.__dict__) @@ -648,9 +656,9 @@ class Processor(FlowProcessor): parent_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{parent}" ontology_triples.append(Triple( - s=Value(value=class_uri, is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#subClassOf", is_uri=True), - o=Value(value=parent_uri, is_uri=True) + s=make_term(class_uri, is_uri=True), + p=make_term("http://www.w3.org/2000/01/rdf-schema#subClassOf", is_uri=True), + o=make_term(parent_uri, is_uri=True) )) # Generate triples for object properties @@ -663,9 +671,9 @@ class Processor(FlowProcessor): # rdf:type owl:ObjectProperty ontology_triples.append(Triple( - s=Value(value=prop_uri, is_uri=True), - p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True), - o=Value(value="http://www.w3.org/2002/07/owl#ObjectProperty", is_uri=True) + s=make_term(prop_uri, is_uri=True), + p=make_term("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True), + o=make_term("http://www.w3.org/2002/07/owl#ObjectProperty", is_uri=True) )) # rdfs:label (stored as 'labels' in OntologyProperty.__dict__) @@ -674,18 +682,18 @@ class Processor(FlowProcessor): if isinstance(labels, list) and labels: label_val = labels[0].get('value', prop_id) if isinstance(labels[0], dict) else str(labels[0]) ontology_triples.append(Triple( - s=Value(value=prop_uri, is_uri=True), - p=Value(value=RDF_LABEL, is_uri=True), - o=Value(value=label_val, is_uri=False) + s=make_term(prop_uri, is_uri=True), + p=make_term(RDF_LABEL, is_uri=True), + o=make_term(label_val, is_uri=False) )) # rdfs:comment (stored as 'comment' in OntologyProperty.__dict__) if isinstance(prop_def, dict) and 'comment' in prop_def and prop_def['comment']: comment = prop_def['comment'] ontology_triples.append(Triple( - s=Value(value=prop_uri, is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True), - o=Value(value=comment, is_uri=False) + s=make_term(prop_uri, is_uri=True), + p=make_term("http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True), + o=make_term(comment, is_uri=False) )) # rdfs:domain (stored as 'domain' in OntologyProperty.__dict__) @@ -702,9 +710,9 @@ class Processor(FlowProcessor): domain_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{domain}" ontology_triples.append(Triple( - s=Value(value=prop_uri, is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#domain", is_uri=True), - o=Value(value=domain_uri, is_uri=True) + s=make_term(prop_uri, is_uri=True), + p=make_term("http://www.w3.org/2000/01/rdf-schema#domain", is_uri=True), + o=make_term(domain_uri, is_uri=True) )) # rdfs:range (stored as 'range' in OntologyProperty.__dict__) @@ -721,9 +729,9 @@ class Processor(FlowProcessor): range_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{range_val}" ontology_triples.append(Triple( - s=Value(value=prop_uri, is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#range", is_uri=True), - o=Value(value=range_uri, is_uri=True) + s=make_term(prop_uri, is_uri=True), + p=make_term("http://www.w3.org/2000/01/rdf-schema#range", is_uri=True), + o=make_term(range_uri, is_uri=True) )) # Generate triples for datatype properties @@ -736,9 +744,9 @@ class Processor(FlowProcessor): # rdf:type owl:DatatypeProperty ontology_triples.append(Triple( - s=Value(value=prop_uri, is_uri=True), - p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True), - o=Value(value="http://www.w3.org/2002/07/owl#DatatypeProperty", is_uri=True) + s=make_term(prop_uri, is_uri=True), + p=make_term("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True), + o=make_term("http://www.w3.org/2002/07/owl#DatatypeProperty", is_uri=True) )) # rdfs:label (stored as 'labels' in OntologyProperty.__dict__) @@ -747,18 +755,18 @@ class Processor(FlowProcessor): if isinstance(labels, list) and labels: label_val = labels[0].get('value', prop_id) if isinstance(labels[0], dict) else str(labels[0]) ontology_triples.append(Triple( - s=Value(value=prop_uri, is_uri=True), - p=Value(value=RDF_LABEL, is_uri=True), - o=Value(value=label_val, is_uri=False) + s=make_term(prop_uri, is_uri=True), + p=make_term(RDF_LABEL, is_uri=True), + o=make_term(label_val, is_uri=False) )) # rdfs:comment (stored as 'comment' in OntologyProperty.__dict__) if isinstance(prop_def, dict) and 'comment' in prop_def and prop_def['comment']: comment = prop_def['comment'] ontology_triples.append(Triple( - s=Value(value=prop_uri, is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True), - o=Value(value=comment, is_uri=False) + s=make_term(prop_uri, is_uri=True), + p=make_term("http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True), + o=make_term(comment, is_uri=False) )) # rdfs:domain (stored as 'domain' in OntologyProperty.__dict__) @@ -775,9 +783,9 @@ class Processor(FlowProcessor): domain_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{domain}" ontology_triples.append(Triple( - s=Value(value=prop_uri, is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#domain", is_uri=True), - o=Value(value=domain_uri, is_uri=True) + s=make_term(prop_uri, is_uri=True), + p=make_term("http://www.w3.org/2000/01/rdf-schema#domain", is_uri=True), + o=make_term(domain_uri, is_uri=True) )) # rdfs:range (datatype) @@ -790,9 +798,9 @@ class Processor(FlowProcessor): range_uri = range_val ontology_triples.append(Triple( - s=Value(value=prop_uri, is_uri=True), - p=Value(value="http://www.w3.org/2000/01/rdf-schema#range", is_uri=True), - o=Value(value=range_uri, is_uri=True) + s=make_term(prop_uri, is_uri=True), + p=make_term("http://www.w3.org/2000/01/rdf-schema#range", is_uri=True), + o=make_term(range_uri, is_uri=True) )) logger.info(f"Generated {len(ontology_triples)} triples describing ontology elements") @@ -814,9 +822,9 @@ class Processor(FlowProcessor): entity_data = {} # subject_uri -> {labels: [], definitions: []} for triple in triples: - subject_uri = triple.s.value - predicate_uri = triple.p.value - object_val = triple.o.value + subject_uri = triple.s.iri if triple.s.type == IRI else triple.s.value + predicate_uri = triple.p.iri if triple.p.type == IRI else triple.p.value + object_val = triple.o.value if triple.o.type == LITERAL else triple.o.iri # Initialize entity data if not exists if subject_uri not in entity_data: @@ -824,12 +832,12 @@ class Processor(FlowProcessor): # Collect labels (rdfs:label) if predicate_uri == RDF_LABEL: - if not triple.o.is_uri: # Labels are literals + if triple.o.type == LITERAL: # Labels are literals entity_data[subject_uri]['labels'].append(object_val) # Collect definitions (skos:definition, schema:description) elif predicate_uri == DEFINITION or predicate_uri == "https://schema.org/description": - if not triple.o.is_uri: + if triple.o.type == LITERAL: entity_data[subject_uri]['definitions'].append(object_val) # Build EntityContext objects @@ -848,7 +856,7 @@ class Processor(FlowProcessor): if context_parts: context_text = ". ".join(context_parts) entity_contexts.append(EntityContext( - entity=Value(value=subject_uri, is_uri=True), + entity=make_term(subject_uri, is_uri=True), context=context_text )) diff --git a/trustgraph-flow/trustgraph/extract/kg/ontology/triple_converter.py b/trustgraph-flow/trustgraph/extract/kg/ontology/triple_converter.py index 2eb43b19..06fff4f4 100644 --- a/trustgraph-flow/trustgraph/extract/kg/ontology/triple_converter.py +++ b/trustgraph-flow/trustgraph/extract/kg/ontology/triple_converter.py @@ -8,7 +8,7 @@ with full URIs and correct is_uri flags. import logging from typing import List, Optional -from .... schema import Triple, Value +from .... schema import Triple, Term, IRI, LITERAL from .... rdf import RDF_TYPE, RDF_LABEL from .simplified_parser import Entity, Relationship, Attribute, ExtractionResult @@ -87,17 +87,17 @@ class TripleConverter: # Generate type triple: entity rdf:type ClassURI type_triple = Triple( - s=Value(value=entity_uri, is_uri=True), - p=Value(value=RDF_TYPE, is_uri=True), - o=Value(value=class_uri, is_uri=True) + s=Term(type=IRI, iri=entity_uri), + p=Term(type=IRI, iri=RDF_TYPE), + o=Term(type=IRI, iri=class_uri) ) triples.append(type_triple) # Generate label triple: entity rdfs:label "entity name" label_triple = Triple( - s=Value(value=entity_uri, is_uri=True), - p=Value(value=RDF_LABEL, is_uri=True), - o=Value(value=entity.entity, is_uri=False) # Literal! + s=Term(type=IRI, iri=entity_uri), + p=Term(type=IRI, iri=RDF_LABEL), + o=Term(type=LITERAL, value=entity.entity) # Literal! ) triples.append(label_triple) @@ -131,9 +131,9 @@ class TripleConverter: # Generate triple: subject property object return Triple( - s=Value(value=subject_uri, is_uri=True), - p=Value(value=property_uri, is_uri=True), - o=Value(value=object_uri, is_uri=True) + s=Term(type=IRI, iri=subject_uri), + p=Term(type=IRI, iri=property_uri), + o=Term(type=IRI, iri=object_uri) ) def convert_attribute(self, attribute: Attribute) -> Optional[Triple]: @@ -159,9 +159,9 @@ class TripleConverter: # Generate triple: entity property "literal value" return Triple( - s=Value(value=entity_uri, is_uri=True), - p=Value(value=property_uri, is_uri=True), - o=Value(value=attribute.value, is_uri=False) # Literal! + s=Term(type=IRI, iri=entity_uri), + p=Term(type=IRI, iri=property_uri), + o=Term(type=LITERAL, value=attribute.value) # Literal! ) def _get_class_uri(self, class_id: str) -> Optional[str]: diff --git a/trustgraph-flow/trustgraph/extract/kg/relationships/extract.py b/trustgraph-flow/trustgraph/extract/kg/relationships/extract.py index 6d461997..be0bd705 100755 --- a/trustgraph-flow/trustgraph/extract/kg/relationships/extract.py +++ b/trustgraph-flow/trustgraph/extract/kg/relationships/extract.py @@ -13,15 +13,15 @@ import urllib.parse logger = logging.getLogger(__name__) from .... schema import Chunk, Triple, Triples -from .... schema import Metadata, Value +from .... schema import Metadata, Term, IRI, LITERAL from .... schema import PromptRequest, PromptResponse from .... rdf import RDF_LABEL, TRUSTGRAPH_ENTITIES, SUBJECT_OF from .... base import FlowProcessor, ConsumerSpec, ProducerSpec from .... base import PromptClientSpec -RDF_LABEL_VALUE = Value(value=RDF_LABEL, is_uri=True) -SUBJECT_OF_VALUE = Value(value=SUBJECT_OF, is_uri=True) +RDF_LABEL_VALUE = Term(type=IRI, iri=RDF_LABEL) +SUBJECT_OF_VALUE = Term(type=IRI, iri=SUBJECT_OF) default_ident = "kg-extract-relationships" default_concurrency = 1 @@ -127,16 +127,16 @@ class Processor(FlowProcessor): if o is None: continue s_uri = self.to_uri(s) - s_value = Value(value=str(s_uri), is_uri=True) + s_value = Term(type=IRI, iri=str(s_uri)) p_uri = self.to_uri(p) - p_value = Value(value=str(p_uri), is_uri=True) + p_value = Term(type=IRI, iri=str(p_uri)) - if rel["object-entity"]: + if rel["object-entity"]: o_uri = self.to_uri(o) - o_value = Value(value=str(o_uri), is_uri=True) + o_value = Term(type=IRI, iri=str(o_uri)) else: - o_value = Value(value=str(o), is_uri=False) + o_value = Term(type=LITERAL, value=str(o)) triples.append(Triple( s=s_value, @@ -148,14 +148,14 @@ class Processor(FlowProcessor): triples.append(Triple( s=s_value, p=RDF_LABEL_VALUE, - o=Value(value=str(s), is_uri=False) + o=Term(type=LITERAL, value=str(s)) )) # Label for p triples.append(Triple( s=p_value, p=RDF_LABEL_VALUE, - o=Value(value=str(p), is_uri=False) + o=Term(type=LITERAL, value=str(p)) )) if rel["object-entity"]: @@ -163,14 +163,14 @@ class Processor(FlowProcessor): triples.append(Triple( s=o_value, p=RDF_LABEL_VALUE, - o=Value(value=str(o), is_uri=False) + o=Term(type=LITERAL, value=str(o)) )) # 'Subject of' for s triples.append(Triple( s=s_value, p=SUBJECT_OF_VALUE, - o=Value(value=v.metadata.id, is_uri=True) + o=Term(type=IRI, iri=v.metadata.id) )) if rel["object-entity"]: @@ -178,7 +178,7 @@ class Processor(FlowProcessor): triples.append(Triple( s=o_value, p=SUBJECT_OF_VALUE, - o=Value(value=v.metadata.id, is_uri=True) + o=Term(type=IRI, iri=v.metadata.id) )) await self.emit_triples( diff --git a/trustgraph-flow/trustgraph/extract/kg/topics/extract.py b/trustgraph-flow/trustgraph/extract/kg/topics/extract.py index 129cc64c..206d14d0 100755 --- a/trustgraph-flow/trustgraph/extract/kg/topics/extract.py +++ b/trustgraph-flow/trustgraph/extract/kg/topics/extract.py @@ -11,7 +11,7 @@ import logging # Module logger logger = logging.getLogger(__name__) -from .... schema import Chunk, Triple, Triples, Metadata, Value +from .... schema import Chunk, Triple, Triples, Metadata, Term, IRI, LITERAL from .... schema import chunk_ingest_queue, triples_store_queue from .... schema import prompt_request_queue from .... schema import prompt_response_queue @@ -20,7 +20,7 @@ from .... clients.prompt_client import PromptClient from .... rdf import TRUSTGRAPH_ENTITIES, DEFINITION from .... base import ConsumerProducer -DEFINITION_VALUE = Value(value=DEFINITION, is_uri=True) +DEFINITION_VALUE = Term(type=IRI, iri=DEFINITION) module = "kg-extract-topics" @@ -106,8 +106,8 @@ class Processor(ConsumerProducer): s_uri = self.to_uri(s) - s_value = Value(value=str(s_uri), is_uri=True) - o_value = Value(value=str(o), is_uri=False) + s_value = Term(type=IRI, iri=str(s_uri)) + o_value = Term(type=LITERAL, value=str(o)) await self.emit_edge( v.metadata, s_value, DEFINITION_VALUE, o_value diff --git a/trustgraph-flow/trustgraph/gateway/dispatch/serialize.py b/trustgraph-flow/trustgraph/gateway/dispatch/serialize.py index 653ecfd9..8f1cdece 100644 --- a/trustgraph-flow/trustgraph/gateway/dispatch/serialize.py +++ b/trustgraph-flow/trustgraph/gateway/dispatch/serialize.py @@ -1,46 +1,37 @@ import base64 -from ... schema import Value, Triple, DocumentMetadata, ProcessingMetadata +from ... schema import Term, Triple, DocumentMetadata, ProcessingMetadata +from ... messaging.translators.primitives import TermTranslator, TripleTranslator + +# Singleton translator instances +_term_translator = TermTranslator() +_triple_translator = TripleTranslator() -# DEPRECATED: These functions have been moved to trustgraph.... messaging.translators -# Use the new messaging translation system instead for consistency and reusability. -# Examples: -# from trustgraph.... messaging.translators.primitives import ValueTranslator -# value_translator = ValueTranslator() -# pulsar_value = value_translator.to_pulsar({"v": "example", "e": True}) def to_value(x): - return Value(value=x["v"], is_uri=x["e"]) + """Convert dict to Term. Delegates to TermTranslator.""" + return _term_translator.to_pulsar(x) + def to_subgraph(x): - return [ - Triple( - s=to_value(t["s"]), - p=to_value(t["p"]), - o=to_value(t["o"]) - ) - for t in x - ] + """Convert list of dicts to list of Triples. Delegates to TripleTranslator.""" + return [_triple_translator.to_pulsar(t) for t in x] + def serialize_value(v): - return { - "v": v.value, - "e": v.is_uri, - } + """Convert Term to dict. Delegates to TermTranslator.""" + return _term_translator.from_pulsar(v) + def serialize_triple(t): - return { - "s": serialize_value(t.s), - "p": serialize_value(t.p), - "o": serialize_value(t.o) - } + """Convert Triple to dict. Delegates to TripleTranslator.""" + return _triple_translator.from_pulsar(t) + def serialize_subgraph(sg): - return [ - serialize_triple(t) - for t in sg - ] + """Convert list of Triples to list of dicts.""" + return [serialize_triple(t) for t in sg] def serialize_triples(message): return { diff --git a/trustgraph-flow/trustgraph/query/doc_embeddings/milvus/service.py b/trustgraph-flow/trustgraph/query/doc_embeddings/milvus/service.py index 2915184c..7f269329 100755 --- a/trustgraph-flow/trustgraph/query/doc_embeddings/milvus/service.py +++ b/trustgraph-flow/trustgraph/query/doc_embeddings/milvus/service.py @@ -8,7 +8,7 @@ import logging from .... direct.milvus_doc_embeddings import DocVectors from .... schema import DocumentEmbeddingsResponse -from .... schema import Error, Value +from .... schema import Error from .... base import DocumentEmbeddingsQueryService # Module logger diff --git a/trustgraph-flow/trustgraph/query/doc_embeddings/qdrant/service.py b/trustgraph-flow/trustgraph/query/doc_embeddings/qdrant/service.py index 46e9e687..16f77ad5 100755 --- a/trustgraph-flow/trustgraph/query/doc_embeddings/qdrant/service.py +++ b/trustgraph-flow/trustgraph/query/doc_embeddings/qdrant/service.py @@ -11,7 +11,7 @@ from qdrant_client.models import PointStruct from qdrant_client.models import Distance, VectorParams from .... schema import DocumentEmbeddingsResponse -from .... schema import Error, Value +from .... schema import Error from .... base import DocumentEmbeddingsQueryService # Module logger diff --git a/trustgraph-flow/trustgraph/query/graph_embeddings/milvus/service.py b/trustgraph-flow/trustgraph/query/graph_embeddings/milvus/service.py index cb9255c2..3ee34ba7 100755 --- a/trustgraph-flow/trustgraph/query/graph_embeddings/milvus/service.py +++ b/trustgraph-flow/trustgraph/query/graph_embeddings/milvus/service.py @@ -8,7 +8,7 @@ import logging from .... direct.milvus_graph_embeddings import EntityVectors from .... schema import GraphEmbeddingsResponse -from .... schema import Error, Value +from .... schema import Error, Term, IRI, LITERAL from .... base import GraphEmbeddingsQueryService # Module logger @@ -33,9 +33,9 @@ class Processor(GraphEmbeddingsQueryService): def create_value(self, ent): if ent.startswith("http://") or ent.startswith("https://"): - return Value(value=ent, is_uri=True) + return Term(type=IRI, iri=ent) else: - return Value(value=ent, is_uri=False) + return Term(type=LITERAL, value=ent) async def query_graph_embeddings(self, msg): diff --git a/trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/service.py b/trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/service.py index f6277e4f..c90f64dd 100755 --- a/trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/service.py +++ b/trustgraph-flow/trustgraph/query/graph_embeddings/pinecone/service.py @@ -12,7 +12,7 @@ from pinecone import Pinecone, ServerlessSpec from pinecone.grpc import PineconeGRPC, GRPCClientConfig from .... schema import GraphEmbeddingsResponse -from .... schema import Error, Value +from .... schema import Error, Term, IRI, LITERAL from .... base import GraphEmbeddingsQueryService # Module logger @@ -51,9 +51,9 @@ class Processor(GraphEmbeddingsQueryService): def create_value(self, ent): if ent.startswith("http://") or ent.startswith("https://"): - return Value(value=ent, is_uri=True) + return Term(type=IRI, iri=ent) else: - return Value(value=ent, is_uri=False) + return Term(type=LITERAL, value=ent) async def query_graph_embeddings(self, msg): diff --git a/trustgraph-flow/trustgraph/query/graph_embeddings/qdrant/service.py b/trustgraph-flow/trustgraph/query/graph_embeddings/qdrant/service.py index 513fd2e4..bf11e1c9 100755 --- a/trustgraph-flow/trustgraph/query/graph_embeddings/qdrant/service.py +++ b/trustgraph-flow/trustgraph/query/graph_embeddings/qdrant/service.py @@ -11,7 +11,7 @@ from qdrant_client.models import PointStruct from qdrant_client.models import Distance, VectorParams from .... schema import GraphEmbeddingsResponse -from .... schema import Error, Value +from .... schema import Error, Term, IRI, LITERAL from .... base import GraphEmbeddingsQueryService # Module logger @@ -67,9 +67,9 @@ class Processor(GraphEmbeddingsQueryService): def create_value(self, ent): if ent.startswith("http://") or ent.startswith("https://"): - return Value(value=ent, is_uri=True) + return Term(type=IRI, iri=ent) else: - return Value(value=ent, is_uri=False) + return Term(type=LITERAL, value=ent) async def query_graph_embeddings(self, msg): diff --git a/trustgraph-flow/trustgraph/query/triples/cassandra/service.py b/trustgraph-flow/trustgraph/query/triples/cassandra/service.py index 13726ac3..51bbefa9 100755 --- a/trustgraph-flow/trustgraph/query/triples/cassandra/service.py +++ b/trustgraph-flow/trustgraph/query/triples/cassandra/service.py @@ -1,14 +1,14 @@ """ -Triples query service. Input is a (s, p, o) triple, some values may be -null. Output is a list of triples. +Triples query service. Input is a (s, p, o, g) quad pattern, some values may be +null. Output is a list of quads. """ import logging -from .... direct.cassandra_kg import KnowledgeGraph +from .... direct.cassandra_kg import KnowledgeGraph, GRAPH_WILDCARD, DEFAULT_GRAPH from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error -from .... schema import Value, Triple +from .... schema import Term, Triple, IRI, LITERAL from .... base import TriplesQueryService from .... base.cassandra_config import add_cassandra_args, resolve_cassandra_config @@ -18,6 +18,27 @@ logger = logging.getLogger(__name__) default_ident = "triples-query" +def get_term_value(term): + """Extract the string value from a Term""" + if term is None: + return None + if term.type == IRI: + return term.iri + elif term.type == LITERAL: + return term.value + else: + # For blank nodes or other types, use id or value + return term.id or term.value + + +def create_term(value): + """Create a Term from a string value""" + if value.startswith("http://") or value.startswith("https://"): + return Term(type=IRI, iri=value) + else: + return Term(type=LITERAL, value=value) + + class Processor(TriplesQueryService): def __init__(self, **params): @@ -46,12 +67,6 @@ class Processor(TriplesQueryService): self.cassandra_password = password self.table = None - def create_value(self, ent): - if ent.startswith("http://") or ent.startswith("https://"): - return Value(value=ent, is_uri=True) - else: - return Value(value=ent, is_uri=False) - async def query_triples(self, query): try: @@ -72,77 +87,103 @@ class Processor(TriplesQueryService): ) self.table = user - triples = [] + # Extract values from query + s_val = get_term_value(query.s) + p_val = get_term_value(query.p) + o_val = get_term_value(query.o) + g_val = query.g # Already a string or None - if query.s is not None: - if query.p is not None: - if query.o is not None: + quads = [] + + # Route to appropriate query method based on which fields are specified + if s_val is not None: + if p_val is not None: + if o_val is not None: + # SPO specified - find matching graphs resp = self.tg.get_spo( - query.collection, query.s.value, query.p.value, query.o.value, + query.collection, s_val, p_val, o_val, g=g_val, limit=query.limit ) - triples.append((query.s.value, query.p.value, query.o.value)) + for t in resp: + g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH + quads.append((s_val, p_val, o_val, g)) else: + # SP specified resp = self.tg.get_sp( - query.collection, query.s.value, query.p.value, + query.collection, s_val, p_val, g=g_val, limit=query.limit ) for t in resp: - triples.append((query.s.value, query.p.value, t.o)) + g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH + quads.append((s_val, p_val, t.o, g)) else: - if query.o is not None: + if o_val is not None: + # SO specified resp = self.tg.get_os( - query.collection, query.o.value, query.s.value, + query.collection, o_val, s_val, g=g_val, limit=query.limit ) for t in resp: - triples.append((query.s.value, t.p, query.o.value)) + g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH + quads.append((s_val, t.p, o_val, g)) else: + # S only resp = self.tg.get_s( - query.collection, query.s.value, + query.collection, s_val, g=g_val, limit=query.limit ) for t in resp: - triples.append((query.s.value, t.p, t.o)) + g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH + quads.append((s_val, t.p, t.o, g)) else: - if query.p is not None: - if query.o is not None: + if p_val is not None: + if o_val is not None: + # PO specified resp = self.tg.get_po( - query.collection, query.p.value, query.o.value, + query.collection, p_val, o_val, g=g_val, limit=query.limit ) for t in resp: - triples.append((t.s, query.p.value, query.o.value)) + g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH + quads.append((t.s, p_val, o_val, g)) else: + # P only resp = self.tg.get_p( - query.collection, query.p.value, + query.collection, p_val, g=g_val, limit=query.limit ) for t in resp: - triples.append((t.s, query.p.value, t.o)) + g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH + quads.append((t.s, p_val, t.o, g)) else: - if query.o is not None: + if o_val is not None: + # O only resp = self.tg.get_o( - query.collection, query.o.value, + query.collection, o_val, g=g_val, limit=query.limit ) for t in resp: - triples.append((t.s, t.p, query.o.value)) + g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH + quads.append((t.s, t.p, o_val, g)) else: + # Nothing specified - get all resp = self.tg.get_all( query.collection, limit=query.limit ) for t in resp: - triples.append((t.s, t.p, t.o)) + g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH + quads.append((t.s, t.p, t.o, g)) + # Convert to Triple objects (with g field) triples = [ Triple( - s=self.create_value(t[0]), - p=self.create_value(t[1]), - o=self.create_value(t[2]) + s=create_term(q[0]), + p=create_term(q[1]), + o=create_term(q[2]), + g=q[3] if q[3] != DEFAULT_GRAPH else None ) - for t in triples + for q in quads ] return triples @@ -162,4 +203,3 @@ class Processor(TriplesQueryService): def run(): Processor.launch(default_ident, __doc__) - diff --git a/trustgraph-flow/trustgraph/query/triples/falkordb/service.py b/trustgraph-flow/trustgraph/query/triples/falkordb/service.py index d1c7be7d..14b24d52 100755 --- a/trustgraph-flow/trustgraph/query/triples/falkordb/service.py +++ b/trustgraph-flow/trustgraph/query/triples/falkordb/service.py @@ -10,12 +10,24 @@ import logging from falkordb import FalkorDB from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error -from .... schema import Value, Triple +from .... schema import Term, Triple, IRI, LITERAL from .... base import TriplesQueryService # Module logger logger = logging.getLogger(__name__) + +def get_term_value(term): + """Extract the string value from a Term""" + if term is None: + return None + if term.type == IRI: + return term.iri + elif term.type == LITERAL: + return term.value + else: + return term.id or term.value + default_ident = "triples-query" default_graph_url = 'falkor://falkordb:6379' @@ -42,9 +54,9 @@ class Processor(TriplesQueryService): def create_value(self, ent): if ent.startswith("http://") or ent.startswith("https://"): - return Value(value=ent, is_uri=True) + return Term(type=IRI, iri=ent) else: - return Value(value=ent, is_uri=False) + return Term(type=LITERAL, value=ent) async def query_triples(self, query): @@ -63,28 +75,28 @@ class Processor(TriplesQueryService): "RETURN $src as src " "LIMIT " + str(query.limit), params={ - "src": query.s.value, - "rel": query.p.value, - "value": query.o.value, + "src": get_term_value(query.s), + "rel": get_term_value(query.p), + "value": get_term_value(query.o), }, ).result_set for rec in records: - triples.append((query.s.value, query.p.value, query.o.value)) + triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o))) records = self.io.query( "MATCH (src:Node {uri: $src})-[rel:Rel {uri: $rel}]->(dest:Node {uri: $uri}) " "RETURN $src as src " "LIMIT " + str(query.limit), params={ - "src": query.s.value, - "rel": query.p.value, - "uri": query.o.value, + "src": get_term_value(query.s), + "rel": get_term_value(query.p), + "uri": get_term_value(query.o), }, ).result_set for rec in records: - triples.append((query.s.value, query.p.value, query.o.value)) + triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o))) else: @@ -95,26 +107,26 @@ class Processor(TriplesQueryService): "RETURN dest.value as dest " "LIMIT " + str(query.limit), params={ - "src": query.s.value, - "rel": query.p.value, + "src": get_term_value(query.s), + "rel": get_term_value(query.p), }, ).result_set for rec in records: - triples.append((query.s.value, query.p.value, rec[0])) + triples.append((get_term_value(query.s), get_term_value(query.p), rec[0])) records = self.io.query( "MATCH (src:Node {uri: $src})-[rel:Rel {uri: $rel}]->(dest:Node) " "RETURN dest.uri as dest " "LIMIT " + str(query.limit), params={ - "src": query.s.value, - "rel": query.p.value, + "src": get_term_value(query.s), + "rel": get_term_value(query.p), }, ).result_set for rec in records: - triples.append((query.s.value, query.p.value, rec[0])) + triples.append((get_term_value(query.s), get_term_value(query.p), rec[0])) else: @@ -127,26 +139,26 @@ class Processor(TriplesQueryService): "RETURN rel.uri as rel " "LIMIT " + str(query.limit), params={ - "src": query.s.value, - "value": query.o.value, + "src": get_term_value(query.s), + "value": get_term_value(query.o), }, ).result_set for rec in records: - triples.append((query.s.value, rec[0], query.o.value)) + triples.append((get_term_value(query.s), rec[0], get_term_value(query.o))) records = self.io.query( "MATCH (src:Node {uri: $src})-[rel:Rel]->(dest:Node {uri: $uri}) " "RETURN rel.uri as rel " "LIMIT " + str(query.limit), params={ - "src": query.s.value, - "uri": query.o.value, + "src": get_term_value(query.s), + "uri": get_term_value(query.o), }, ).result_set for rec in records: - triples.append((query.s.value, rec[0], query.o.value)) + triples.append((get_term_value(query.s), rec[0], get_term_value(query.o))) else: @@ -157,24 +169,24 @@ class Processor(TriplesQueryService): "RETURN rel.uri as rel, dest.value as dest " "LIMIT " + str(query.limit), params={ - "src": query.s.value, + "src": get_term_value(query.s), }, ).result_set for rec in records: - triples.append((query.s.value, rec[0], rec[1])) + triples.append((get_term_value(query.s), rec[0], rec[1])) records = self.io.query( "MATCH (src:Node {uri: $src})-[rel:Rel]->(dest:Node) " "RETURN rel.uri as rel, dest.uri as dest " "LIMIT " + str(query.limit), params={ - "src": query.s.value, + "src": get_term_value(query.s), }, ).result_set for rec in records: - triples.append((query.s.value, rec[0], rec[1])) + triples.append((get_term_value(query.s), rec[0], rec[1])) else: @@ -190,26 +202,26 @@ class Processor(TriplesQueryService): "RETURN src.uri as src " "LIMIT " + str(query.limit), params={ - "uri": query.p.value, - "value": query.o.value, + "uri": get_term_value(query.p), + "value": get_term_value(query.o), }, ).result_set for rec in records: - triples.append((rec[0], query.p.value, query.o.value)) + triples.append((rec[0], get_term_value(query.p), get_term_value(query.o))) records = self.io.query( "MATCH (src:Node)-[rel:Rel {uri: $uri}]->(dest:Node {uri: $dest}) " "RETURN src.uri as src " "LIMIT " + str(query.limit), params={ - "uri": query.p.value, - "dest": query.o.value, + "uri": get_term_value(query.p), + "dest": get_term_value(query.o), }, ).result_set for rec in records: - triples.append((rec[0], query.p.value, query.o.value)) + triples.append((rec[0], get_term_value(query.p), get_term_value(query.o))) else: @@ -220,24 +232,24 @@ class Processor(TriplesQueryService): "RETURN src.uri as src, dest.value as dest " "LIMIT " + str(query.limit), params={ - "uri": query.p.value, + "uri": get_term_value(query.p), }, ).result_set for rec in records: - triples.append((rec[0], query.p.value, rec[1])) + triples.append((rec[0], get_term_value(query.p), rec[1])) records = self.io.query( "MATCH (src:Node)-[rel:Rel {uri: $uri}]->(dest:Node) " "RETURN src.uri as src, dest.uri as dest " "LIMIT " + str(query.limit), params={ - "uri": query.p.value, + "uri": get_term_value(query.p), }, ).result_set for rec in records: - triples.append((rec[0], query.p.value, rec[1])) + triples.append((rec[0], get_term_value(query.p), rec[1])) else: @@ -250,24 +262,24 @@ class Processor(TriplesQueryService): "RETURN src.uri as src, rel.uri as rel " "LIMIT " + str(query.limit), params={ - "value": query.o.value, + "value": get_term_value(query.o), }, ).result_set for rec in records: - triples.append((rec[0], rec[1], query.o.value)) + triples.append((rec[0], rec[1], get_term_value(query.o))) records = self.io.query( "MATCH (src:Node)-[rel:Rel]->(dest:Node {uri: $uri}) " "RETURN src.uri as src, rel.uri as rel " "LIMIT " + str(query.limit), params={ - "uri": query.o.value, + "uri": get_term_value(query.o), }, ).result_set for rec in records: - triples.append((rec[0], rec[1], query.o.value)) + triples.append((rec[0], rec[1], get_term_value(query.o))) else: diff --git a/trustgraph-flow/trustgraph/query/triples/memgraph/service.py b/trustgraph-flow/trustgraph/query/triples/memgraph/service.py index 262f89ab..37633f34 100755 --- a/trustgraph-flow/trustgraph/query/triples/memgraph/service.py +++ b/trustgraph-flow/trustgraph/query/triples/memgraph/service.py @@ -10,12 +10,24 @@ import logging from neo4j import GraphDatabase from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error -from .... schema import Value, Triple +from .... schema import Term, Triple, IRI, LITERAL from .... base import TriplesQueryService # Module logger logger = logging.getLogger(__name__) + +def get_term_value(term): + """Extract the string value from a Term""" + if term is None: + return None + if term.type == IRI: + return term.iri + elif term.type == LITERAL: + return term.value + else: + return term.id or term.value + default_ident = "triples-query" default_graph_host = 'bolt://memgraph:7687' @@ -47,9 +59,9 @@ class Processor(TriplesQueryService): def create_value(self, ent): if ent.startswith("http://") or ent.startswith("https://"): - return Value(value=ent, is_uri=True) + return Term(type=IRI, iri=ent) else: - return Value(value=ent, is_uri=False) + return Term(type=LITERAL, value=ent) async def query_triples(self, query): @@ -73,13 +85,13 @@ class Processor(TriplesQueryService): "(dest:Literal {value: $value, user: $user, collection: $collection}) " "RETURN $src as src " "LIMIT " + str(query.limit), - src=query.s.value, rel=query.p.value, value=query.o.value, + src=get_term_value(query.s), rel=get_term_value(query.p), value=get_term_value(query.o), user=user, collection=collection, database_=self.db, ) for rec in records: - triples.append((query.s.value, query.p.value, query.o.value)) + triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o))) records, summary, keys = self.io.execute_query( "MATCH (src:Node {uri: $src, user: $user, collection: $collection})-" @@ -87,13 +99,13 @@ class Processor(TriplesQueryService): "(dest:Node {uri: $uri, user: $user, collection: $collection}) " "RETURN $src as src " "LIMIT " + str(query.limit), - src=query.s.value, rel=query.p.value, uri=query.o.value, + src=get_term_value(query.s), rel=get_term_value(query.p), uri=get_term_value(query.o), user=user, collection=collection, database_=self.db, ) for rec in records: - triples.append((query.s.value, query.p.value, query.o.value)) + triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o))) else: @@ -105,14 +117,14 @@ class Processor(TriplesQueryService): "(dest:Literal {user: $user, collection: $collection}) " "RETURN dest.value as dest " "LIMIT " + str(query.limit), - src=query.s.value, rel=query.p.value, + src=get_term_value(query.s), rel=get_term_value(query.p), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((query.s.value, query.p.value, data["dest"])) + triples.append((get_term_value(query.s), get_term_value(query.p), data["dest"])) records, summary, keys = self.io.execute_query( "MATCH (src:Node {uri: $src, user: $user, collection: $collection})-" @@ -120,14 +132,14 @@ class Processor(TriplesQueryService): "(dest:Node {user: $user, collection: $collection}) " "RETURN dest.uri as dest " "LIMIT " + str(query.limit), - src=query.s.value, rel=query.p.value, + src=get_term_value(query.s), rel=get_term_value(query.p), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((query.s.value, query.p.value, data["dest"])) + triples.append((get_term_value(query.s), get_term_value(query.p), data["dest"])) else: @@ -141,14 +153,14 @@ class Processor(TriplesQueryService): "(dest:Literal {value: $value, user: $user, collection: $collection}) " "RETURN rel.uri as rel " "LIMIT " + str(query.limit), - src=query.s.value, value=query.o.value, + src=get_term_value(query.s), value=get_term_value(query.o), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((query.s.value, data["rel"], query.o.value)) + triples.append((get_term_value(query.s), data["rel"], get_term_value(query.o))) records, summary, keys = self.io.execute_query( "MATCH (src:Node {uri: $src, user: $user, collection: $collection})-" @@ -156,14 +168,14 @@ class Processor(TriplesQueryService): "(dest:Node {uri: $uri, user: $user, collection: $collection}) " "RETURN rel.uri as rel " "LIMIT " + str(query.limit), - src=query.s.value, uri=query.o.value, + src=get_term_value(query.s), uri=get_term_value(query.o), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((query.s.value, data["rel"], query.o.value)) + triples.append((get_term_value(query.s), data["rel"], get_term_value(query.o))) else: @@ -175,14 +187,14 @@ class Processor(TriplesQueryService): "(dest:Literal {user: $user, collection: $collection}) " "RETURN rel.uri as rel, dest.value as dest " "LIMIT " + str(query.limit), - src=query.s.value, + src=get_term_value(query.s), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((query.s.value, data["rel"], data["dest"])) + triples.append((get_term_value(query.s), data["rel"], data["dest"])) records, summary, keys = self.io.execute_query( "MATCH (src:Node {uri: $src, user: $user, collection: $collection})-" @@ -190,14 +202,14 @@ class Processor(TriplesQueryService): "(dest:Node {user: $user, collection: $collection}) " "RETURN rel.uri as rel, dest.uri as dest " "LIMIT " + str(query.limit), - src=query.s.value, + src=get_term_value(query.s), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((query.s.value, data["rel"], data["dest"])) + triples.append((get_term_value(query.s), data["rel"], data["dest"])) else: @@ -214,14 +226,14 @@ class Processor(TriplesQueryService): "(dest:Literal {value: $value, user: $user, collection: $collection}) " "RETURN src.uri as src " "LIMIT " + str(query.limit), - uri=query.p.value, value=query.o.value, + uri=get_term_value(query.p), value=get_term_value(query.o), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((data["src"], query.p.value, query.o.value)) + triples.append((data["src"], get_term_value(query.p), get_term_value(query.o))) records, summary, keys = self.io.execute_query( "MATCH (src:Node {user: $user, collection: $collection})-" @@ -229,14 +241,14 @@ class Processor(TriplesQueryService): "(dest:Node {uri: $dest, user: $user, collection: $collection}) " "RETURN src.uri as src " "LIMIT " + str(query.limit), - uri=query.p.value, dest=query.o.value, + uri=get_term_value(query.p), dest=get_term_value(query.o), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((data["src"], query.p.value, query.o.value)) + triples.append((data["src"], get_term_value(query.p), get_term_value(query.o))) else: @@ -248,14 +260,14 @@ class Processor(TriplesQueryService): "(dest:Literal {user: $user, collection: $collection}) " "RETURN src.uri as src, dest.value as dest " "LIMIT " + str(query.limit), - uri=query.p.value, + uri=get_term_value(query.p), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((data["src"], query.p.value, data["dest"])) + triples.append((data["src"], get_term_value(query.p), data["dest"])) records, summary, keys = self.io.execute_query( "MATCH (src:Node {user: $user, collection: $collection})-" @@ -263,14 +275,14 @@ class Processor(TriplesQueryService): "(dest:Node {user: $user, collection: $collection}) " "RETURN src.uri as src, dest.uri as dest " "LIMIT " + str(query.limit), - uri=query.p.value, + uri=get_term_value(query.p), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((data["src"], query.p.value, data["dest"])) + triples.append((data["src"], get_term_value(query.p), data["dest"])) else: @@ -284,14 +296,14 @@ class Processor(TriplesQueryService): "(dest:Literal {value: $value, user: $user, collection: $collection}) " "RETURN src.uri as src, rel.uri as rel " "LIMIT " + str(query.limit), - value=query.o.value, + value=get_term_value(query.o), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((data["src"], data["rel"], query.o.value)) + triples.append((data["src"], data["rel"], get_term_value(query.o))) records, summary, keys = self.io.execute_query( "MATCH (src:Node {user: $user, collection: $collection})-" @@ -299,14 +311,14 @@ class Processor(TriplesQueryService): "(dest:Node {uri: $uri, user: $user, collection: $collection}) " "RETURN src.uri as src, rel.uri as rel " "LIMIT " + str(query.limit), - uri=query.o.value, + uri=get_term_value(query.o), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((data["src"], data["rel"], query.o.value)) + triples.append((data["src"], data["rel"], get_term_value(query.o))) else: diff --git a/trustgraph-flow/trustgraph/query/triples/neo4j/service.py b/trustgraph-flow/trustgraph/query/triples/neo4j/service.py index 8c513a67..4cb1ab21 100755 --- a/trustgraph-flow/trustgraph/query/triples/neo4j/service.py +++ b/trustgraph-flow/trustgraph/query/triples/neo4j/service.py @@ -10,12 +10,24 @@ import logging from neo4j import GraphDatabase from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error -from .... schema import Value, Triple +from .... schema import Term, Triple, IRI, LITERAL from .... base import TriplesQueryService # Module logger logger = logging.getLogger(__name__) + +def get_term_value(term): + """Extract the string value from a Term""" + if term is None: + return None + if term.type == IRI: + return term.iri + elif term.type == LITERAL: + return term.value + else: + return term.id or term.value + default_ident = "triples-query" default_graph_host = 'bolt://neo4j:7687' @@ -47,9 +59,9 @@ class Processor(TriplesQueryService): def create_value(self, ent): if ent.startswith("http://") or ent.startswith("https://"): - return Value(value=ent, is_uri=True) + return Term(type=IRI, iri=ent) else: - return Value(value=ent, is_uri=False) + return Term(type=LITERAL, value=ent) async def query_triples(self, query): @@ -73,13 +85,13 @@ class Processor(TriplesQueryService): "(dest:Literal {value: $value, user: $user, collection: $collection}) " "RETURN $src as src " "LIMIT " + str(query.limit), - src=query.s.value, rel=query.p.value, value=query.o.value, + src=get_term_value(query.s), rel=get_term_value(query.p), value=get_term_value(query.o), user=user, collection=collection, database_=self.db, ) for rec in records: - triples.append((query.s.value, query.p.value, query.o.value)) + triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o))) records, summary, keys = self.io.execute_query( "MATCH (src:Node {uri: $src, user: $user, collection: $collection})-" @@ -87,13 +99,13 @@ class Processor(TriplesQueryService): "(dest:Node {uri: $uri, user: $user, collection: $collection}) " "RETURN $src as src " "LIMIT " + str(query.limit), - src=query.s.value, rel=query.p.value, uri=query.o.value, + src=get_term_value(query.s), rel=get_term_value(query.p), uri=get_term_value(query.o), user=user, collection=collection, database_=self.db, ) for rec in records: - triples.append((query.s.value, query.p.value, query.o.value)) + triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o))) else: @@ -105,14 +117,14 @@ class Processor(TriplesQueryService): "(dest:Literal {user: $user, collection: $collection}) " "RETURN dest.value as dest " "LIMIT " + str(query.limit), - src=query.s.value, rel=query.p.value, + src=get_term_value(query.s), rel=get_term_value(query.p), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((query.s.value, query.p.value, data["dest"])) + triples.append((get_term_value(query.s), get_term_value(query.p), data["dest"])) records, summary, keys = self.io.execute_query( "MATCH (src:Node {uri: $src, user: $user, collection: $collection})-" @@ -120,14 +132,14 @@ class Processor(TriplesQueryService): "(dest:Node {user: $user, collection: $collection}) " "RETURN dest.uri as dest " "LIMIT " + str(query.limit), - src=query.s.value, rel=query.p.value, + src=get_term_value(query.s), rel=get_term_value(query.p), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((query.s.value, query.p.value, data["dest"])) + triples.append((get_term_value(query.s), get_term_value(query.p), data["dest"])) else: @@ -141,14 +153,14 @@ class Processor(TriplesQueryService): "(dest:Literal {value: $value, user: $user, collection: $collection}) " "RETURN rel.uri as rel " "LIMIT " + str(query.limit), - src=query.s.value, value=query.o.value, + src=get_term_value(query.s), value=get_term_value(query.o), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((query.s.value, data["rel"], query.o.value)) + triples.append((get_term_value(query.s), data["rel"], get_term_value(query.o))) records, summary, keys = self.io.execute_query( "MATCH (src:Node {uri: $src, user: $user, collection: $collection})-" @@ -156,14 +168,14 @@ class Processor(TriplesQueryService): "(dest:Node {uri: $uri, user: $user, collection: $collection}) " "RETURN rel.uri as rel " "LIMIT " + str(query.limit), - src=query.s.value, uri=query.o.value, + src=get_term_value(query.s), uri=get_term_value(query.o), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((query.s.value, data["rel"], query.o.value)) + triples.append((get_term_value(query.s), data["rel"], get_term_value(query.o))) else: @@ -175,14 +187,14 @@ class Processor(TriplesQueryService): "(dest:Literal {user: $user, collection: $collection}) " "RETURN rel.uri as rel, dest.value as dest " "LIMIT " + str(query.limit), - src=query.s.value, + src=get_term_value(query.s), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((query.s.value, data["rel"], data["dest"])) + triples.append((get_term_value(query.s), data["rel"], data["dest"])) records, summary, keys = self.io.execute_query( "MATCH (src:Node {uri: $src, user: $user, collection: $collection})-" @@ -190,14 +202,14 @@ class Processor(TriplesQueryService): "(dest:Node {user: $user, collection: $collection}) " "RETURN rel.uri as rel, dest.uri as dest " "LIMIT " + str(query.limit), - src=query.s.value, + src=get_term_value(query.s), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((query.s.value, data["rel"], data["dest"])) + triples.append((get_term_value(query.s), data["rel"], data["dest"])) else: @@ -214,14 +226,14 @@ class Processor(TriplesQueryService): "(dest:Literal {value: $value, user: $user, collection: $collection}) " "RETURN src.uri as src " "LIMIT " + str(query.limit), - uri=query.p.value, value=query.o.value, + uri=get_term_value(query.p), value=get_term_value(query.o), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((data["src"], query.p.value, query.o.value)) + triples.append((data["src"], get_term_value(query.p), get_term_value(query.o))) records, summary, keys = self.io.execute_query( "MATCH (src:Node {user: $user, collection: $collection})-" @@ -229,14 +241,14 @@ class Processor(TriplesQueryService): "(dest:Node {uri: $dest, user: $user, collection: $collection}) " "RETURN src.uri as src " "LIMIT " + str(query.limit), - uri=query.p.value, dest=query.o.value, + uri=get_term_value(query.p), dest=get_term_value(query.o), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((data["src"], query.p.value, query.o.value)) + triples.append((data["src"], get_term_value(query.p), get_term_value(query.o))) else: @@ -248,14 +260,14 @@ class Processor(TriplesQueryService): "(dest:Literal {user: $user, collection: $collection}) " "RETURN src.uri as src, dest.value as dest " "LIMIT " + str(query.limit), - uri=query.p.value, + uri=get_term_value(query.p), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((data["src"], query.p.value, data["dest"])) + triples.append((data["src"], get_term_value(query.p), data["dest"])) records, summary, keys = self.io.execute_query( "MATCH (src:Node {user: $user, collection: $collection})-" @@ -263,14 +275,14 @@ class Processor(TriplesQueryService): "(dest:Node {user: $user, collection: $collection}) " "RETURN src.uri as src, dest.uri as dest " "LIMIT " + str(query.limit), - uri=query.p.value, + uri=get_term_value(query.p), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((data["src"], query.p.value, data["dest"])) + triples.append((data["src"], get_term_value(query.p), data["dest"])) else: @@ -284,14 +296,14 @@ class Processor(TriplesQueryService): "(dest:Literal {value: $value, user: $user, collection: $collection}) " "RETURN src.uri as src, rel.uri as rel " "LIMIT " + str(query.limit), - value=query.o.value, + value=get_term_value(query.o), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((data["src"], data["rel"], query.o.value)) + triples.append((data["src"], data["rel"], get_term_value(query.o))) records, summary, keys = self.io.execute_query( "MATCH (src:Node {user: $user, collection: $collection})-" @@ -299,14 +311,14 @@ class Processor(TriplesQueryService): "(dest:Node {uri: $uri, user: $user, collection: $collection}) " "RETURN src.uri as src, rel.uri as rel " "LIMIT " + str(query.limit), - uri=query.o.value, + uri=get_term_value(query.o), user=user, collection=collection, database_=self.db, ) for rec in records: data = rec.data() - triples.append((data["src"], data["rel"], query.o.value)) + triples.append((data["src"], data["rel"], get_term_value(query.o))) else: diff --git a/trustgraph-flow/trustgraph/storage/graph_embeddings/milvus/write.py b/trustgraph-flow/trustgraph/storage/graph_embeddings/milvus/write.py index 2e192cd6..148e866a 100755 --- a/trustgraph-flow/trustgraph/storage/graph_embeddings/milvus/write.py +++ b/trustgraph-flow/trustgraph/storage/graph_embeddings/milvus/write.py @@ -9,10 +9,24 @@ from .... direct.milvus_graph_embeddings import EntityVectors from .... base import GraphEmbeddingsStoreService, CollectionConfigHandler from .... base import AsyncProcessor, Consumer, Producer from .... base import ConsumerMetrics, ProducerMetrics +from .... schema import IRI, LITERAL # Module logger logger = logging.getLogger(__name__) + +def get_term_value(term): + """Extract the string value from a Term""" + if term is None: + return None + if term.type == IRI: + return term.iri + elif term.type == LITERAL: + return term.value + else: + # For blank nodes or other types, use id or value + return term.id or term.value + default_ident = "ge-write" default_store_uri = 'http://localhost:19530' @@ -36,11 +50,12 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService): async def store_graph_embeddings(self, message): for entity in message.entities: + entity_value = get_term_value(entity.entity) - if entity.entity.value != "" and entity.entity.value is not None: + if entity_value != "" and entity_value is not None: for vec in entity.vectors: self.vecstore.insert( - vec, entity.entity.value, + vec, entity_value, message.metadata.user, message.metadata.collection ) diff --git a/trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/write.py b/trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/write.py index 0bee6ceb..c92d7661 100755 --- a/trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/write.py +++ b/trustgraph-flow/trustgraph/storage/graph_embeddings/pinecone/write.py @@ -14,10 +14,24 @@ import logging from .... base import GraphEmbeddingsStoreService, CollectionConfigHandler from .... base import AsyncProcessor, Consumer, Producer from .... base import ConsumerMetrics, ProducerMetrics +from .... schema import IRI, LITERAL # Module logger logger = logging.getLogger(__name__) + +def get_term_value(term): + """Extract the string value from a Term""" + if term is None: + return None + if term.type == IRI: + return term.iri + elif term.type == LITERAL: + return term.value + else: + # For blank nodes or other types, use id or value + return term.id or term.value + default_ident = "ge-write" default_api_key = os.getenv("PINECONE_API_KEY", "not-specified") default_cloud = "aws" @@ -100,8 +114,9 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService): return for entity in message.entities: + entity_value = get_term_value(entity.entity) - if entity.entity.value == "" or entity.entity.value is None: + if entity_value == "" or entity_value is None: continue for vec in entity.vectors: @@ -126,7 +141,7 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService): { "id": vector_id, "values": vec, - "metadata": { "entity": entity.entity.value }, + "metadata": { "entity": entity_value }, } ] diff --git a/trustgraph-flow/trustgraph/storage/graph_embeddings/qdrant/write.py b/trustgraph-flow/trustgraph/storage/graph_embeddings/qdrant/write.py index e3c2b6bc..bdc5fa70 100755 --- a/trustgraph-flow/trustgraph/storage/graph_embeddings/qdrant/write.py +++ b/trustgraph-flow/trustgraph/storage/graph_embeddings/qdrant/write.py @@ -12,10 +12,25 @@ import logging from .... base import GraphEmbeddingsStoreService, CollectionConfigHandler from .... base import AsyncProcessor, Consumer, Producer from .... base import ConsumerMetrics, ProducerMetrics +from .... schema import IRI, LITERAL # Module logger logger = logging.getLogger(__name__) + +def get_term_value(term): + """Extract the string value from a Term""" + if term is None: + return None + if term.type == IRI: + return term.iri + elif term.type == LITERAL: + return term.value + else: + # For blank nodes or other types, use id or value + return term.id or term.value + + default_ident = "ge-write" default_store_uri = 'http://localhost:6333' @@ -51,8 +66,10 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService): return for entity in message.entities: + entity_value = get_term_value(entity.entity) - if entity.entity.value == "" or entity.entity.value is None: return + if entity_value == "" or entity_value is None: + continue for vec in entity.vectors: @@ -80,7 +97,7 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService): id=str(uuid.uuid4()), vector=vec, payload={ - "entity": entity.entity.value, + "entity": entity_value, } ) ] diff --git a/trustgraph-flow/trustgraph/storage/triples/cassandra/write.py b/trustgraph-flow/trustgraph/storage/triples/cassandra/write.py index b9b42375..6ed93b7d 100755 --- a/trustgraph-flow/trustgraph/storage/triples/cassandra/write.py +++ b/trustgraph-flow/trustgraph/storage/triples/cassandra/write.py @@ -10,11 +10,12 @@ import argparse import time import logging -from .... direct.cassandra_kg import KnowledgeGraph +from .... direct.cassandra_kg import KnowledgeGraph, DEFAULT_GRAPH from .... base import TriplesStoreService, CollectionConfigHandler from .... base import AsyncProcessor, Consumer, Producer from .... base import ConsumerMetrics, ProducerMetrics from .... base.cassandra_config import add_cassandra_args, resolve_cassandra_config +from .... schema import IRI, LITERAL # Module logger logger = logging.getLogger(__name__) @@ -22,6 +23,19 @@ logger = logging.getLogger(__name__) default_ident = "triples-write" +def get_term_value(term): + """Extract the string value from a Term""" + if term is None: + return None + if term.type == IRI: + return term.iri + elif term.type == LITERAL: + return term.value + else: + # For blank nodes or other types, use id or value + return term.id or term.value + + class Processor(CollectionConfigHandler, TriplesStoreService): def __init__(self, **params): @@ -84,11 +98,19 @@ class Processor(CollectionConfigHandler, TriplesStoreService): self.table = user for t in message.triples: + # Extract values from Term objects + s_val = get_term_value(t.s) + p_val = get_term_value(t.p) + o_val = get_term_value(t.o) + # t.g is None for default graph, or a graph IRI + g_val = t.g if t.g is not None else DEFAULT_GRAPH + self.tg.insert( message.metadata.collection, - t.s.value, - t.p.value, - t.o.value + s_val, + p_val, + o_val, + g=g_val ) async def create_collection(self, user: str, collection: str, metadata: dict): diff --git a/trustgraph-flow/trustgraph/storage/triples/falkordb/write.py b/trustgraph-flow/trustgraph/storage/triples/falkordb/write.py index f08eeb91..210ea53d 100755 --- a/trustgraph-flow/trustgraph/storage/triples/falkordb/write.py +++ b/trustgraph-flow/trustgraph/storage/triples/falkordb/write.py @@ -15,12 +15,27 @@ from falkordb import FalkorDB from .... base import TriplesStoreService, CollectionConfigHandler from .... base import AsyncProcessor, Consumer, Producer from .... base import ConsumerMetrics, ProducerMetrics +from .... schema import IRI, LITERAL # Module logger logger = logging.getLogger(__name__) default_ident = "triples-write" + +def get_term_value(term): + """Extract the string value from a Term""" + if term is None: + return None + if term.type == IRI: + return term.iri + elif term.type == LITERAL: + return term.value + else: + # For blank nodes or other types, use id or value + return term.id or term.value + + default_graph_url = 'falkor://falkordb:6379' default_database = 'falkordb' @@ -164,14 +179,18 @@ class Processor(CollectionConfigHandler, TriplesStoreService): for t in message.triples: - self.create_node(t.s.value, user, collection) + s_val = get_term_value(t.s) + p_val = get_term_value(t.p) + o_val = get_term_value(t.o) - if t.o.is_uri: - self.create_node(t.o.value, user, collection) - self.relate_node(t.s.value, t.p.value, t.o.value, user, collection) + self.create_node(s_val, user, collection) + + if t.o.type == IRI: + self.create_node(o_val, user, collection) + self.relate_node(s_val, p_val, o_val, user, collection) else: - self.create_literal(t.o.value, user, collection) - self.relate_literal(t.s.value, t.p.value, t.o.value, user, collection) + self.create_literal(o_val, user, collection) + self.relate_literal(s_val, p_val, o_val, user, collection) @staticmethod def add_args(parser): diff --git a/trustgraph-flow/trustgraph/storage/triples/memgraph/write.py b/trustgraph-flow/trustgraph/storage/triples/memgraph/write.py index 8105b14e..55d4dee1 100755 --- a/trustgraph-flow/trustgraph/storage/triples/memgraph/write.py +++ b/trustgraph-flow/trustgraph/storage/triples/memgraph/write.py @@ -15,12 +15,27 @@ from neo4j import GraphDatabase from .... base import TriplesStoreService, CollectionConfigHandler from .... base import AsyncProcessor, Consumer, Producer from .... base import ConsumerMetrics, ProducerMetrics +from .... schema import IRI, LITERAL # Module logger logger = logging.getLogger(__name__) default_ident = "triples-write" + +def get_term_value(term): + """Extract the string value from a Term""" + if term is None: + return None + if term.type == IRI: + return term.iri + elif term.type == LITERAL: + return term.value + else: + # For blank nodes or other types, use id or value + return term.id or term.value + + default_graph_host = 'bolt://memgraph:7687' default_username = 'memgraph' default_password = 'password' @@ -204,40 +219,44 @@ class Processor(CollectionConfigHandler, TriplesStoreService): def create_triple(self, tx, t, user, collection): + s_val = get_term_value(t.s) + p_val = get_term_value(t.p) + o_val = get_term_value(t.o) + # Create new s node with given uri, if not exists result = tx.run( "MERGE (n:Node {uri: $uri, user: $user, collection: $collection})", - uri=t.s.value, user=user, collection=collection + uri=s_val, user=user, collection=collection ) - if t.o.is_uri: + if t.o.type == IRI: # Create new o node with given uri, if not exists result = tx.run( "MERGE (n:Node {uri: $uri, user: $user, collection: $collection})", - uri=t.o.value, user=user, collection=collection + uri=o_val, user=user, collection=collection ) result = tx.run( "MATCH (src:Node {uri: $src, user: $user, collection: $collection}) " "MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) " "MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)", - src=t.s.value, dest=t.o.value, uri=t.p.value, user=user, collection=collection, + src=s_val, dest=o_val, uri=p_val, user=user, collection=collection, ) else: - + # Create new o literal with given uri, if not exists result = tx.run( "MERGE (n:Literal {value: $value, user: $user, collection: $collection})", - value=t.o.value, user=user, collection=collection + value=o_val, user=user, collection=collection ) result = tx.run( "MATCH (src:Node {uri: $src, user: $user, collection: $collection}) " "MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) " "MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)", - src=t.s.value, dest=t.o.value, uri=t.p.value, user=user, collection=collection, + src=s_val, dest=o_val, uri=p_val, user=user, collection=collection, ) async def store_triples(self, message): @@ -257,14 +276,18 @@ class Processor(CollectionConfigHandler, TriplesStoreService): for t in message.triples: - self.create_node(t.s.value, user, collection) + s_val = get_term_value(t.s) + p_val = get_term_value(t.p) + o_val = get_term_value(t.o) - if t.o.is_uri: - self.create_node(t.o.value, user, collection) - self.relate_node(t.s.value, t.p.value, t.o.value, user, collection) + self.create_node(s_val, user, collection) + + if t.o.type == IRI: + self.create_node(o_val, user, collection) + self.relate_node(s_val, p_val, o_val, user, collection) else: - self.create_literal(t.o.value, user, collection) - self.relate_literal(t.s.value, t.p.value, t.o.value, user, collection) + self.create_literal(o_val, user, collection) + self.relate_literal(s_val, p_val, o_val, user, collection) # Alternative implementation using transactions # with self.io.session(database=self.db) as session: diff --git a/trustgraph-flow/trustgraph/storage/triples/neo4j/write.py b/trustgraph-flow/trustgraph/storage/triples/neo4j/write.py index e33b26ca..4a85a273 100755 --- a/trustgraph-flow/trustgraph/storage/triples/neo4j/write.py +++ b/trustgraph-flow/trustgraph/storage/triples/neo4j/write.py @@ -14,12 +14,27 @@ from neo4j import GraphDatabase from .... base import TriplesStoreService, CollectionConfigHandler from .... base import AsyncProcessor, Consumer, Producer from .... base import ConsumerMetrics, ProducerMetrics +from .... schema import IRI, LITERAL # Module logger logger = logging.getLogger(__name__) default_ident = "triples-write" + +def get_term_value(term): + """Extract the string value from a Term""" + if term is None: + return None + if term.type == IRI: + return term.iri + elif term.type == LITERAL: + return term.value + else: + # For blank nodes or other types, use id or value + return term.id or term.value + + default_graph_host = 'bolt://neo4j:7687' default_username = 'neo4j' default_password = 'password' @@ -212,14 +227,18 @@ class Processor(CollectionConfigHandler, TriplesStoreService): for t in message.triples: - self.create_node(t.s.value, user, collection) + s_val = get_term_value(t.s) + p_val = get_term_value(t.p) + o_val = get_term_value(t.o) - if t.o.is_uri: - self.create_node(t.o.value, user, collection) - self.relate_node(t.s.value, t.p.value, t.o.value, user, collection) + self.create_node(s_val, user, collection) + + if t.o.type == IRI: + self.create_node(o_val, user, collection) + self.relate_node(s_val, p_val, o_val, user, collection) else: - self.create_literal(t.o.value, user, collection) - self.relate_literal(t.s.value, t.p.value, t.o.value, user, collection) + self.create_literal(o_val, user, collection) + self.relate_literal(s_val, p_val, o_val, user, collection) @staticmethod def add_args(parser): diff --git a/trustgraph-flow/trustgraph/tables/config.py b/trustgraph-flow/trustgraph/tables/config.py index f98929e1..fb9ea0a7 100644 --- a/trustgraph-flow/trustgraph/tables/config.py +++ b/trustgraph-flow/trustgraph/tables/config.py @@ -1,6 +1,6 @@ from .. schema import KnowledgeResponse, Triple, Triples, EntityEmbeddings -from .. schema import Metadata, Value, GraphEmbeddings +from .. schema import Metadata, GraphEmbeddings from cassandra.cluster import Cluster from cassandra.auth import PlainTextAuthProvider diff --git a/trustgraph-flow/trustgraph/tables/knowledge.py b/trustgraph-flow/trustgraph/tables/knowledge.py index 1ee61088..0e5d4e1d 100644 --- a/trustgraph-flow/trustgraph/tables/knowledge.py +++ b/trustgraph-flow/trustgraph/tables/knowledge.py @@ -1,8 +1,24 @@ from .. schema import KnowledgeResponse, Triple, Triples, EntityEmbeddings -from .. schema import Metadata, Value, GraphEmbeddings +from .. schema import Metadata, Term, IRI, LITERAL, GraphEmbeddings from cassandra.cluster import Cluster + + +def term_to_tuple(term): + """Convert Term to (value, is_uri) tuple for database storage.""" + if term.type == IRI: + return (term.iri, True) + else: # LITERAL + return (term.value, False) + + +def tuple_to_term(value, is_uri): + """Convert (value, is_uri) tuple from database to Term.""" + if is_uri: + return Term(type=IRI, iri=value) + else: + return Term(type=LITERAL, value=value) from cassandra.auth import PlainTextAuthProvider from ssl import SSLContext, PROTOCOL_TLSv1_2 @@ -205,8 +221,7 @@ class KnowledgeTableStore: if m.metadata.metadata: metadata = [ ( - v.s.value, v.s.is_uri, v.p.value, v.p.is_uri, - v.o.value, v.o.is_uri + *term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o) ) for v in m.metadata.metadata ] @@ -215,8 +230,7 @@ class KnowledgeTableStore: triples = [ ( - v.s.value, v.s.is_uri, v.p.value, v.p.is_uri, - v.o.value, v.o.is_uri + *term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o) ) for v in m.triples ] @@ -248,8 +262,7 @@ class KnowledgeTableStore: if m.metadata.metadata: metadata = [ ( - v.s.value, v.s.is_uri, v.p.value, v.p.is_uri, - v.o.value, v.o.is_uri + *term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o) ) for v in m.metadata.metadata ] @@ -258,7 +271,7 @@ class KnowledgeTableStore: entities = [ ( - (v.entity.value, v.entity.is_uri), + term_to_tuple(v.entity), v.vectors ) for v in m.entities @@ -291,8 +304,7 @@ class KnowledgeTableStore: if m.metadata.metadata: metadata = [ ( - v.s.value, v.s.is_uri, v.p.value, v.p.is_uri, - v.o.value, v.o.is_uri + *term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o) ) for v in m.metadata.metadata ] @@ -414,9 +426,9 @@ class KnowledgeTableStore: if row[2]: metadata = [ Triple( - s = Value(value = elt[0], is_uri = elt[1]), - p = Value(value = elt[2], is_uri = elt[3]), - o = Value(value = elt[4], is_uri = elt[5]), + s = tuple_to_term(elt[0], elt[1]), + p = tuple_to_term(elt[2], elt[3]), + o = tuple_to_term(elt[4], elt[5]), ) for elt in row[2] ] @@ -425,9 +437,9 @@ class KnowledgeTableStore: triples = [ Triple( - s = Value(value = elt[0], is_uri = elt[1]), - p = Value(value = elt[2], is_uri = elt[3]), - o = Value(value = elt[4], is_uri = elt[5]), + s = tuple_to_term(elt[0], elt[1]), + p = tuple_to_term(elt[2], elt[3]), + o = tuple_to_term(elt[4], elt[5]), ) for elt in row[3] ] @@ -470,9 +482,9 @@ class KnowledgeTableStore: if row[2]: metadata = [ Triple( - s = Value(value = elt[0], is_uri = elt[1]), - p = Value(value = elt[2], is_uri = elt[3]), - o = Value(value = elt[4], is_uri = elt[5]), + s = tuple_to_term(elt[0], elt[1]), + p = tuple_to_term(elt[2], elt[3]), + o = tuple_to_term(elt[4], elt[5]), ) for elt in row[2] ] @@ -481,7 +493,7 @@ class KnowledgeTableStore: entities = [ EntityEmbeddings( - entity = Value(value = ent[0][0], is_uri = ent[0][1]), + entity = tuple_to_term(ent[0][0], ent[0][1]), vectors = ent[1] ) for ent in row[3] diff --git a/trustgraph-flow/trustgraph/tables/library.py b/trustgraph-flow/trustgraph/tables/library.py index 0a7c6081..8bbe2bad 100644 --- a/trustgraph-flow/trustgraph/tables/library.py +++ b/trustgraph-flow/trustgraph/tables/library.py @@ -1,8 +1,24 @@ from .. schema import LibrarianRequest, LibrarianResponse from .. schema import DocumentMetadata, ProcessingMetadata -from .. schema import Error, Triple, Value +from .. schema import Error, Triple, Term, IRI, LITERAL from .. knowledge import hash + + +def term_to_tuple(term): + """Convert Term to (value, is_uri) tuple for database storage.""" + if term.type == IRI: + return (term.iri, True) + else: # LITERAL + return (term.value, False) + + +def tuple_to_term(value, is_uri): + """Convert (value, is_uri) tuple from database to Term.""" + if is_uri: + return Term(type=IRI, iri=value) + else: + return Term(type=LITERAL, value=value) from .. exceptions import RequestError from cassandra.cluster import Cluster @@ -215,8 +231,7 @@ class LibraryTableStore: metadata = [ ( - v.s.value, v.s.is_uri, v.p.value, v.p.is_uri, - v.o.value, v.o.is_uri + *term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o) ) for v in document.metadata ] @@ -249,8 +264,7 @@ class LibraryTableStore: metadata = [ ( - v.s.value, v.s.is_uri, v.p.value, v.p.is_uri, - v.o.value, v.o.is_uri + *term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o) ) for v in document.metadata ] @@ -331,9 +345,9 @@ class LibraryTableStore: comments = row[4], metadata = [ Triple( - s=Value(value=m[0], is_uri=m[1]), - p=Value(value=m[2], is_uri=m[3]), - o=Value(value=m[4], is_uri=m[5]) + s=tuple_to_term(m[0], m[1]), + p=tuple_to_term(m[2], m[3]), + o=tuple_to_term(m[4], m[5]) ) for m in row[5] ], @@ -376,9 +390,9 @@ class LibraryTableStore: comments = row[3], metadata = [ Triple( - s=Value(value=m[0], is_uri=m[1]), - p=Value(value=m[2], is_uri=m[3]), - o=Value(value=m[4], is_uri=m[5]) + s=tuple_to_term(m[0], m[1]), + p=tuple_to_term(m[2], m[3]), + o=tuple_to_term(m[4], m[5]) ) for m in row[4] ], diff --git a/trustgraph-ocr/pyproject.toml b/trustgraph-ocr/pyproject.toml index 71557639..d089180a 100644 --- a/trustgraph-ocr/pyproject.toml +++ b/trustgraph-ocr/pyproject.toml @@ -10,7 +10,7 @@ description = "TrustGraph provides a means to run a pipeline of flexible AI proc readme = "README.md" requires-python = ">=3.8" dependencies = [ - "trustgraph-base>=1.9,<1.10", + "trustgraph-base>=2.0,<2.1", "pulsar-client", "prometheus-client", "boto3", diff --git a/trustgraph-vertexai/pyproject.toml b/trustgraph-vertexai/pyproject.toml index 7d958ac6..e5166a19 100644 --- a/trustgraph-vertexai/pyproject.toml +++ b/trustgraph-vertexai/pyproject.toml @@ -10,7 +10,7 @@ description = "TrustGraph provides a means to run a pipeline of flexible AI proc readme = "README.md" requires-python = ">=3.8" dependencies = [ - "trustgraph-base>=1.9,<1.10", + "trustgraph-base>=2.0,<2.1", "pulsar-client", "google-cloud-aiplatform", "prometheus-client",