mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
Changed schema for Value -> Term, majorly breaking change (#622)
* Changed schema for Value -> Term, majorly breaking change * Following the schema change, Value -> Term into all processing * Updated Cassandra for g, p, s, o index patterns (7 indexes) * Reviewed and updated all tests * Neo4j, Memgraph and FalkorDB remain broken, will look at once settled down
This commit is contained in:
parent
e061f2c633
commit
cf0daedefa
86 changed files with 2458 additions and 1764 deletions
4
Makefile
4
Makefile
|
|
@ -70,8 +70,8 @@ some-containers:
|
||||||
-t ${CONTAINER_BASE}/trustgraph-base:${VERSION} .
|
-t ${CONTAINER_BASE}/trustgraph-base:${VERSION} .
|
||||||
${DOCKER} build -f containers/Containerfile.flow \
|
${DOCKER} build -f containers/Containerfile.flow \
|
||||||
-t ${CONTAINER_BASE}/trustgraph-flow:${VERSION} .
|
-t ${CONTAINER_BASE}/trustgraph-flow:${VERSION} .
|
||||||
${DOCKER} build -f containers/Containerfile.vertexai \
|
# ${DOCKER} build -f containers/Containerfile.vertexai \
|
||||||
-t ${CONTAINER_BASE}/trustgraph-vertexai:${VERSION} .
|
# -t ${CONTAINER_BASE}/trustgraph-vertexai:${VERSION} .
|
||||||
# ${DOCKER} build -f containers/Containerfile.mcp \
|
# ${DOCKER} build -f containers/Containerfile.mcp \
|
||||||
# -t ${CONTAINER_BASE}/trustgraph-mcp:${VERSION} .
|
# -t ${CONTAINER_BASE}/trustgraph-mcp:${VERSION} .
|
||||||
# ${DOCKER} build -f containers/Containerfile.vertexai \
|
# ${DOCKER} build -f containers/Containerfile.vertexai \
|
||||||
|
|
|
||||||
|
|
@ -228,10 +228,15 @@ Following SPARQL conventions for backward compatibility:
|
||||||
|
|
||||||
- **`g` omitted / None**: Query the default graph only
|
- **`g` omitted / None**: Query the default graph only
|
||||||
- **`g` = specific IRI**: Query that named graph only
|
- **`g` = specific IRI**: Query that named graph only
|
||||||
- **`g` = wildcard / `*`**: Query across all graphs
|
- **`g` = wildcard / `*`**: Query across all graphs (equivalent to SPARQL
|
||||||
|
`GRAPH ?g { ... }`)
|
||||||
|
|
||||||
This keeps simple queries simple and makes named graph queries opt-in.
|
This keeps simple queries simple and makes named graph queries opt-in.
|
||||||
|
|
||||||
|
Cross-graph queries (g=wildcard) are fully supported. The Cassandra schema
|
||||||
|
includes dedicated tables (SPOG, POSG, OSPG) where g is a clustering column
|
||||||
|
rather than a partition key, enabling efficient queries across all graphs.
|
||||||
|
|
||||||
#### Temporal Queries
|
#### Temporal Queries
|
||||||
|
|
||||||
**Find all facts discovered after a given date:**
|
**Find all facts discovered after a given date:**
|
||||||
|
|
@ -388,12 +393,78 @@ will proceed in phases:
|
||||||
Cassandra requires multiple tables to support different query access patterns
|
Cassandra requires multiple tables to support different query access patterns
|
||||||
(each table efficiently queries by its partition key + clustering columns).
|
(each table efficiently queries by its partition key + clustering columns).
|
||||||
|
|
||||||
**Challenge: Quads**
|
##### Query Patterns
|
||||||
|
|
||||||
For triples, typical indexes are SPO, POS, OSP (partition by first, cluster by
|
With quads (g, s, p, o), each position can be specified or wildcard, giving
|
||||||
rest). For quads, the graph dimension adds: SPOG, POSG, OSPG, GSPO, etc.
|
16 possible query patterns:
|
||||||
|
|
||||||
**Challenge: Quoted Triples**
|
| # | g | s | p | o | Description |
|
||||||
|
|---|---|---|---|---|-------------|
|
||||||
|
| 1 | ? | ? | ? | ? | All quads |
|
||||||
|
| 2 | ? | ? | ? | o | By object |
|
||||||
|
| 3 | ? | ? | p | ? | By predicate |
|
||||||
|
| 4 | ? | ? | p | o | By predicate + object |
|
||||||
|
| 5 | ? | s | ? | ? | By subject |
|
||||||
|
| 6 | ? | s | ? | o | By subject + object |
|
||||||
|
| 7 | ? | s | p | ? | By subject + predicate |
|
||||||
|
| 8 | ? | s | p | o | Full triple (which graphs?) |
|
||||||
|
| 9 | g | ? | ? | ? | By graph |
|
||||||
|
| 10 | g | ? | ? | o | By graph + object |
|
||||||
|
| 11 | g | ? | p | ? | By graph + predicate |
|
||||||
|
| 12 | g | ? | p | o | By graph + predicate + object |
|
||||||
|
| 13 | g | s | ? | ? | By graph + subject |
|
||||||
|
| 14 | g | s | ? | o | By graph + subject + object |
|
||||||
|
| 15 | g | s | p | ? | By graph + subject + predicate |
|
||||||
|
| 16 | g | s | p | o | Exact quad |
|
||||||
|
|
||||||
|
##### Table Design
|
||||||
|
|
||||||
|
Cassandra constraint: You can only efficiently query by partition key, then
|
||||||
|
filter on clustering columns left-to-right. For g-wildcard queries, g must be
|
||||||
|
a clustering column. For g-specified queries, g in the partition key is more
|
||||||
|
efficient.
|
||||||
|
|
||||||
|
**Two table families needed:**
|
||||||
|
|
||||||
|
**Family A: g-wildcard queries** (g in clustering columns)
|
||||||
|
|
||||||
|
| Table | Partition | Clustering | Supports patterns |
|
||||||
|
|-------|-----------|------------|-------------------|
|
||||||
|
| SPOG | (user, collection, s) | p, o, g | 5, 7, 8 |
|
||||||
|
| POSG | (user, collection, p) | o, s, g | 3, 4 |
|
||||||
|
| OSPG | (user, collection, o) | s, p, g | 2, 6 |
|
||||||
|
|
||||||
|
**Family B: g-specified queries** (g in partition key)
|
||||||
|
|
||||||
|
| Table | Partition | Clustering | Supports patterns |
|
||||||
|
|-------|-----------|------------|-------------------|
|
||||||
|
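| GSPO | (user, collection, g, s) | p, o | 13, 15, 16 (pattern 9 has no s, so it cannot address this partition key; it is served by COLL, which clusters on g first) |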
|
||||||
|
| GPOS | (user, collection, g, p) | o, s | 11, 12 |
|
||||||
|
| GOSP | (user, collection, g, o) | s, p | 10, 14 |
|
||||||
|
|
||||||
|
**Collection table** (for iteration and bulk deletion)
|
||||||
|
|
||||||
|
| Table | Partition | Clustering | Purpose |
|
||||||
|
|-------|-----------|------------|---------|
|
||||||
|
| COLL | (user, collection) | g, s, p, o | Enumerate all quads in collection (pattern 1); with g as the first clustering column it also serves by-graph scans (pattern 9) |
|
||||||
|
|
||||||
|
##### Write and Delete Paths
|
||||||
|
|
||||||
|
**Write path**: Insert into all 7 tables.
|
||||||
|
|
||||||
|
**Delete collection path**:
|
||||||
|
1. Iterate COLL table for `(user, collection)`
|
||||||
|
2. For each quad, delete from all 6 query tables
|
||||||
|
3. Delete from COLL table (or range delete)
|
||||||
|
|
||||||
|
**Delete single quad path**: Delete from all 7 tables directly.
|
||||||
|
|
||||||
|
##### Storage Cost
|
||||||
|
|
||||||
|
Each quad is stored 7 times. This is the cost of flexible querying combined
|
||||||
|
with efficient collection deletion.
|
||||||
|
|
||||||
|
##### Quoted Triples in Storage
|
||||||
|
|
||||||
Subject or object can be a triple itself. Options:
|
Subject or object can be a triple itself. Options:
|
||||||
|
|
||||||
|
|
@ -425,29 +496,9 @@ Metadata table:
|
||||||
- Pro: Clean separation, can index triple IDs
|
- Pro: Clean separation, can index triple IDs
|
||||||
- Con: Requires computing/managing triple identity, two-phase lookups
|
- Con: Requires computing/managing triple identity, two-phase lookups
|
||||||
|
|
||||||
**Option C: Hybrid**
|
**Recommendation**: Start with Option A (serialized strings) for simplicity.
|
||||||
- Store quads normally with serialized quoted triple strings for simple cases
|
Option B may be needed if advanced query patterns over quoted triple
|
||||||
- Maintain a separate triple ID lookup for advanced queries
|
components are required.
|
||||||
- Pro: Flexibility
|
|
||||||
- Con: Complexity
|
|
||||||
|
|
||||||
**Recommendation**: TBD after prototyping. Option A is simplest for initial
|
|
||||||
implementation; Option B may be needed for advanced query patterns.
|
|
||||||
|
|
||||||
#### Indexing Strategy
|
|
||||||
|
|
||||||
Indexes must support the defined query patterns:
|
|
||||||
|
|
||||||
| Query Type | Access Pattern | Index Needed |
|
|
||||||
|------------|----------------|--------------|
|
|
||||||
| Facts by date | P=discoveredOn, O>date | POG (predicate, object, graph) |
|
|
||||||
| Facts by source | P=supportedBy, O=source | POG |
|
|
||||||
| Facts by asserter | P=assertedBy, O=person | POG |
|
|
||||||
| Metadata for a fact | S=quotedTriple | SPO/SPOG |
|
|
||||||
| All facts in graph | G=graphIRI | GSPO |
|
|
||||||
|
|
||||||
For temporal range queries (dates), Cassandra clustering column ordering
|
|
||||||
enables efficient scans when date is a clustering column.
|
|
||||||
|
|
||||||
2. **Phase 2+: Other Backends**
|
2. **Phase 2+: Other Backends**
|
||||||
- Neo4j and other stores implemented in subsequent stages
|
- Neo4j and other stores implemented in subsequent stages
|
||||||
|
|
|
||||||
|
|
@ -15,10 +15,10 @@ from trustgraph.schema import (
|
||||||
TextCompletionRequest, TextCompletionResponse,
|
TextCompletionRequest, TextCompletionResponse,
|
||||||
DocumentRagQuery, DocumentRagResponse,
|
DocumentRagQuery, DocumentRagResponse,
|
||||||
AgentRequest, AgentResponse, AgentStep,
|
AgentRequest, AgentResponse, AgentStep,
|
||||||
Chunk, Triple, Triples, Value, Error,
|
Chunk, Triple, Triples, Term, Error,
|
||||||
EntityContext, EntityContexts,
|
EntityContext, EntityContexts,
|
||||||
GraphEmbeddings, EntityEmbeddings,
|
GraphEmbeddings, EntityEmbeddings,
|
||||||
Metadata
|
Metadata, IRI, LITERAL
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -43,7 +43,7 @@ def schema_registry():
|
||||||
"Chunk": Chunk,
|
"Chunk": Chunk,
|
||||||
"Triple": Triple,
|
"Triple": Triple,
|
||||||
"Triples": Triples,
|
"Triples": Triples,
|
||||||
"Value": Value,
|
"Term": Term,
|
||||||
"Error": Error,
|
"Error": Error,
|
||||||
"EntityContext": EntityContext,
|
"EntityContext": EntityContext,
|
||||||
"EntityContexts": EntityContexts,
|
"EntityContexts": EntityContexts,
|
||||||
|
|
@ -98,26 +98,22 @@ def sample_message_data():
|
||||||
"collection": "test_collection",
|
"collection": "test_collection",
|
||||||
"metadata": []
|
"metadata": []
|
||||||
},
|
},
|
||||||
"Value": {
|
"Term": {
|
||||||
"value": "http://example.com/entity",
|
"type": IRI,
|
||||||
"is_uri": True,
|
"iri": "http://example.com/entity"
|
||||||
"type": ""
|
|
||||||
},
|
},
|
||||||
"Triple": {
|
"Triple": {
|
||||||
"s": Value(
|
"s": Term(
|
||||||
value="http://example.com/subject",
|
type=IRI,
|
||||||
is_uri=True,
|
iri="http://example.com/subject"
|
||||||
type=""
|
|
||||||
),
|
),
|
||||||
"p": Value(
|
"p": Term(
|
||||||
value="http://example.com/predicate",
|
type=IRI,
|
||||||
is_uri=True,
|
iri="http://example.com/predicate"
|
||||||
type=""
|
|
||||||
),
|
),
|
||||||
"o": Value(
|
"o": Term(
|
||||||
value="Object value",
|
type=LITERAL,
|
||||||
is_uri=False,
|
value="Object value"
|
||||||
type=""
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -139,10 +135,10 @@ def invalid_message_data():
|
||||||
{"query": "test", "user": "test", "collection": "test", "doc_limit": -1}, # Invalid doc_limit
|
{"query": "test", "user": "test", "collection": "test", "doc_limit": -1}, # Invalid doc_limit
|
||||||
{"query": "test"}, # Missing required fields
|
{"query": "test"}, # Missing required fields
|
||||||
],
|
],
|
||||||
"Value": [
|
"Term": [
|
||||||
{"value": None, "is_uri": True, "type": ""}, # Invalid value (None)
|
{"type": IRI, "iri": None}, # Invalid iri (None)
|
||||||
{"value": "test", "is_uri": "not_boolean", "type": ""}, # Invalid is_uri
|
{"type": "invalid_type", "value": "test"}, # Invalid type
|
||||||
{"value": 123, "is_uri": True, "type": ""}, # Invalid value (not string)
|
{"type": LITERAL, "value": 123}, # Invalid value (not string)
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,14 +15,14 @@ from trustgraph.schema import (
|
||||||
TextCompletionRequest, TextCompletionResponse,
|
TextCompletionRequest, TextCompletionResponse,
|
||||||
DocumentRagQuery, DocumentRagResponse,
|
DocumentRagQuery, DocumentRagResponse,
|
||||||
AgentRequest, AgentResponse, AgentStep,
|
AgentRequest, AgentResponse, AgentStep,
|
||||||
Chunk, Triple, Triples, Value, Error,
|
Chunk, Triple, Triples, Term, Error,
|
||||||
EntityContext, EntityContexts,
|
EntityContext, EntityContexts,
|
||||||
GraphEmbeddings, EntityEmbeddings,
|
GraphEmbeddings, EntityEmbeddings,
|
||||||
Metadata, Field, RowSchema,
|
Metadata, Field, RowSchema,
|
||||||
StructuredDataSubmission, ExtractedObject,
|
StructuredDataSubmission, ExtractedObject,
|
||||||
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
|
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
|
||||||
StructuredQueryRequest, StructuredQueryResponse,
|
StructuredQueryRequest, StructuredQueryResponse,
|
||||||
StructuredObjectEmbedding
|
StructuredObjectEmbedding, IRI, LITERAL
|
||||||
)
|
)
|
||||||
from .conftest import validate_schema_contract, serialize_deserialize_test
|
from .conftest import validate_schema_contract, serialize_deserialize_test
|
||||||
|
|
||||||
|
|
@ -271,45 +271,44 @@ class TestAgentMessageContracts:
|
||||||
class TestGraphMessageContracts:
|
class TestGraphMessageContracts:
|
||||||
"""Contract tests for Graph/Knowledge message schemas"""
|
"""Contract tests for Graph/Knowledge message schemas"""
|
||||||
|
|
||||||
def test_value_schema_contract(self, sample_message_data):
|
def test_term_schema_contract(self, sample_message_data):
|
||||||
"""Test Value schema contract"""
|
"""Test Term schema contract"""
|
||||||
# Arrange
|
# Arrange
|
||||||
value_data = sample_message_data["Value"]
|
term_data = sample_message_data["Term"]
|
||||||
|
|
||||||
# Act & Assert
|
# Act & Assert
|
||||||
assert validate_schema_contract(Value, value_data)
|
assert validate_schema_contract(Term, term_data)
|
||||||
|
|
||||||
# Test URI value
|
# Test URI term
|
||||||
uri_value = Value(**value_data)
|
uri_term = Term(**term_data)
|
||||||
assert uri_value.value == "http://example.com/entity"
|
assert uri_term.iri == "http://example.com/entity"
|
||||||
assert uri_value.is_uri is True
|
assert uri_term.type == IRI
|
||||||
|
|
||||||
# Test literal value
|
# Test literal term
|
||||||
literal_value = Value(
|
literal_term = Term(
|
||||||
value="Literal text value",
|
type=LITERAL,
|
||||||
is_uri=False,
|
value="Literal text value"
|
||||||
type=""
|
|
||||||
)
|
)
|
||||||
assert literal_value.value == "Literal text value"
|
assert literal_term.value == "Literal text value"
|
||||||
assert literal_value.is_uri is False
|
assert literal_term.type == LITERAL
|
||||||
|
|
||||||
def test_triple_schema_contract(self, sample_message_data):
|
def test_triple_schema_contract(self, sample_message_data):
|
||||||
"""Test Triple schema contract"""
|
"""Test Triple schema contract"""
|
||||||
# Arrange
|
# Arrange
|
||||||
triple_data = sample_message_data["Triple"]
|
triple_data = sample_message_data["Triple"]
|
||||||
|
|
||||||
# Act & Assert - Triple uses Value objects, not dict validation
|
# Act & Assert - Triple uses Term objects, not dict validation
|
||||||
triple = Triple(
|
triple = Triple(
|
||||||
s=triple_data["s"],
|
s=triple_data["s"],
|
||||||
p=triple_data["p"],
|
p=triple_data["p"],
|
||||||
o=triple_data["o"]
|
o=triple_data["o"]
|
||||||
)
|
)
|
||||||
assert triple.s.value == "http://example.com/subject"
|
assert triple.s.iri == "http://example.com/subject"
|
||||||
assert triple.p.value == "http://example.com/predicate"
|
assert triple.p.iri == "http://example.com/predicate"
|
||||||
assert triple.o.value == "Object value"
|
assert triple.o.value == "Object value"
|
||||||
assert triple.s.is_uri is True
|
assert triple.s.type == IRI
|
||||||
assert triple.p.is_uri is True
|
assert triple.p.type == IRI
|
||||||
assert triple.o.is_uri is False
|
assert triple.o.type == LITERAL
|
||||||
|
|
||||||
def test_triples_schema_contract(self, sample_message_data):
|
def test_triples_schema_contract(self, sample_message_data):
|
||||||
"""Test Triples (batch) schema contract"""
|
"""Test Triples (batch) schema contract"""
|
||||||
|
|
@ -328,7 +327,7 @@ class TestGraphMessageContracts:
|
||||||
triples = Triples(**triples_data)
|
triples = Triples(**triples_data)
|
||||||
assert triples.metadata.id == "test-doc-123"
|
assert triples.metadata.id == "test-doc-123"
|
||||||
assert len(triples.triples) == 1
|
assert len(triples.triples) == 1
|
||||||
assert triples.triples[0].s.value == "http://example.com/subject"
|
assert triples.triples[0].s.iri == "http://example.com/subject"
|
||||||
|
|
||||||
def test_chunk_schema_contract(self, sample_message_data):
|
def test_chunk_schema_contract(self, sample_message_data):
|
||||||
"""Test Chunk schema contract"""
|
"""Test Chunk schema contract"""
|
||||||
|
|
@ -349,9 +348,9 @@ class TestGraphMessageContracts:
|
||||||
def test_entity_context_schema_contract(self):
|
def test_entity_context_schema_contract(self):
|
||||||
"""Test EntityContext schema contract"""
|
"""Test EntityContext schema contract"""
|
||||||
# Arrange
|
# Arrange
|
||||||
entity_value = Value(value="http://example.com/entity", is_uri=True, type="")
|
entity_term = Term(type=IRI, iri="http://example.com/entity")
|
||||||
entity_context_data = {
|
entity_context_data = {
|
||||||
"entity": entity_value,
|
"entity": entity_term,
|
||||||
"context": "Context information about the entity"
|
"context": "Context information about the entity"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -359,16 +358,16 @@ class TestGraphMessageContracts:
|
||||||
assert validate_schema_contract(EntityContext, entity_context_data)
|
assert validate_schema_contract(EntityContext, entity_context_data)
|
||||||
|
|
||||||
entity_context = EntityContext(**entity_context_data)
|
entity_context = EntityContext(**entity_context_data)
|
||||||
assert entity_context.entity.value == "http://example.com/entity"
|
assert entity_context.entity.iri == "http://example.com/entity"
|
||||||
assert entity_context.context == "Context information about the entity"
|
assert entity_context.context == "Context information about the entity"
|
||||||
|
|
||||||
def test_entity_contexts_batch_schema_contract(self, sample_message_data):
|
def test_entity_contexts_batch_schema_contract(self, sample_message_data):
|
||||||
"""Test EntityContexts (batch) schema contract"""
|
"""Test EntityContexts (batch) schema contract"""
|
||||||
# Arrange
|
# Arrange
|
||||||
metadata = Metadata(**sample_message_data["Metadata"])
|
metadata = Metadata(**sample_message_data["Metadata"])
|
||||||
entity_value = Value(value="http://example.com/entity", is_uri=True, type="")
|
entity_term = Term(type=IRI, iri="http://example.com/entity")
|
||||||
entity_context = EntityContext(
|
entity_context = EntityContext(
|
||||||
entity=entity_value,
|
entity=entity_term,
|
||||||
context="Entity context"
|
context="Entity context"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -420,7 +419,7 @@ class TestMetadataMessageContracts:
|
||||||
|
|
||||||
metadata = Metadata(**metadata_data)
|
metadata = Metadata(**metadata_data)
|
||||||
assert len(metadata.metadata) == 1
|
assert len(metadata.metadata) == 1
|
||||||
assert metadata.metadata[0].s.value == "http://example.com/subject"
|
assert metadata.metadata[0].s.iri == "http://example.com/subject"
|
||||||
|
|
||||||
def test_error_schema_contract(self):
|
def test_error_schema_contract(self):
|
||||||
"""Test Error schema contract"""
|
"""Test Error schema contract"""
|
||||||
|
|
@ -532,7 +531,7 @@ class TestSerializationContracts:
|
||||||
# Test each schema in the registry
|
# Test each schema in the registry
|
||||||
for schema_name, schema_class in schema_registry.items():
|
for schema_name, schema_class in schema_registry.items():
|
||||||
if schema_name in sample_message_data:
|
if schema_name in sample_message_data:
|
||||||
# Skip Triple schema as it requires special handling with Value objects
|
# Skip Triple schema as it requires special handling with Term objects
|
||||||
if schema_name == "Triple":
|
if schema_name == "Triple":
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
@ -541,7 +540,7 @@ class TestSerializationContracts:
|
||||||
assert serialize_deserialize_test(schema_class, data), f"Serialization failed for {schema_name}"
|
assert serialize_deserialize_test(schema_class, data), f"Serialization failed for {schema_name}"
|
||||||
|
|
||||||
def test_triple_serialization_contract(self, sample_message_data):
|
def test_triple_serialization_contract(self, sample_message_data):
|
||||||
"""Test Triple schema serialization contract with Value objects"""
|
"""Test Triple schema serialization contract with Term objects"""
|
||||||
# Arrange
|
# Arrange
|
||||||
triple_data = sample_message_data["Triple"]
|
triple_data = sample_message_data["Triple"]
|
||||||
|
|
||||||
|
|
@ -552,13 +551,13 @@ class TestSerializationContracts:
|
||||||
o=triple_data["o"]
|
o=triple_data["o"]
|
||||||
)
|
)
|
||||||
|
|
||||||
# Assert - Test that Value objects are properly constructed and accessible
|
# Assert - Test that Term objects are properly constructed and accessible
|
||||||
assert triple.s.value == "http://example.com/subject"
|
assert triple.s.iri == "http://example.com/subject"
|
||||||
assert triple.p.value == "http://example.com/predicate"
|
assert triple.p.iri == "http://example.com/predicate"
|
||||||
assert triple.o.value == "Object value"
|
assert triple.o.value == "Object value"
|
||||||
assert isinstance(triple.s, Value)
|
assert isinstance(triple.s, Term)
|
||||||
assert isinstance(triple.p, Value)
|
assert isinstance(triple.p, Term)
|
||||||
assert isinstance(triple.o, Value)
|
assert isinstance(triple.o, Term)
|
||||||
|
|
||||||
def test_nested_schema_serialization_contract(self, sample_message_data):
|
def test_nested_schema_serialization_contract(self, sample_message_data):
|
||||||
"""Test serialization of nested schemas"""
|
"""Test serialization of nested schemas"""
|
||||||
|
|
@ -570,7 +569,7 @@ class TestSerializationContracts:
|
||||||
|
|
||||||
# Verify nested objects maintain their contracts
|
# Verify nested objects maintain their contracts
|
||||||
assert triples.metadata.id == "test-doc-123"
|
assert triples.metadata.id == "test-doc-123"
|
||||||
assert triples.triples[0].s.value == "http://example.com/subject"
|
assert triples.triples[0].s.iri == "http://example.com/subject"
|
||||||
|
|
||||||
def test_array_field_serialization_contract(self):
|
def test_array_field_serialization_contract(self):
|
||||||
"""Test serialization of array fields"""
|
"""Test serialization of array fields"""
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ from trustgraph.schema import (
|
||||||
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
|
QuestionToStructuredQueryRequest, QuestionToStructuredQueryResponse,
|
||||||
StructuredQueryRequest, StructuredQueryResponse,
|
StructuredQueryRequest, StructuredQueryResponse,
|
||||||
StructuredObjectEmbedding, Field, RowSchema,
|
StructuredObjectEmbedding, Field, RowSchema,
|
||||||
Metadata, Error, Value
|
Metadata, Error
|
||||||
)
|
)
|
||||||
from .conftest import serialize_deserialize_test
|
from .conftest import serialize_deserialize_test
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ import json
|
||||||
from unittest.mock import AsyncMock, MagicMock, patch
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
from trustgraph.extract.kg.agent.extract import Processor as AgentKgExtractor
|
from trustgraph.extract.kg.agent.extract import Processor as AgentKgExtractor
|
||||||
from trustgraph.schema import Chunk, Triple, Triples, Metadata, Value, Error
|
from trustgraph.schema import Chunk, Triple, Triples, Metadata, Term, Error, IRI, LITERAL
|
||||||
from trustgraph.schema import EntityContext, EntityContexts, AgentRequest, AgentResponse
|
from trustgraph.schema import EntityContext, EntityContexts, AgentRequest, AgentResponse
|
||||||
from trustgraph.rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF
|
from trustgraph.rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF
|
||||||
from trustgraph.template.prompt_manager import PromptManager
|
from trustgraph.template.prompt_manager import PromptManager
|
||||||
|
|
@ -78,9 +78,9 @@ class TestAgentKgExtractionIntegration:
|
||||||
id="doc123",
|
id="doc123",
|
||||||
metadata=[
|
metadata=[
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="doc123", is_uri=True),
|
s=Term(type=IRI, iri="doc123"),
|
||||||
p=Value(value="http://example.org/type", is_uri=True),
|
p=Term(type=IRI, iri="http://example.org/type"),
|
||||||
o=Value(value="document", is_uri=False)
|
o=Term(type=LITERAL, value="document")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
@ -178,15 +178,15 @@ class TestAgentKgExtractionIntegration:
|
||||||
assert len(sent_triples.triples) > 0
|
assert len(sent_triples.triples) > 0
|
||||||
|
|
||||||
# Check that we have definition triples
|
# Check that we have definition triples
|
||||||
definition_triples = [t for t in sent_triples.triples if t.p.value == DEFINITION]
|
definition_triples = [t for t in sent_triples.triples if t.p.iri == DEFINITION]
|
||||||
assert len(definition_triples) >= 2 # Should have definitions for ML and Neural Networks
|
assert len(definition_triples) >= 2 # Should have definitions for ML and Neural Networks
|
||||||
|
|
||||||
# Check that we have label triples
|
# Check that we have label triples
|
||||||
label_triples = [t for t in sent_triples.triples if t.p.value == RDF_LABEL]
|
label_triples = [t for t in sent_triples.triples if t.p.iri == RDF_LABEL]
|
||||||
assert len(label_triples) >= 2 # Should have labels for entities
|
assert len(label_triples) >= 2 # Should have labels for entities
|
||||||
|
|
||||||
# Check subject-of relationships
|
# Check subject-of relationships
|
||||||
subject_of_triples = [t for t in sent_triples.triples if t.p.value == SUBJECT_OF]
|
subject_of_triples = [t for t in sent_triples.triples if t.p.iri == SUBJECT_OF]
|
||||||
assert len(subject_of_triples) >= 2 # Entities should be linked to document
|
assert len(subject_of_triples) >= 2 # Entities should be linked to document
|
||||||
|
|
||||||
# Verify entity contexts were emitted
|
# Verify entity contexts were emitted
|
||||||
|
|
@ -198,7 +198,7 @@ class TestAgentKgExtractionIntegration:
|
||||||
assert len(sent_contexts.entities) >= 2 # Should have contexts for both entities
|
assert len(sent_contexts.entities) >= 2 # Should have contexts for both entities
|
||||||
|
|
||||||
# Verify entity URIs are properly formed
|
# Verify entity URIs are properly formed
|
||||||
entity_uris = [ec.entity.value for ec in sent_contexts.entities]
|
entity_uris = [ec.entity.iri for ec in sent_contexts.entities]
|
||||||
assert f"{TRUSTGRAPH_ENTITIES}Machine%20Learning" in entity_uris
|
assert f"{TRUSTGRAPH_ENTITIES}Machine%20Learning" in entity_uris
|
||||||
assert f"{TRUSTGRAPH_ENTITIES}Neural%20Networks" in entity_uris
|
assert f"{TRUSTGRAPH_ENTITIES}Neural%20Networks" in entity_uris
|
||||||
|
|
||||||
|
|
@ -401,7 +401,7 @@ class TestAgentKgExtractionIntegration:
|
||||||
|
|
||||||
sent_triples = triples_publisher.send.call_args[0][0]
|
sent_triples = triples_publisher.send.call_args[0][0]
|
||||||
# Check that unicode entity was properly processed
|
# Check that unicode entity was properly processed
|
||||||
entity_labels = [t for t in sent_triples.triples if t.p.value == RDF_LABEL and t.o.value == "機械学習"]
|
entity_labels = [t for t in sent_triples.triples if t.p.iri == RDF_LABEL and t.o.value == "機械学習"]
|
||||||
assert len(entity_labels) > 0
|
assert len(entity_labels) > 0
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ from .cassandra_test_helper import cassandra_container
|
||||||
from trustgraph.direct.cassandra_kg import KnowledgeGraph
|
from trustgraph.direct.cassandra_kg import KnowledgeGraph
|
||||||
from trustgraph.storage.triples.cassandra.write import Processor as StorageProcessor
|
from trustgraph.storage.triples.cassandra.write import Processor as StorageProcessor
|
||||||
from trustgraph.query.triples.cassandra.service import Processor as QueryProcessor
|
from trustgraph.query.triples.cassandra.service import Processor as QueryProcessor
|
||||||
from trustgraph.schema import Triple, Value, Metadata, Triples, TriplesQueryRequest
|
from trustgraph.schema import Triple, Term, Metadata, Triples, TriplesQueryRequest, IRI, LITERAL
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.integration
|
@pytest.mark.integration
|
||||||
|
|
@ -118,19 +118,19 @@ class TestCassandraIntegration:
|
||||||
metadata=Metadata(user="testuser", collection="testcol"),
|
metadata=Metadata(user="testuser", collection="testcol"),
|
||||||
triples=[
|
triples=[
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="http://example.org/person1", is_uri=True),
|
s=Term(type=IRI, iri="http://example.org/person1"),
|
||||||
p=Value(value="http://example.org/name", is_uri=True),
|
p=Term(type=IRI, iri="http://example.org/name"),
|
||||||
o=Value(value="Alice Smith", is_uri=False)
|
o=Term(type=LITERAL, value="Alice Smith")
|
||||||
),
|
),
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="http://example.org/person1", is_uri=True),
|
s=Term(type=IRI, iri="http://example.org/person1"),
|
||||||
p=Value(value="http://example.org/age", is_uri=True),
|
p=Term(type=IRI, iri="http://example.org/age"),
|
||||||
o=Value(value="25", is_uri=False)
|
o=Term(type=LITERAL, value="25")
|
||||||
),
|
),
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="http://example.org/person1", is_uri=True),
|
s=Term(type=IRI, iri="http://example.org/person1"),
|
||||||
p=Value(value="http://example.org/department", is_uri=True),
|
p=Term(type=IRI, iri="http://example.org/department"),
|
||||||
o=Value(value="Engineering", is_uri=False)
|
o=Term(type=LITERAL, value="Engineering")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
@ -181,19 +181,19 @@ class TestCassandraIntegration:
|
||||||
metadata=Metadata(user="testuser", collection="testcol"),
|
metadata=Metadata(user="testuser", collection="testcol"),
|
||||||
triples=[
|
triples=[
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="http://example.org/alice", is_uri=True),
|
s=Term(type=IRI, iri="http://example.org/alice"),
|
||||||
p=Value(value="http://example.org/knows", is_uri=True),
|
p=Term(type=IRI, iri="http://example.org/knows"),
|
||||||
o=Value(value="http://example.org/bob", is_uri=True)
|
o=Term(type=IRI, iri="http://example.org/bob")
|
||||||
),
|
),
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="http://example.org/alice", is_uri=True),
|
s=Term(type=IRI, iri="http://example.org/alice"),
|
||||||
p=Value(value="http://example.org/age", is_uri=True),
|
p=Term(type=IRI, iri="http://example.org/age"),
|
||||||
o=Value(value="30", is_uri=False)
|
o=Term(type=LITERAL, value="30")
|
||||||
),
|
),
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="http://example.org/bob", is_uri=True),
|
s=Term(type=IRI, iri="http://example.org/bob"),
|
||||||
p=Value(value="http://example.org/knows", is_uri=True),
|
p=Term(type=IRI, iri="http://example.org/knows"),
|
||||||
o=Value(value="http://example.org/charlie", is_uri=True)
|
o=Term(type=IRI, iri="http://example.org/charlie")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
@ -208,7 +208,7 @@ class TestCassandraIntegration:
|
||||||
|
|
||||||
# Test S query (find all relationships for Alice)
|
# Test S query (find all relationships for Alice)
|
||||||
s_query = TriplesQueryRequest(
|
s_query = TriplesQueryRequest(
|
||||||
s=Value(value="http://example.org/alice", is_uri=True),
|
s=Term(type=IRI, iri="http://example.org/alice"),
|
||||||
p=None, # None for wildcard
|
p=None, # None for wildcard
|
||||||
o=None, # None for wildcard
|
o=None, # None for wildcard
|
||||||
limit=10,
|
limit=10,
|
||||||
|
|
@ -218,10 +218,10 @@ class TestCassandraIntegration:
|
||||||
s_results = await query_processor.query_triples(s_query)
|
s_results = await query_processor.query_triples(s_query)
|
||||||
print(f"Query processor results: {len(s_results)}")
|
print(f"Query processor results: {len(s_results)}")
|
||||||
for result in s_results:
|
for result in s_results:
|
||||||
print(f" S={result.s.value}, P={result.p.value}, O={result.o.value}")
|
print(f" S={result.s.iri}, P={result.p.iri}, O={result.o.iri if result.o.type == IRI else result.o.value}")
|
||||||
assert len(s_results) == 2
|
assert len(s_results) == 2
|
||||||
|
|
||||||
s_predicates = [t.p.value for t in s_results]
|
s_predicates = [t.p.iri for t in s_results]
|
||||||
assert "http://example.org/knows" in s_predicates
|
assert "http://example.org/knows" in s_predicates
|
||||||
assert "http://example.org/age" in s_predicates
|
assert "http://example.org/age" in s_predicates
|
||||||
print("✓ Subject queries via processor working")
|
print("✓ Subject queries via processor working")
|
||||||
|
|
@ -229,7 +229,7 @@ class TestCassandraIntegration:
|
||||||
# Test P query (find all "knows" relationships)
|
# Test P query (find all "knows" relationships)
|
||||||
p_query = TriplesQueryRequest(
|
p_query = TriplesQueryRequest(
|
||||||
s=None, # None for wildcard
|
s=None, # None for wildcard
|
||||||
p=Value(value="http://example.org/knows", is_uri=True),
|
p=Term(type=IRI, iri="http://example.org/knows"),
|
||||||
o=None, # None for wildcard
|
o=None, # None for wildcard
|
||||||
limit=10,
|
limit=10,
|
||||||
user="testuser",
|
user="testuser",
|
||||||
|
|
@ -239,7 +239,7 @@ class TestCassandraIntegration:
|
||||||
print(p_results)
|
print(p_results)
|
||||||
assert len(p_results) == 2 # Alice knows Bob, Bob knows Charlie
|
assert len(p_results) == 2 # Alice knows Bob, Bob knows Charlie
|
||||||
|
|
||||||
p_subjects = [t.s.value for t in p_results]
|
p_subjects = [t.s.iri for t in p_results]
|
||||||
assert "http://example.org/alice" in p_subjects
|
assert "http://example.org/alice" in p_subjects
|
||||||
assert "http://example.org/bob" in p_subjects
|
assert "http://example.org/bob" in p_subjects
|
||||||
print("✓ Predicate queries via processor working")
|
print("✓ Predicate queries via processor working")
|
||||||
|
|
@ -262,19 +262,19 @@ class TestCassandraIntegration:
|
||||||
metadata=Metadata(user="concurrent_test", collection="people"),
|
metadata=Metadata(user="concurrent_test", collection="people"),
|
||||||
triples=[
|
triples=[
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value=f"http://example.org/{person_id}", is_uri=True),
|
s=Term(type=IRI, iri=f"http://example.org/{person_id}"),
|
||||||
p=Value(value="http://example.org/name", is_uri=True),
|
p=Term(type=IRI, iri="http://example.org/name"),
|
||||||
o=Value(value=name, is_uri=False)
|
o=Term(type=LITERAL, value=name)
|
||||||
),
|
),
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value=f"http://example.org/{person_id}", is_uri=True),
|
s=Term(type=IRI, iri=f"http://example.org/{person_id}"),
|
||||||
p=Value(value="http://example.org/age", is_uri=True),
|
p=Term(type=IRI, iri="http://example.org/age"),
|
||||||
o=Value(value=str(age), is_uri=False)
|
o=Term(type=LITERAL, value=str(age))
|
||||||
),
|
),
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value=f"http://example.org/{person_id}", is_uri=True),
|
s=Term(type=IRI, iri=f"http://example.org/{person_id}"),
|
||||||
p=Value(value="http://example.org/department", is_uri=True),
|
p=Term(type=IRI, iri="http://example.org/department"),
|
||||||
o=Value(value=department, is_uri=False)
|
o=Term(type=LITERAL, value=department)
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
@ -333,36 +333,36 @@ class TestCassandraIntegration:
|
||||||
triples=[
|
triples=[
|
||||||
# People and their types
|
# People and their types
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="http://company.org/alice", is_uri=True),
|
s=Term(type=IRI, iri="http://company.org/alice"),
|
||||||
p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||||
o=Value(value="http://company.org/Employee", is_uri=True)
|
o=Term(type=IRI, iri="http://company.org/Employee")
|
||||||
),
|
),
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="http://company.org/bob", is_uri=True),
|
s=Term(type=IRI, iri="http://company.org/bob"),
|
||||||
p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||||
o=Value(value="http://company.org/Employee", is_uri=True)
|
o=Term(type=IRI, iri="http://company.org/Employee")
|
||||||
),
|
),
|
||||||
# Relationships
|
# Relationships
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="http://company.org/alice", is_uri=True),
|
s=Term(type=IRI, iri="http://company.org/alice"),
|
||||||
p=Value(value="http://company.org/reportsTo", is_uri=True),
|
p=Term(type=IRI, iri="http://company.org/reportsTo"),
|
||||||
o=Value(value="http://company.org/bob", is_uri=True)
|
o=Term(type=IRI, iri="http://company.org/bob")
|
||||||
),
|
),
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="http://company.org/alice", is_uri=True),
|
s=Term(type=IRI, iri="http://company.org/alice"),
|
||||||
p=Value(value="http://company.org/worksIn", is_uri=True),
|
p=Term(type=IRI, iri="http://company.org/worksIn"),
|
||||||
o=Value(value="http://company.org/engineering", is_uri=True)
|
o=Term(type=IRI, iri="http://company.org/engineering")
|
||||||
),
|
),
|
||||||
# Personal info
|
# Personal info
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="http://company.org/alice", is_uri=True),
|
s=Term(type=IRI, iri="http://company.org/alice"),
|
||||||
p=Value(value="http://company.org/fullName", is_uri=True),
|
p=Term(type=IRI, iri="http://company.org/fullName"),
|
||||||
o=Value(value="Alice Johnson", is_uri=False)
|
o=Term(type=LITERAL, value="Alice Johnson")
|
||||||
),
|
),
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="http://company.org/alice", is_uri=True),
|
s=Term(type=IRI, iri="http://company.org/alice"),
|
||||||
p=Value(value="http://company.org/email", is_uri=True),
|
p=Term(type=IRI, iri="http://company.org/email"),
|
||||||
o=Value(value="alice@company.org", is_uri=False)
|
o=Term(type=LITERAL, value="alice@company.org")
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -54,7 +54,7 @@ class MockWebSocket:
|
||||||
"user": "test-user",
|
"user": "test-user",
|
||||||
"collection": "test-collection"
|
"collection": "test-collection"
|
||||||
},
|
},
|
||||||
"triples": [{"s": {"v": "subject", "e": False}, "p": {"v": "predicate", "e": False}, "o": {"v": "object", "e": False}}]
|
"triples": [{"s": {"t": "l", "v": "subject"}, "p": {"t": "l", "v": "predicate"}, "o": {"t": "l", "v": "object"}}]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -118,7 +118,7 @@ async def test_import_graceful_shutdown_integration(mock_backend):
|
||||||
"user": "test-user",
|
"user": "test-user",
|
||||||
"collection": "test-collection"
|
"collection": "test-collection"
|
||||||
},
|
},
|
||||||
"triples": [{"s": {"v": f"subject-{i}", "e": False}, "p": {"v": "predicate", "e": False}, "o": {"v": f"object-{i}", "e": False}}]
|
"triples": [{"s": {"t": "l", "v": f"subject-{i}"}, "p": {"t": "l", "v": "predicate"}, "o": {"t": "l", "v": f"object-{i}"}}]
|
||||||
}
|
}
|
||||||
messages.append(msg_data)
|
messages.append(msg_data)
|
||||||
|
|
||||||
|
|
@ -163,7 +163,7 @@ async def test_export_no_message_loss_integration(mock_backend):
|
||||||
"user": "test-user",
|
"user": "test-user",
|
||||||
"collection": "test-collection"
|
"collection": "test-collection"
|
||||||
},
|
},
|
||||||
"triples": [{"s": {"v": f"export-subject-{i}", "e": False}, "p": {"v": "predicate", "e": False}, "o": {"v": f"export-object-{i}", "e": False}}]
|
"triples": [{"s": {"t": "l", "v": f"export-subject-{i}"}, "p": {"t": "l", "v": "predicate"}, "o": {"t": "l", "v": f"export-object-{i}"}}]
|
||||||
}
|
}
|
||||||
# Create Triples object instead of raw dict
|
# Create Triples object instead of raw dict
|
||||||
from trustgraph.schema import Triples, Metadata
|
from trustgraph.schema import Triples, Metadata
|
||||||
|
|
@ -302,7 +302,7 @@ async def test_concurrent_import_export_shutdown():
|
||||||
"user": "test-user",
|
"user": "test-user",
|
||||||
"collection": "test-collection"
|
"collection": "test-collection"
|
||||||
},
|
},
|
||||||
"triples": [{"s": {"v": f"concurrent-subject-{i}", "e": False}, "p": {"v": "predicate", "e": False}, "o": {"v": "object", "e": False}}]
|
"triples": [{"s": {"t": "l", "v": f"concurrent-subject-{i}"}, "p": {"t": "l", "v": "predicate"}, "o": {"t": "l", "v": "object"}}]
|
||||||
}
|
}
|
||||||
await import_handler.receive(msg)
|
await import_handler.receive(msg)
|
||||||
|
|
||||||
|
|
@ -359,7 +359,7 @@ async def test_websocket_close_during_message_processing():
|
||||||
"user": "test-user",
|
"user": "test-user",
|
||||||
"collection": "test-collection"
|
"collection": "test-collection"
|
||||||
},
|
},
|
||||||
"triples": [{"s": {"v": f"slow-subject-{i}", "e": False}, "p": {"v": "predicate", "e": False}, "o": {"v": "object", "e": False}}]
|
"triples": [{"s": {"t": "l", "v": f"slow-subject-{i}"}, "p": {"t": "l", "v": "predicate"}, "o": {"t": "l", "v": "object"}}]
|
||||||
}
|
}
|
||||||
task = asyncio.create_task(import_handler.receive(msg))
|
task = asyncio.create_task(import_handler.receive(msg))
|
||||||
message_tasks.append(task)
|
message_tasks.append(task)
|
||||||
|
|
@ -423,7 +423,7 @@ async def test_backpressure_during_shutdown():
|
||||||
# Simulate receiving and processing a message
|
# Simulate receiving and processing a message
|
||||||
msg_data = {
|
msg_data = {
|
||||||
"metadata": {"id": f"msg-{i}"},
|
"metadata": {"id": f"msg-{i}"},
|
||||||
"triples": [{"s": {"v": "subject", "e": False}, "p": {"v": "predicate", "e": False}, "o": {"v": "object", "e": False}}]
|
"triples": [{"s": {"t": "l", "v": "subject"}, "p": {"t": "l", "v": "predicate"}, "o": {"t": "l", "v": "object"}}]
|
||||||
}
|
}
|
||||||
await ws.send_json(msg_data)
|
await ws.send_json(msg_data)
|
||||||
# Check if we should stop
|
# Check if we should stop
|
||||||
|
|
|
||||||
|
|
@ -15,7 +15,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
from trustgraph.extract.kg.definitions.extract import Processor as DefinitionsProcessor
|
from trustgraph.extract.kg.definitions.extract import Processor as DefinitionsProcessor
|
||||||
from trustgraph.extract.kg.relationships.extract import Processor as RelationshipsProcessor
|
from trustgraph.extract.kg.relationships.extract import Processor as RelationshipsProcessor
|
||||||
from trustgraph.storage.knowledge.store import Processor as KnowledgeStoreProcessor
|
from trustgraph.storage.knowledge.store import Processor as KnowledgeStoreProcessor
|
||||||
from trustgraph.schema import Chunk, Triple, Triples, Metadata, Value, Error
|
from trustgraph.schema import Chunk, Triple, Triples, Metadata, Term, Error, IRI, LITERAL
|
||||||
from trustgraph.schema import EntityContext, EntityContexts, GraphEmbeddings
|
from trustgraph.schema import EntityContext, EntityContexts, GraphEmbeddings
|
||||||
from trustgraph.rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF
|
from trustgraph.rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF
|
||||||
|
|
||||||
|
|
@ -253,24 +253,24 @@ class TestKnowledgeGraphPipelineIntegration:
|
||||||
|
|
||||||
if s and o:
|
if s and o:
|
||||||
s_uri = definitions_processor.to_uri(s)
|
s_uri = definitions_processor.to_uri(s)
|
||||||
s_value = Value(value=str(s_uri), is_uri=True)
|
s_term = Term(type=IRI, iri=str(s_uri))
|
||||||
o_value = Value(value=str(o), is_uri=False)
|
o_term = Term(type=LITERAL, value=str(o))
|
||||||
|
|
||||||
# Generate triples as the processor would
|
# Generate triples as the processor would
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s=s_value,
|
s=s_term,
|
||||||
p=Value(value=RDF_LABEL, is_uri=True),
|
p=Term(type=IRI, iri=RDF_LABEL),
|
||||||
o=Value(value=s, is_uri=False)
|
o=Term(type=LITERAL, value=s)
|
||||||
))
|
))
|
||||||
|
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s=s_value,
|
s=s_term,
|
||||||
p=Value(value=DEFINITION, is_uri=True),
|
p=Term(type=IRI, iri=DEFINITION),
|
||||||
o=o_value
|
o=o_term
|
||||||
))
|
))
|
||||||
|
|
||||||
entities.append(EntityContext(
|
entities.append(EntityContext(
|
||||||
entity=s_value,
|
entity=s_term,
|
||||||
context=defn["definition"]
|
context=defn["definition"]
|
||||||
))
|
))
|
||||||
|
|
||||||
|
|
@ -279,16 +279,16 @@ class TestKnowledgeGraphPipelineIntegration:
|
||||||
assert len(entities) == 3 # 1 entity context per entity
|
assert len(entities) == 3 # 1 entity context per entity
|
||||||
|
|
||||||
# Verify triple structure
|
# Verify triple structure
|
||||||
label_triples = [t for t in triples if t.p.value == RDF_LABEL]
|
label_triples = [t for t in triples if t.p.iri == RDF_LABEL]
|
||||||
definition_triples = [t for t in triples if t.p.value == DEFINITION]
|
definition_triples = [t for t in triples if t.p.iri == DEFINITION]
|
||||||
|
|
||||||
assert len(label_triples) == 3
|
assert len(label_triples) == 3
|
||||||
assert len(definition_triples) == 3
|
assert len(definition_triples) == 3
|
||||||
|
|
||||||
# Verify entity contexts
|
# Verify entity contexts
|
||||||
for entity in entities:
|
for entity in entities:
|
||||||
assert entity.entity.is_uri is True
|
assert entity.entity.type == IRI
|
||||||
assert entity.entity.value.startswith(TRUSTGRAPH_ENTITIES)
|
assert entity.entity.iri.startswith(TRUSTGRAPH_ENTITIES)
|
||||||
assert len(entity.context) > 0
|
assert len(entity.context) > 0
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|
@ -312,49 +312,49 @@ class TestKnowledgeGraphPipelineIntegration:
|
||||||
|
|
||||||
if s and p and o:
|
if s and p and o:
|
||||||
s_uri = relationships_processor.to_uri(s)
|
s_uri = relationships_processor.to_uri(s)
|
||||||
s_value = Value(value=str(s_uri), is_uri=True)
|
s_term = Term(type=IRI, iri=str(s_uri))
|
||||||
|
|
||||||
p_uri = relationships_processor.to_uri(p)
|
p_uri = relationships_processor.to_uri(p)
|
||||||
p_value = Value(value=str(p_uri), is_uri=True)
|
p_term = Term(type=IRI, iri=str(p_uri))
|
||||||
|
|
||||||
if rel["object-entity"]:
|
if rel["object-entity"]:
|
||||||
o_uri = relationships_processor.to_uri(o)
|
o_uri = relationships_processor.to_uri(o)
|
||||||
o_value = Value(value=str(o_uri), is_uri=True)
|
o_term = Term(type=IRI, iri=str(o_uri))
|
||||||
else:
|
else:
|
||||||
o_value = Value(value=str(o), is_uri=False)
|
o_term = Term(type=LITERAL, value=str(o))
|
||||||
|
|
||||||
# Main relationship triple
|
# Main relationship triple
|
||||||
triples.append(Triple(s=s_value, p=p_value, o=o_value))
|
triples.append(Triple(s=s_term, p=p_term, o=o_term))
|
||||||
|
|
||||||
# Label triples
|
# Label triples
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s=s_value,
|
s=s_term,
|
||||||
p=Value(value=RDF_LABEL, is_uri=True),
|
p=Term(type=IRI, iri=RDF_LABEL),
|
||||||
o=Value(value=str(s), is_uri=False)
|
o=Term(type=LITERAL, value=str(s))
|
||||||
))
|
))
|
||||||
|
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s=p_value,
|
s=p_term,
|
||||||
p=Value(value=RDF_LABEL, is_uri=True),
|
p=Term(type=IRI, iri=RDF_LABEL),
|
||||||
o=Value(value=str(p), is_uri=False)
|
o=Term(type=LITERAL, value=str(p))
|
||||||
))
|
))
|
||||||
|
|
||||||
if rel["object-entity"]:
|
if rel["object-entity"]:
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s=o_value,
|
s=o_term,
|
||||||
p=Value(value=RDF_LABEL, is_uri=True),
|
p=Term(type=IRI, iri=RDF_LABEL),
|
||||||
o=Value(value=str(o), is_uri=False)
|
o=Term(type=LITERAL, value=str(o))
|
||||||
))
|
))
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert len(triples) > 0
|
assert len(triples) > 0
|
||||||
|
|
||||||
# Verify relationship triples exist
|
# Verify relationship triples exist
|
||||||
relationship_triples = [t for t in triples if t.p.value.endswith("is_subset_of") or t.p.value.endswith("is_used_in")]
|
relationship_triples = [t for t in triples if t.p.iri.endswith("is_subset_of") or t.p.iri.endswith("is_used_in")]
|
||||||
assert len(relationship_triples) >= 2
|
assert len(relationship_triples) >= 2
|
||||||
|
|
||||||
# Verify label triples
|
# Verify label triples
|
||||||
label_triples = [t for t in triples if t.p.value == RDF_LABEL]
|
label_triples = [t for t in triples if t.p.iri == RDF_LABEL]
|
||||||
assert len(label_triples) > 0
|
assert len(label_triples) > 0
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|
@ -374,9 +374,9 @@ class TestKnowledgeGraphPipelineIntegration:
|
||||||
),
|
),
|
||||||
triples=[
|
triples=[
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="http://trustgraph.ai/e/machine-learning", is_uri=True),
|
s=Term(type=IRI, iri="http://trustgraph.ai/e/machine-learning"),
|
||||||
p=Value(value=DEFINITION, is_uri=True),
|
p=Term(type=IRI, iri=DEFINITION),
|
||||||
o=Value(value="A subset of AI", is_uri=False)
|
o=Term(type=LITERAL, value="A subset of AI")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
@ -602,9 +602,9 @@ class TestKnowledgeGraphPipelineIntegration:
|
||||||
collection="test_collection",
|
collection="test_collection",
|
||||||
metadata=[
|
metadata=[
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="doc:test", is_uri=True),
|
s=Term(type=IRI, iri="doc:test"),
|
||||||
p=Value(value="dc:title", is_uri=True),
|
p=Term(type=IRI, iri="dc:title"),
|
||||||
o=Value(value="Test Document", is_uri=False)
|
o=Term(type=LITERAL, value="Test Document")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ from unittest.mock import AsyncMock, Mock, patch, MagicMock
|
||||||
from unittest.mock import call
|
from unittest.mock import call
|
||||||
|
|
||||||
from trustgraph.cores.knowledge import KnowledgeManager
|
from trustgraph.cores.knowledge import KnowledgeManager
|
||||||
from trustgraph.schema import KnowledgeResponse, Triples, GraphEmbeddings, Metadata, Triple, Value, EntityEmbeddings
|
from trustgraph.schema import KnowledgeResponse, Triples, GraphEmbeddings, Metadata, Triple, Term, EntityEmbeddings, IRI, LITERAL
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
|
|
@ -77,9 +77,9 @@ def sample_triples():
|
||||||
),
|
),
|
||||||
triples=[
|
triples=[
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="http://example.org/john", is_uri=True),
|
s=Term(type=IRI, iri="http://example.org/john"),
|
||||||
p=Value(value="http://example.org/name", is_uri=True),
|
p=Term(type=IRI, iri="http://example.org/name"),
|
||||||
o=Value(value="John Smith", is_uri=False)
|
o=Term(type=LITERAL, value="John Smith")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
@ -97,7 +97,7 @@ def sample_graph_embeddings():
|
||||||
),
|
),
|
||||||
entities=[
|
entities=[
|
||||||
EntityEmbeddings(
|
EntityEmbeddings(
|
||||||
entity=Value(value="http://example.org/john", is_uri=True),
|
entity=Term(type=IRI, iri="http://example.org/john"),
|
||||||
vectors=[[0.1, 0.2, 0.3]]
|
vectors=[[0.1, 0.2, 0.3]]
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ collecting labels and definitions for entity embedding and retrieval.
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from trustgraph.extract.kg.ontology.extract import Processor
|
from trustgraph.extract.kg.ontology.extract import Processor
|
||||||
from trustgraph.schema.core.primitives import Triple, Value
|
from trustgraph.schema.core.primitives import Triple, Term, IRI, LITERAL
|
||||||
from trustgraph.schema.knowledge.graph import EntityContext
|
from trustgraph.schema.knowledge.graph import EntityContext
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -25,9 +25,9 @@ class TestEntityContextBuilding:
|
||||||
"""Test that entity context is built from rdfs:label."""
|
"""Test that entity context is built from rdfs:label."""
|
||||||
triples = [
|
triples = [
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/cornish-pasty", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/cornish-pasty"),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"),
|
||||||
o=Value(value="Cornish Pasty", is_uri=False)
|
o=Term(type=LITERAL, value="Cornish Pasty")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -35,16 +35,16 @@ class TestEntityContextBuilding:
|
||||||
|
|
||||||
assert len(contexts) == 1, "Should create one entity context"
|
assert len(contexts) == 1, "Should create one entity context"
|
||||||
assert isinstance(contexts[0], EntityContext)
|
assert isinstance(contexts[0], EntityContext)
|
||||||
assert contexts[0].entity.value == "https://example.com/entity/cornish-pasty"
|
assert contexts[0].entity.iri == "https://example.com/entity/cornish-pasty"
|
||||||
assert "Label: Cornish Pasty" in contexts[0].context
|
assert "Label: Cornish Pasty" in contexts[0].context
|
||||||
|
|
||||||
def test_builds_context_from_definition(self, processor):
|
def test_builds_context_from_definition(self, processor):
|
||||||
"""Test that entity context includes definitions."""
|
"""Test that entity context includes definitions."""
|
||||||
triples = [
|
triples = [
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/pasty", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/pasty"),
|
||||||
p=Value(value="http://www.w3.org/2004/02/skos/core#definition", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2004/02/skos/core#definition"),
|
||||||
o=Value(value="A baked pastry filled with savory ingredients", is_uri=False)
|
o=Term(type=LITERAL, value="A baked pastry filled with savory ingredients")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -57,14 +57,14 @@ class TestEntityContextBuilding:
|
||||||
"""Test that label and definition are combined in context."""
|
"""Test that label and definition are combined in context."""
|
||||||
triples = [
|
triples = [
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/recipe1", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/recipe1"),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"),
|
||||||
o=Value(value="Pasty Recipe", is_uri=False)
|
o=Term(type=LITERAL, value="Pasty Recipe")
|
||||||
),
|
),
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/recipe1", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/recipe1"),
|
||||||
p=Value(value="http://www.w3.org/2004/02/skos/core#definition", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2004/02/skos/core#definition"),
|
||||||
o=Value(value="Traditional Cornish pastry recipe", is_uri=False)
|
o=Term(type=LITERAL, value="Traditional Cornish pastry recipe")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -80,14 +80,14 @@ class TestEntityContextBuilding:
|
||||||
"""Test that only the first label is used in context."""
|
"""Test that only the first label is used in context."""
|
||||||
triples = [
|
triples = [
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/food1", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/food1"),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"),
|
||||||
o=Value(value="First Label", is_uri=False)
|
o=Term(type=LITERAL, value="First Label")
|
||||||
),
|
),
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/food1", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/food1"),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"),
|
||||||
o=Value(value="Second Label", is_uri=False)
|
o=Term(type=LITERAL, value="Second Label")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -101,14 +101,14 @@ class TestEntityContextBuilding:
|
||||||
"""Test that all definitions are included in context."""
|
"""Test that all definitions are included in context."""
|
||||||
triples = [
|
triples = [
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/food1", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/food1"),
|
||||||
p=Value(value="http://www.w3.org/2004/02/skos/core#definition", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2004/02/skos/core#definition"),
|
||||||
o=Value(value="First definition", is_uri=False)
|
o=Term(type=LITERAL, value="First definition")
|
||||||
),
|
),
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/food1", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/food1"),
|
||||||
p=Value(value="http://www.w3.org/2004/02/skos/core#definition", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2004/02/skos/core#definition"),
|
||||||
o=Value(value="Second definition", is_uri=False)
|
o=Term(type=LITERAL, value="Second definition")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -123,9 +123,9 @@ class TestEntityContextBuilding:
|
||||||
"""Test that schema.org description is treated as definition."""
|
"""Test that schema.org description is treated as definition."""
|
||||||
triples = [
|
triples = [
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/food1", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/food1"),
|
||||||
p=Value(value="https://schema.org/description", is_uri=True),
|
p=Term(type=IRI, iri="https://schema.org/description"),
|
||||||
o=Value(value="A delicious food item", is_uri=False)
|
o=Term(type=LITERAL, value="A delicious food item")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -138,26 +138,26 @@ class TestEntityContextBuilding:
|
||||||
"""Test that contexts are created for multiple entities."""
|
"""Test that contexts are created for multiple entities."""
|
||||||
triples = [
|
triples = [
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/entity1", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/entity1"),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"),
|
||||||
o=Value(value="Entity One", is_uri=False)
|
o=Term(type=LITERAL, value="Entity One")
|
||||||
),
|
),
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/entity2", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/entity2"),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"),
|
||||||
o=Value(value="Entity Two", is_uri=False)
|
o=Term(type=LITERAL, value="Entity Two")
|
||||||
),
|
),
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/entity3", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/entity3"),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"),
|
||||||
o=Value(value="Entity Three", is_uri=False)
|
o=Term(type=LITERAL, value="Entity Three")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
contexts = processor.build_entity_contexts(triples)
|
contexts = processor.build_entity_contexts(triples)
|
||||||
|
|
||||||
assert len(contexts) == 3, "Should create context for each entity"
|
assert len(contexts) == 3, "Should create context for each entity"
|
||||||
entity_uris = [ctx.entity.value for ctx in contexts]
|
entity_uris = [ctx.entity.iri for ctx in contexts]
|
||||||
assert "https://example.com/entity/entity1" in entity_uris
|
assert "https://example.com/entity/entity1" in entity_uris
|
||||||
assert "https://example.com/entity/entity2" in entity_uris
|
assert "https://example.com/entity/entity2" in entity_uris
|
||||||
assert "https://example.com/entity/entity3" in entity_uris
|
assert "https://example.com/entity/entity3" in entity_uris
|
||||||
|
|
@ -166,9 +166,9 @@ class TestEntityContextBuilding:
|
||||||
"""Test that URI objects are ignored (only literal labels/definitions)."""
|
"""Test that URI objects are ignored (only literal labels/definitions)."""
|
||||||
triples = [
|
triples = [
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/food1", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/food1"),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"),
|
||||||
o=Value(value="https://example.com/some/uri", is_uri=True) # URI, not literal
|
o=Term(type=IRI, iri="https://example.com/some/uri") # URI, not literal
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -181,14 +181,14 @@ class TestEntityContextBuilding:
|
||||||
"""Test that other predicates are ignored."""
|
"""Test that other predicates are ignored."""
|
||||||
triples = [
|
triples = [
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/food1", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/food1"),
|
||||||
p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||||
o=Value(value="http://example.com/Food", is_uri=True)
|
o=Term(type=IRI, iri="http://example.com/Food")
|
||||||
),
|
),
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/food1", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/food1"),
|
||||||
p=Value(value="http://example.com/produces", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/produces"),
|
||||||
o=Value(value="https://example.com/entity/food2", is_uri=True)
|
o=Term(type=IRI, iri="https://example.com/entity/food2")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -205,29 +205,29 @@ class TestEntityContextBuilding:
|
||||||
|
|
||||||
assert len(contexts) == 0, "Empty triple list should return empty contexts"
|
assert len(contexts) == 0, "Empty triple list should return empty contexts"
|
||||||
|
|
||||||
def test_entity_context_has_value_object(self, processor):
|
def test_entity_context_has_term_object(self, processor):
|
||||||
"""Test that EntityContext.entity is a Value object."""
|
"""Test that EntityContext.entity is a Term object."""
|
||||||
triples = [
|
triples = [
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/test", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/test"),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"),
|
||||||
o=Value(value="Test Entity", is_uri=False)
|
o=Term(type=LITERAL, value="Test Entity")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
contexts = processor.build_entity_contexts(triples)
|
contexts = processor.build_entity_contexts(triples)
|
||||||
|
|
||||||
assert len(contexts) == 1
|
assert len(contexts) == 1
|
||||||
assert isinstance(contexts[0].entity, Value), "Entity should be Value object"
|
assert isinstance(contexts[0].entity, Term), "Entity should be Term object"
|
||||||
assert contexts[0].entity.is_uri, "Entity should be marked as URI"
|
assert contexts[0].entity.type == IRI, "Entity should be IRI type"
|
||||||
|
|
||||||
def test_entity_context_text_is_string(self, processor):
|
def test_entity_context_text_is_string(self, processor):
|
||||||
"""Test that EntityContext.context is a string."""
|
"""Test that EntityContext.context is a string."""
|
||||||
triples = [
|
triples = [
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/test", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/test"),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"),
|
||||||
o=Value(value="Test Entity", is_uri=False)
|
o=Term(type=LITERAL, value="Test Entity")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -241,22 +241,22 @@ class TestEntityContextBuilding:
|
||||||
triples = [
|
triples = [
|
||||||
# Entity with label - should create context
|
# Entity with label - should create context
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/entity1", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/entity1"),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"),
|
||||||
o=Value(value="Entity One", is_uri=False)
|
o=Term(type=LITERAL, value="Entity One")
|
||||||
),
|
),
|
||||||
# Entity with only rdf:type - should NOT create context
|
# Entity with only rdf:type - should NOT create context
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/entity2", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/entity2"),
|
||||||
p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||||
o=Value(value="http://example.com/Food", is_uri=True)
|
o=Term(type=IRI, iri="http://example.com/Food")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
contexts = processor.build_entity_contexts(triples)
|
contexts = processor.build_entity_contexts(triples)
|
||||||
|
|
||||||
assert len(contexts) == 1, "Should only create context for entity with label/definition"
|
assert len(contexts) == 1, "Should only create context for entity with label/definition"
|
||||||
assert contexts[0].entity.value == "https://example.com/entity/entity1"
|
assert contexts[0].entity.iri == "https://example.com/entity/entity1"
|
||||||
|
|
||||||
|
|
||||||
class TestEntityContextEdgeCases:
|
class TestEntityContextEdgeCases:
|
||||||
|
|
@ -266,9 +266,9 @@ class TestEntityContextEdgeCases:
|
||||||
"""Test handling of unicode characters in labels."""
|
"""Test handling of unicode characters in labels."""
|
||||||
triples = [
|
triples = [
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/café", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/café"),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"),
|
||||||
o=Value(value="Café Spécial", is_uri=False)
|
o=Term(type=LITERAL, value="Café Spécial")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -282,9 +282,9 @@ class TestEntityContextEdgeCases:
|
||||||
long_def = "This is a very long definition " * 50
|
long_def = "This is a very long definition " * 50
|
||||||
triples = [
|
triples = [
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/test", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/test"),
|
||||||
p=Value(value="http://www.w3.org/2004/02/skos/core#definition", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2004/02/skos/core#definition"),
|
||||||
o=Value(value=long_def, is_uri=False)
|
o=Term(type=LITERAL, value=long_def)
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -297,9 +297,9 @@ class TestEntityContextEdgeCases:
|
||||||
"""Test handling of special characters in context text."""
|
"""Test handling of special characters in context text."""
|
||||||
triples = [
|
triples = [
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/test", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/test"),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"),
|
||||||
o=Value(value="Test & Entity <with> \"quotes\"", is_uri=False)
|
o=Term(type=LITERAL, value="Test & Entity <with> \"quotes\"")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -313,27 +313,27 @@ class TestEntityContextEdgeCases:
|
||||||
triples = [
|
triples = [
|
||||||
# Label - relevant
|
# Label - relevant
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/recipe1", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/recipe1"),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#label", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2000/01/rdf-schema#label"),
|
||||||
o=Value(value="Cornish Pasty Recipe", is_uri=False)
|
o=Term(type=LITERAL, value="Cornish Pasty Recipe")
|
||||||
),
|
),
|
||||||
# Type - irrelevant
|
# Type - irrelevant
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/recipe1", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/recipe1"),
|
||||||
p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||||
o=Value(value="http://example.com/Recipe", is_uri=True)
|
o=Term(type=IRI, iri="http://example.com/Recipe")
|
||||||
),
|
),
|
||||||
# Property - irrelevant
|
# Property - irrelevant
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/recipe1", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/recipe1"),
|
||||||
p=Value(value="http://example.com/produces", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/produces"),
|
||||||
o=Value(value="https://example.com/entity/pasty", is_uri=True)
|
o=Term(type=IRI, iri="https://example.com/entity/pasty")
|
||||||
),
|
),
|
||||||
# Definition - relevant
|
# Definition - relevant
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="https://example.com/entity/recipe1", is_uri=True),
|
s=Term(type=IRI, iri="https://example.com/entity/recipe1"),
|
||||||
p=Value(value="http://www.w3.org/2004/02/skos/core#definition", is_uri=True),
|
p=Term(type=IRI, iri="http://www.w3.org/2004/02/skos/core#definition"),
|
||||||
o=Value(value="Traditional British pastry recipe", is_uri=False)
|
o=Term(type=LITERAL, value="Traditional British pastry recipe")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ the knowledge graph.
|
||||||
import pytest
|
import pytest
|
||||||
from trustgraph.extract.kg.ontology.extract import Processor
|
from trustgraph.extract.kg.ontology.extract import Processor
|
||||||
from trustgraph.extract.kg.ontology.ontology_selector import OntologySubset
|
from trustgraph.extract.kg.ontology.ontology_selector import OntologySubset
|
||||||
from trustgraph.schema.core.primitives import Triple, Value
|
from trustgraph.schema.core.primitives import Triple, Term, IRI, LITERAL
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
|
|
@ -92,12 +92,12 @@ class TestOntologyTripleGeneration:
|
||||||
# Find type triples for Recipe class
|
# Find type triples for Recipe class
|
||||||
recipe_type_triples = [
|
recipe_type_triples = [
|
||||||
t for t in triples
|
t for t in triples
|
||||||
if t.s.value == "http://purl.org/ontology/fo/Recipe"
|
if t.s.iri == "http://purl.org/ontology/fo/Recipe"
|
||||||
and t.p.value == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
|
and t.p.iri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
|
||||||
]
|
]
|
||||||
|
|
||||||
assert len(recipe_type_triples) == 1, "Should generate exactly one type triple per class"
|
assert len(recipe_type_triples) == 1, "Should generate exactly one type triple per class"
|
||||||
assert recipe_type_triples[0].o.value == "http://www.w3.org/2002/07/owl#Class", \
|
assert recipe_type_triples[0].o.iri == "http://www.w3.org/2002/07/owl#Class", \
|
||||||
"Class type should be owl:Class"
|
"Class type should be owl:Class"
|
||||||
|
|
||||||
def test_generates_class_labels(self, extractor, sample_ontology_subset):
|
def test_generates_class_labels(self, extractor, sample_ontology_subset):
|
||||||
|
|
@ -107,14 +107,14 @@ class TestOntologyTripleGeneration:
|
||||||
# Find label triples for Recipe class
|
# Find label triples for Recipe class
|
||||||
recipe_label_triples = [
|
recipe_label_triples = [
|
||||||
t for t in triples
|
t for t in triples
|
||||||
if t.s.value == "http://purl.org/ontology/fo/Recipe"
|
if t.s.iri == "http://purl.org/ontology/fo/Recipe"
|
||||||
and t.p.value == "http://www.w3.org/2000/01/rdf-schema#label"
|
and t.p.iri == "http://www.w3.org/2000/01/rdf-schema#label"
|
||||||
]
|
]
|
||||||
|
|
||||||
assert len(recipe_label_triples) == 1, "Should generate label triple for class"
|
assert len(recipe_label_triples) == 1, "Should generate label triple for class"
|
||||||
assert recipe_label_triples[0].o.value == "Recipe", \
|
assert recipe_label_triples[0].o.value == "Recipe", \
|
||||||
"Label should match class label from ontology"
|
"Label should match class label from ontology"
|
||||||
assert not recipe_label_triples[0].o.is_uri, \
|
assert recipe_label_triples[0].o.type == LITERAL, \
|
||||||
"Label should be a literal, not URI"
|
"Label should be a literal, not URI"
|
||||||
|
|
||||||
def test_generates_class_comments(self, extractor, sample_ontology_subset):
|
def test_generates_class_comments(self, extractor, sample_ontology_subset):
|
||||||
|
|
@ -124,8 +124,8 @@ class TestOntologyTripleGeneration:
|
||||||
# Find comment triples for Recipe class
|
# Find comment triples for Recipe class
|
||||||
recipe_comment_triples = [
|
recipe_comment_triples = [
|
||||||
t for t in triples
|
t for t in triples
|
||||||
if t.s.value == "http://purl.org/ontology/fo/Recipe"
|
if t.s.iri == "http://purl.org/ontology/fo/Recipe"
|
||||||
and t.p.value == "http://www.w3.org/2000/01/rdf-schema#comment"
|
and t.p.iri == "http://www.w3.org/2000/01/rdf-schema#comment"
|
||||||
]
|
]
|
||||||
|
|
||||||
assert len(recipe_comment_triples) == 1, "Should generate comment triple for class"
|
assert len(recipe_comment_triples) == 1, "Should generate comment triple for class"
|
||||||
|
|
@ -139,13 +139,13 @@ class TestOntologyTripleGeneration:
|
||||||
# Find type triples for ingredients property
|
# Find type triples for ingredients property
|
||||||
ingredients_type_triples = [
|
ingredients_type_triples = [
|
||||||
t for t in triples
|
t for t in triples
|
||||||
if t.s.value == "http://purl.org/ontology/fo/ingredients"
|
if t.s.iri == "http://purl.org/ontology/fo/ingredients"
|
||||||
and t.p.value == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
|
and t.p.iri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
|
||||||
]
|
]
|
||||||
|
|
||||||
assert len(ingredients_type_triples) == 1, \
|
assert len(ingredients_type_triples) == 1, \
|
||||||
"Should generate exactly one type triple per object property"
|
"Should generate exactly one type triple per object property"
|
||||||
assert ingredients_type_triples[0].o.value == "http://www.w3.org/2002/07/owl#ObjectProperty", \
|
assert ingredients_type_triples[0].o.iri == "http://www.w3.org/2002/07/owl#ObjectProperty", \
|
||||||
"Object property type should be owl:ObjectProperty"
|
"Object property type should be owl:ObjectProperty"
|
||||||
|
|
||||||
def test_generates_object_property_labels(self, extractor, sample_ontology_subset):
|
def test_generates_object_property_labels(self, extractor, sample_ontology_subset):
|
||||||
|
|
@ -155,8 +155,8 @@ class TestOntologyTripleGeneration:
|
||||||
# Find label triples for ingredients property
|
# Find label triples for ingredients property
|
||||||
ingredients_label_triples = [
|
ingredients_label_triples = [
|
||||||
t for t in triples
|
t for t in triples
|
||||||
if t.s.value == "http://purl.org/ontology/fo/ingredients"
|
if t.s.iri == "http://purl.org/ontology/fo/ingredients"
|
||||||
and t.p.value == "http://www.w3.org/2000/01/rdf-schema#label"
|
and t.p.iri == "http://www.w3.org/2000/01/rdf-schema#label"
|
||||||
]
|
]
|
||||||
|
|
||||||
assert len(ingredients_label_triples) == 1, \
|
assert len(ingredients_label_triples) == 1, \
|
||||||
|
|
@ -171,15 +171,15 @@ class TestOntologyTripleGeneration:
|
||||||
# Find domain triples for ingredients property
|
# Find domain triples for ingredients property
|
||||||
ingredients_domain_triples = [
|
ingredients_domain_triples = [
|
||||||
t for t in triples
|
t for t in triples
|
||||||
if t.s.value == "http://purl.org/ontology/fo/ingredients"
|
if t.s.iri == "http://purl.org/ontology/fo/ingredients"
|
||||||
and t.p.value == "http://www.w3.org/2000/01/rdf-schema#domain"
|
and t.p.iri == "http://www.w3.org/2000/01/rdf-schema#domain"
|
||||||
]
|
]
|
||||||
|
|
||||||
assert len(ingredients_domain_triples) == 1, \
|
assert len(ingredients_domain_triples) == 1, \
|
||||||
"Should generate domain triple for object property"
|
"Should generate domain triple for object property"
|
||||||
assert ingredients_domain_triples[0].o.value == "http://purl.org/ontology/fo/Recipe", \
|
assert ingredients_domain_triples[0].o.iri == "http://purl.org/ontology/fo/Recipe", \
|
||||||
"Domain should be Recipe class URI"
|
"Domain should be Recipe class URI"
|
||||||
assert ingredients_domain_triples[0].o.is_uri, \
|
assert ingredients_domain_triples[0].o.type == IRI, \
|
||||||
"Domain should be a URI reference"
|
"Domain should be a URI reference"
|
||||||
|
|
||||||
def test_generates_object_property_range(self, extractor, sample_ontology_subset):
|
def test_generates_object_property_range(self, extractor, sample_ontology_subset):
|
||||||
|
|
@ -189,13 +189,13 @@ class TestOntologyTripleGeneration:
|
||||||
# Find range triples for produces property
|
# Find range triples for produces property
|
||||||
produces_range_triples = [
|
produces_range_triples = [
|
||||||
t for t in triples
|
t for t in triples
|
||||||
if t.s.value == "http://purl.org/ontology/fo/produces"
|
if t.s.iri == "http://purl.org/ontology/fo/produces"
|
||||||
and t.p.value == "http://www.w3.org/2000/01/rdf-schema#range"
|
and t.p.iri == "http://www.w3.org/2000/01/rdf-schema#range"
|
||||||
]
|
]
|
||||||
|
|
||||||
assert len(produces_range_triples) == 1, \
|
assert len(produces_range_triples) == 1, \
|
||||||
"Should generate range triple for object property"
|
"Should generate range triple for object property"
|
||||||
assert produces_range_triples[0].o.value == "http://purl.org/ontology/fo/Food", \
|
assert produces_range_triples[0].o.iri == "http://purl.org/ontology/fo/Food", \
|
||||||
"Range should be Food class URI"
|
"Range should be Food class URI"
|
||||||
|
|
||||||
def test_generates_datatype_property_type_triples(self, extractor, sample_ontology_subset):
|
def test_generates_datatype_property_type_triples(self, extractor, sample_ontology_subset):
|
||||||
|
|
@ -205,13 +205,13 @@ class TestOntologyTripleGeneration:
|
||||||
# Find type triples for serves property
|
# Find type triples for serves property
|
||||||
serves_type_triples = [
|
serves_type_triples = [
|
||||||
t for t in triples
|
t for t in triples
|
||||||
if t.s.value == "http://purl.org/ontology/fo/serves"
|
if t.s.iri == "http://purl.org/ontology/fo/serves"
|
||||||
and t.p.value == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
|
and t.p.iri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
|
||||||
]
|
]
|
||||||
|
|
||||||
assert len(serves_type_triples) == 1, \
|
assert len(serves_type_triples) == 1, \
|
||||||
"Should generate exactly one type triple per datatype property"
|
"Should generate exactly one type triple per datatype property"
|
||||||
assert serves_type_triples[0].o.value == "http://www.w3.org/2002/07/owl#DatatypeProperty", \
|
assert serves_type_triples[0].o.iri == "http://www.w3.org/2002/07/owl#DatatypeProperty", \
|
||||||
"Datatype property type should be owl:DatatypeProperty"
|
"Datatype property type should be owl:DatatypeProperty"
|
||||||
|
|
||||||
def test_generates_datatype_property_range(self, extractor, sample_ontology_subset):
|
def test_generates_datatype_property_range(self, extractor, sample_ontology_subset):
|
||||||
|
|
@ -221,13 +221,13 @@ class TestOntologyTripleGeneration:
|
||||||
# Find range triples for serves property
|
# Find range triples for serves property
|
||||||
serves_range_triples = [
|
serves_range_triples = [
|
||||||
t for t in triples
|
t for t in triples
|
||||||
if t.s.value == "http://purl.org/ontology/fo/serves"
|
if t.s.iri == "http://purl.org/ontology/fo/serves"
|
||||||
and t.p.value == "http://www.w3.org/2000/01/rdf-schema#range"
|
and t.p.iri == "http://www.w3.org/2000/01/rdf-schema#range"
|
||||||
]
|
]
|
||||||
|
|
||||||
assert len(serves_range_triples) == 1, \
|
assert len(serves_range_triples) == 1, \
|
||||||
"Should generate range triple for datatype property"
|
"Should generate range triple for datatype property"
|
||||||
assert serves_range_triples[0].o.value == "http://www.w3.org/2001/XMLSchema#string", \
|
assert serves_range_triples[0].o.iri == "http://www.w3.org/2001/XMLSchema#string", \
|
||||||
"Range should be XSD type URI (xsd:string expanded)"
|
"Range should be XSD type URI (xsd:string expanded)"
|
||||||
|
|
||||||
def test_generates_triples_for_all_classes(self, extractor, sample_ontology_subset):
|
def test_generates_triples_for_all_classes(self, extractor, sample_ontology_subset):
|
||||||
|
|
@ -236,9 +236,9 @@ class TestOntologyTripleGeneration:
|
||||||
|
|
||||||
# Count unique class subjects
|
# Count unique class subjects
|
||||||
class_subjects = set(
|
class_subjects = set(
|
||||||
t.s.value for t in triples
|
t.s.iri for t in triples
|
||||||
if t.p.value == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
|
if t.p.iri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
|
||||||
and t.o.value == "http://www.w3.org/2002/07/owl#Class"
|
and t.o.iri == "http://www.w3.org/2002/07/owl#Class"
|
||||||
)
|
)
|
||||||
|
|
||||||
assert len(class_subjects) == 3, \
|
assert len(class_subjects) == 3, \
|
||||||
|
|
@ -250,9 +250,9 @@ class TestOntologyTripleGeneration:
|
||||||
|
|
||||||
# Count unique property subjects (object + datatype properties)
|
# Count unique property subjects (object + datatype properties)
|
||||||
property_subjects = set(
|
property_subjects = set(
|
||||||
t.s.value for t in triples
|
t.s.iri for t in triples
|
||||||
if t.p.value == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
|
if t.p.iri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
|
||||||
and ("ObjectProperty" in t.o.value or "DatatypeProperty" in t.o.value)
|
and ("ObjectProperty" in t.o.iri or "DatatypeProperty" in t.o.iri)
|
||||||
)
|
)
|
||||||
|
|
||||||
assert len(property_subjects) == 3, \
|
assert len(property_subjects) == 3, \
|
||||||
|
|
@ -276,7 +276,7 @@ class TestOntologyTripleGeneration:
|
||||||
# Should still generate proper RDF triples despite dict field names
|
# Should still generate proper RDF triples despite dict field names
|
||||||
label_triples = [
|
label_triples = [
|
||||||
t for t in triples
|
t for t in triples
|
||||||
if t.p.value == "http://www.w3.org/2000/01/rdf-schema#label"
|
if t.p.iri == "http://www.w3.org/2000/01/rdf-schema#label"
|
||||||
]
|
]
|
||||||
assert len(label_triples) > 0, \
|
assert len(label_triples) > 0, \
|
||||||
"Should generate rdfs:label triples from dict 'labels' field"
|
"Should generate rdfs:label triples from dict 'labels' field"
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ and extracts/validates triples from LLM responses.
|
||||||
import pytest
|
import pytest
|
||||||
from trustgraph.extract.kg.ontology.extract import Processor
|
from trustgraph.extract.kg.ontology.extract import Processor
|
||||||
from trustgraph.extract.kg.ontology.ontology_selector import OntologySubset
|
from trustgraph.extract.kg.ontology.ontology_selector import OntologySubset
|
||||||
from trustgraph.schema.core.primitives import Triple, Value
|
from trustgraph.schema.core.primitives import Triple, Term, IRI, LITERAL
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
|
|
@ -248,9 +248,9 @@ class TestTripleParsing:
|
||||||
validated = extractor.parse_and_validate_triples(triples_response, sample_ontology_subset)
|
validated = extractor.parse_and_validate_triples(triples_response, sample_ontology_subset)
|
||||||
|
|
||||||
assert len(validated) == 1, "Should parse one valid triple"
|
assert len(validated) == 1, "Should parse one valid triple"
|
||||||
assert validated[0].s.value == "https://trustgraph.ai/food/cornish-pasty"
|
assert validated[0].s.iri == "https://trustgraph.ai/food/cornish-pasty"
|
||||||
assert validated[0].p.value == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
|
assert validated[0].p.iri == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
|
||||||
assert validated[0].o.value == "http://purl.org/ontology/fo/Recipe"
|
assert validated[0].o.iri == "http://purl.org/ontology/fo/Recipe"
|
||||||
|
|
||||||
def test_parse_multiple_triples(self, extractor, sample_ontology_subset):
|
def test_parse_multiple_triples(self, extractor, sample_ontology_subset):
|
||||||
"""Test parsing multiple triples."""
|
"""Test parsing multiple triples."""
|
||||||
|
|
@ -307,11 +307,11 @@ class TestTripleParsing:
|
||||||
|
|
||||||
assert len(validated) == 1
|
assert len(validated) == 1
|
||||||
# Subject should be expanded to entity URI
|
# Subject should be expanded to entity URI
|
||||||
assert validated[0].s.value.startswith("https://trustgraph.ai/food/")
|
assert validated[0].s.iri.startswith("https://trustgraph.ai/food/")
|
||||||
# Predicate should be expanded to ontology URI
|
# Predicate should be expanded to ontology URI
|
||||||
assert validated[0].p.value == "http://purl.org/ontology/fo/produces"
|
assert validated[0].p.iri == "http://purl.org/ontology/fo/produces"
|
||||||
# Object should be expanded to class URI
|
# Object should be expanded to class URI
|
||||||
assert validated[0].o.value == "http://purl.org/ontology/fo/Food"
|
assert validated[0].o.iri == "http://purl.org/ontology/fo/Food"
|
||||||
|
|
||||||
def test_creates_proper_triple_objects(self, extractor, sample_ontology_subset):
|
def test_creates_proper_triple_objects(self, extractor, sample_ontology_subset):
|
||||||
"""Test that Triple objects are properly created."""
|
"""Test that Triple objects are properly created."""
|
||||||
|
|
@ -324,12 +324,12 @@ class TestTripleParsing:
|
||||||
assert len(validated) == 1
|
assert len(validated) == 1
|
||||||
triple = validated[0]
|
triple = validated[0]
|
||||||
assert isinstance(triple, Triple), "Should create Triple objects"
|
assert isinstance(triple, Triple), "Should create Triple objects"
|
||||||
assert isinstance(triple.s, Value), "Subject should be Value object"
|
assert isinstance(triple.s, Term), "Subject should be Term object"
|
||||||
assert isinstance(triple.p, Value), "Predicate should be Value object"
|
assert isinstance(triple.p, Term), "Predicate should be Term object"
|
||||||
assert isinstance(triple.o, Value), "Object should be Value object"
|
assert isinstance(triple.o, Term), "Object should be Term object"
|
||||||
assert triple.s.is_uri, "Subject should be marked as URI"
|
assert triple.s.type == IRI, "Subject should be IRI type"
|
||||||
assert triple.p.is_uri, "Predicate should be marked as URI"
|
assert triple.p.type == IRI, "Predicate should be IRI type"
|
||||||
assert not triple.o.is_uri, "Object literal should not be marked as URI"
|
assert triple.o.type == LITERAL, "Object literal should be LITERAL type"
|
||||||
|
|
||||||
|
|
||||||
class TestURIExpansionInExtraction:
|
class TestURIExpansionInExtraction:
|
||||||
|
|
@ -343,8 +343,8 @@ class TestURIExpansionInExtraction:
|
||||||
|
|
||||||
validated = extractor.parse_and_validate_triples(triples_response, sample_ontology_subset)
|
validated = extractor.parse_and_validate_triples(triples_response, sample_ontology_subset)
|
||||||
|
|
||||||
assert validated[0].o.value == "http://purl.org/ontology/fo/Recipe"
|
assert validated[0].o.iri == "http://purl.org/ontology/fo/Recipe"
|
||||||
assert validated[0].o.is_uri, "Class reference should be URI"
|
assert validated[0].o.type == IRI, "Class reference should be URI"
|
||||||
|
|
||||||
def test_expands_property_names(self, extractor, sample_ontology_subset):
|
def test_expands_property_names(self, extractor, sample_ontology_subset):
|
||||||
"""Test that property names are expanded to full URIs."""
|
"""Test that property names are expanded to full URIs."""
|
||||||
|
|
@ -354,7 +354,7 @@ class TestURIExpansionInExtraction:
|
||||||
|
|
||||||
validated = extractor.parse_and_validate_triples(triples_response, sample_ontology_subset)
|
validated = extractor.parse_and_validate_triples(triples_response, sample_ontology_subset)
|
||||||
|
|
||||||
assert validated[0].p.value == "http://purl.org/ontology/fo/produces"
|
assert validated[0].p.iri == "http://purl.org/ontology/fo/produces"
|
||||||
|
|
||||||
def test_expands_entity_instances(self, extractor, sample_ontology_subset):
|
def test_expands_entity_instances(self, extractor, sample_ontology_subset):
|
||||||
"""Test that entity instances get constructed URIs."""
|
"""Test that entity instances get constructed URIs."""
|
||||||
|
|
@ -364,8 +364,8 @@ class TestURIExpansionInExtraction:
|
||||||
|
|
||||||
validated = extractor.parse_and_validate_triples(triples_response, sample_ontology_subset)
|
validated = extractor.parse_and_validate_triples(triples_response, sample_ontology_subset)
|
||||||
|
|
||||||
assert validated[0].s.value.startswith("https://trustgraph.ai/food/")
|
assert validated[0].s.iri.startswith("https://trustgraph.ai/food/")
|
||||||
assert "my-special-recipe" in validated[0].s.value
|
assert "my-special-recipe" in validated[0].s.iri
|
||||||
|
|
||||||
|
|
||||||
class TestEdgeCases:
|
class TestEdgeCases:
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ import pytest
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
from trustgraph.gateway.dispatch.serialize import to_value, to_subgraph, serialize_value
|
from trustgraph.gateway.dispatch.serialize import to_value, to_subgraph, serialize_value
|
||||||
from trustgraph.schema import Value, Triple
|
from trustgraph.schema import Term, Triple, IRI, LITERAL
|
||||||
|
|
||||||
|
|
||||||
class TestDispatchSerialize:
|
class TestDispatchSerialize:
|
||||||
|
|
@ -14,36 +14,36 @@ class TestDispatchSerialize:
|
||||||
|
|
||||||
def test_to_value_with_uri(self):
|
def test_to_value_with_uri(self):
|
||||||
"""Test to_value function with URI"""
|
"""Test to_value function with URI"""
|
||||||
input_data = {"v": "http://example.com/resource", "e": True}
|
input_data = {"t": "i", "i": "http://example.com/resource"}
|
||||||
|
|
||||||
result = to_value(input_data)
|
result = to_value(input_data)
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "http://example.com/resource"
|
assert result.iri == "http://example.com/resource"
|
||||||
assert result.is_uri is True
|
assert result.type == IRI
|
||||||
|
|
||||||
def test_to_value_with_literal(self):
|
def test_to_value_with_literal(self):
|
||||||
"""Test to_value function with literal value"""
|
"""Test to_value function with literal value"""
|
||||||
input_data = {"v": "literal string", "e": False}
|
input_data = {"t": "l", "v": "literal string"}
|
||||||
|
|
||||||
result = to_value(input_data)
|
result = to_value(input_data)
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "literal string"
|
assert result.value == "literal string"
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
def test_to_subgraph_with_multiple_triples(self):
|
def test_to_subgraph_with_multiple_triples(self):
|
||||||
"""Test to_subgraph function with multiple triples"""
|
"""Test to_subgraph function with multiple triples"""
|
||||||
input_data = [
|
input_data = [
|
||||||
{
|
{
|
||||||
"s": {"v": "subject1", "e": True},
|
"s": {"t": "i", "i": "subject1"},
|
||||||
"p": {"v": "predicate1", "e": True},
|
"p": {"t": "i", "i": "predicate1"},
|
||||||
"o": {"v": "object1", "e": False}
|
"o": {"t": "l", "v": "object1"}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"s": {"v": "subject2", "e": False},
|
"s": {"t": "l", "v": "subject2"},
|
||||||
"p": {"v": "predicate2", "e": True},
|
"p": {"t": "i", "i": "predicate2"},
|
||||||
"o": {"v": "object2", "e": True}
|
"o": {"t": "i", "i": "object2"}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -53,16 +53,16 @@ class TestDispatchSerialize:
|
||||||
assert all(isinstance(triple, Triple) for triple in result)
|
assert all(isinstance(triple, Triple) for triple in result)
|
||||||
|
|
||||||
# Check first triple
|
# Check first triple
|
||||||
assert result[0].s.value == "subject1"
|
assert result[0].s.iri == "subject1"
|
||||||
assert result[0].s.is_uri is True
|
assert result[0].s.type == IRI
|
||||||
assert result[0].p.value == "predicate1"
|
assert result[0].p.iri == "predicate1"
|
||||||
assert result[0].p.is_uri is True
|
assert result[0].p.type == IRI
|
||||||
assert result[0].o.value == "object1"
|
assert result[0].o.value == "object1"
|
||||||
assert result[0].o.is_uri is False
|
assert result[0].o.type == LITERAL
|
||||||
|
|
||||||
# Check second triple
|
# Check second triple
|
||||||
assert result[1].s.value == "subject2"
|
assert result[1].s.value == "subject2"
|
||||||
assert result[1].s.is_uri is False
|
assert result[1].s.type == LITERAL
|
||||||
|
|
||||||
def test_to_subgraph_with_empty_list(self):
|
def test_to_subgraph_with_empty_list(self):
|
||||||
"""Test to_subgraph function with empty input"""
|
"""Test to_subgraph function with empty input"""
|
||||||
|
|
@ -74,16 +74,16 @@ class TestDispatchSerialize:
|
||||||
|
|
||||||
def test_serialize_value_with_uri(self):
|
def test_serialize_value_with_uri(self):
|
||||||
"""Test serialize_value function with URI value"""
|
"""Test serialize_value function with URI value"""
|
||||||
value = Value(value="http://example.com/test", is_uri=True)
|
term = Term(type=IRI, iri="http://example.com/test")
|
||||||
|
|
||||||
result = serialize_value(value)
|
result = serialize_value(term)
|
||||||
|
|
||||||
assert result == {"v": "http://example.com/test", "e": True}
|
assert result == {"t": "i", "i": "http://example.com/test"}
|
||||||
|
|
||||||
def test_serialize_value_with_literal(self):
|
def test_serialize_value_with_literal(self):
|
||||||
"""Test serialize_value function with literal value"""
|
"""Test serialize_value function with literal value"""
|
||||||
value = Value(value="test literal", is_uri=False)
|
term = Term(type=LITERAL, value="test literal")
|
||||||
|
|
||||||
result = serialize_value(value)
|
result = serialize_value(term)
|
||||||
|
|
||||||
assert result == {"v": "test literal", "e": False}
|
assert result == {"t": "l", "v": "test literal"}
|
||||||
|
|
@ -6,11 +6,21 @@ import pytest
|
||||||
from unittest.mock import Mock, AsyncMock
|
from unittest.mock import Mock, AsyncMock
|
||||||
|
|
||||||
# Mock schema classes for testing
|
# Mock schema classes for testing
|
||||||
class Value:
|
# Term type constants
|
||||||
def __init__(self, value, is_uri, type):
|
IRI = "i"
|
||||||
self.value = value
|
LITERAL = "l"
|
||||||
self.is_uri = is_uri
|
BLANK = "b"
|
||||||
|
TRIPLE = "t"
|
||||||
|
|
||||||
|
class Term:
|
||||||
|
def __init__(self, type, iri=None, value=None, id=None, datatype=None, language=None, triple=None):
|
||||||
self.type = type
|
self.type = type
|
||||||
|
self.iri = iri
|
||||||
|
self.value = value
|
||||||
|
self.id = id
|
||||||
|
self.datatype = datatype
|
||||||
|
self.language = language
|
||||||
|
self.triple = triple
|
||||||
|
|
||||||
class Triple:
|
class Triple:
|
||||||
def __init__(self, s, p, o):
|
def __init__(self, s, p, o):
|
||||||
|
|
@ -66,32 +76,30 @@ def sample_relationships():
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def sample_value_uri():
|
def sample_term_uri():
|
||||||
"""Sample URI Value object"""
|
"""Sample URI Term object"""
|
||||||
return Value(
|
return Term(
|
||||||
value="http://example.com/person/john-smith",
|
type=IRI,
|
||||||
is_uri=True,
|
iri="http://example.com/person/john-smith"
|
||||||
type=""
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def sample_value_literal():
|
def sample_term_literal():
|
||||||
"""Sample literal Value object"""
|
"""Sample literal Term object"""
|
||||||
return Value(
|
return Term(
|
||||||
value="John Smith",
|
type=LITERAL,
|
||||||
is_uri=False,
|
value="John Smith"
|
||||||
type="string"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def sample_triple(sample_value_uri, sample_value_literal):
|
def sample_triple(sample_term_uri, sample_term_literal):
|
||||||
"""Sample Triple object"""
|
"""Sample Triple object"""
|
||||||
return Triple(
|
return Triple(
|
||||||
s=sample_value_uri,
|
s=sample_term_uri,
|
||||||
p=Value(value="http://schema.org/name", is_uri=True, type=""),
|
p=Term(type=IRI, iri="http://schema.org/name"),
|
||||||
o=sample_value_literal
|
o=sample_term_literal
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ import json
|
||||||
from unittest.mock import AsyncMock, MagicMock, patch
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
from trustgraph.extract.kg.agent.extract import Processor as AgentKgExtractor
|
from trustgraph.extract.kg.agent.extract import Processor as AgentKgExtractor
|
||||||
from trustgraph.schema import Chunk, Triple, Triples, Metadata, Value, Error
|
from trustgraph.schema import Chunk, Triple, Triples, Metadata, Term, Error, IRI, LITERAL
|
||||||
from trustgraph.schema import EntityContext, EntityContexts
|
from trustgraph.schema import EntityContext, EntityContexts
|
||||||
from trustgraph.rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF
|
from trustgraph.rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF
|
||||||
from trustgraph.template.prompt_manager import PromptManager
|
from trustgraph.template.prompt_manager import PromptManager
|
||||||
|
|
@ -53,9 +53,9 @@ class TestAgentKgExtractor:
|
||||||
id="doc123",
|
id="doc123",
|
||||||
metadata=[
|
metadata=[
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="doc123", is_uri=True),
|
s=Term(type=IRI, iri="doc123"),
|
||||||
p=Value(value="http://example.org/type", is_uri=True),
|
p=Term(type=IRI, iri="http://example.org/type"),
|
||||||
o=Value(value="document", is_uri=False)
|
o=Term(type=LITERAL, value="document")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
@ -178,27 +178,27 @@ This is not JSON at all
|
||||||
triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata)
|
triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata)
|
||||||
|
|
||||||
# Check entity label triple
|
# Check entity label triple
|
||||||
label_triple = next((t for t in triples if t.p.value == RDF_LABEL and t.o.value == "Machine Learning"), None)
|
label_triple = next((t for t in triples if t.p.iri == RDF_LABEL and t.o.value == "Machine Learning"), None)
|
||||||
assert label_triple is not None
|
assert label_triple is not None
|
||||||
assert label_triple.s.value == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning"
|
assert label_triple.s.iri == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning"
|
||||||
assert label_triple.s.is_uri == True
|
assert label_triple.s.type == IRI
|
||||||
assert label_triple.o.is_uri == False
|
assert label_triple.o.type == LITERAL
|
||||||
|
|
||||||
# Check definition triple
|
# Check definition triple
|
||||||
def_triple = next((t for t in triples if t.p.value == DEFINITION), None)
|
def_triple = next((t for t in triples if t.p.iri == DEFINITION), None)
|
||||||
assert def_triple is not None
|
assert def_triple is not None
|
||||||
assert def_triple.s.value == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning"
|
assert def_triple.s.iri == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning"
|
||||||
assert def_triple.o.value == "A subset of AI that enables learning from data."
|
assert def_triple.o.value == "A subset of AI that enables learning from data."
|
||||||
|
|
||||||
# Check subject-of triple
|
# Check subject-of triple
|
||||||
subject_of_triple = next((t for t in triples if t.p.value == SUBJECT_OF), None)
|
subject_of_triple = next((t for t in triples if t.p.iri == SUBJECT_OF), None)
|
||||||
assert subject_of_triple is not None
|
assert subject_of_triple is not None
|
||||||
assert subject_of_triple.s.value == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning"
|
assert subject_of_triple.s.iri == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning"
|
||||||
assert subject_of_triple.o.value == "doc123"
|
assert subject_of_triple.o.iri == "doc123"
|
||||||
|
|
||||||
# Check entity context
|
# Check entity context
|
||||||
assert len(entity_contexts) == 1
|
assert len(entity_contexts) == 1
|
||||||
assert entity_contexts[0].entity.value == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning"
|
assert entity_contexts[0].entity.iri == f"{TRUSTGRAPH_ENTITIES}Machine%20Learning"
|
||||||
assert entity_contexts[0].context == "A subset of AI that enables learning from data."
|
assert entity_contexts[0].context == "A subset of AI that enables learning from data."
|
||||||
|
|
||||||
def test_process_extraction_data_relationships(self, agent_extractor, sample_metadata):
|
def test_process_extraction_data_relationships(self, agent_extractor, sample_metadata):
|
||||||
|
|
@ -220,23 +220,23 @@ This is not JSON at all
|
||||||
predicate_uri = f"{TRUSTGRAPH_ENTITIES}is_subset_of"
|
predicate_uri = f"{TRUSTGRAPH_ENTITIES}is_subset_of"
|
||||||
|
|
||||||
# Find label triples
|
# Find label triples
|
||||||
subject_label = next((t for t in triples if t.s.value == subject_uri and t.p.value == RDF_LABEL), None)
|
subject_label = next((t for t in triples if t.s.iri == subject_uri and t.p.iri == RDF_LABEL), None)
|
||||||
assert subject_label is not None
|
assert subject_label is not None
|
||||||
assert subject_label.o.value == "Machine Learning"
|
assert subject_label.o.value == "Machine Learning"
|
||||||
|
|
||||||
predicate_label = next((t for t in triples if t.s.value == predicate_uri and t.p.value == RDF_LABEL), None)
|
predicate_label = next((t for t in triples if t.s.iri == predicate_uri and t.p.iri == RDF_LABEL), None)
|
||||||
assert predicate_label is not None
|
assert predicate_label is not None
|
||||||
assert predicate_label.o.value == "is_subset_of"
|
assert predicate_label.o.value == "is_subset_of"
|
||||||
|
|
||||||
# Check main relationship triple
|
# Check main relationship triple
|
||||||
object_uri = f"{TRUSTGRAPH_ENTITIES}Artificial%20Intelligence"
|
object_uri = f"{TRUSTGRAPH_ENTITIES}Artificial%20Intelligence"
|
||||||
rel_triple = next((t for t in triples if t.s.value == subject_uri and t.p.value == predicate_uri), None)
|
rel_triple = next((t for t in triples if t.s.iri == subject_uri and t.p.iri == predicate_uri), None)
|
||||||
assert rel_triple is not None
|
assert rel_triple is not None
|
||||||
assert rel_triple.o.value == object_uri
|
assert rel_triple.o.iri == object_uri
|
||||||
assert rel_triple.o.is_uri == True
|
assert rel_triple.o.type == IRI
|
||||||
|
|
||||||
# Check subject-of relationships
|
# Check subject-of relationships
|
||||||
subject_of_triples = [t for t in triples if t.p.value == SUBJECT_OF and t.o.value == "doc123"]
|
subject_of_triples = [t for t in triples if t.p.iri == SUBJECT_OF and t.o.iri == "doc123"]
|
||||||
assert len(subject_of_triples) >= 2 # At least subject and predicate should have subject-of relations
|
assert len(subject_of_triples) >= 2 # At least subject and predicate should have subject-of relations
|
||||||
|
|
||||||
def test_process_extraction_data_literal_object(self, agent_extractor, sample_metadata):
|
def test_process_extraction_data_literal_object(self, agent_extractor, sample_metadata):
|
||||||
|
|
@ -254,7 +254,7 @@ This is not JSON at all
|
||||||
triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata)
|
triples, entity_contexts = agent_extractor.process_extraction_data(data, sample_metadata)
|
||||||
|
|
||||||
# Check that object labels are not created for literal objects
|
# Check that object labels are not created for literal objects
|
||||||
object_labels = [t for t in triples if t.p.value == RDF_LABEL and t.o.value == "95%"]
|
object_labels = [t for t in triples if t.p.iri == RDF_LABEL and t.o.value == "95%"]
|
||||||
# Based on the code logic, it should not create object labels for non-entity objects
|
# Based on the code logic, it should not create object labels for non-entity objects
|
||||||
# But there might be a bug in the original implementation
|
# But there might be a bug in the original implementation
|
||||||
|
|
||||||
|
|
@ -263,12 +263,12 @@ This is not JSON at all
|
||||||
triples, entity_contexts = agent_extractor.process_extraction_data(sample_extraction_data, sample_metadata)
|
triples, entity_contexts = agent_extractor.process_extraction_data(sample_extraction_data, sample_metadata)
|
||||||
|
|
||||||
# Check that we have both definition and relationship triples
|
# Check that we have both definition and relationship triples
|
||||||
definition_triples = [t for t in triples if t.p.value == DEFINITION]
|
definition_triples = [t for t in triples if t.p.iri == DEFINITION]
|
||||||
assert len(definition_triples) == 2 # Two definitions
|
assert len(definition_triples) == 2 # Two definitions
|
||||||
|
|
||||||
# Check entity contexts are created for definitions
|
# Check entity contexts are created for definitions
|
||||||
assert len(entity_contexts) == 2
|
assert len(entity_contexts) == 2
|
||||||
entity_uris = [ec.entity.value for ec in entity_contexts]
|
entity_uris = [ec.entity.iri for ec in entity_contexts]
|
||||||
assert f"{TRUSTGRAPH_ENTITIES}Machine%20Learning" in entity_uris
|
assert f"{TRUSTGRAPH_ENTITIES}Machine%20Learning" in entity_uris
|
||||||
assert f"{TRUSTGRAPH_ENTITIES}Neural%20Networks" in entity_uris
|
assert f"{TRUSTGRAPH_ENTITIES}Neural%20Networks" in entity_uris
|
||||||
|
|
||||||
|
|
@ -282,7 +282,7 @@ This is not JSON at all
|
||||||
triples, entity_contexts = agent_extractor.process_extraction_data(data, metadata)
|
triples, entity_contexts = agent_extractor.process_extraction_data(data, metadata)
|
||||||
|
|
||||||
# Should not create subject-of relationships when no metadata ID
|
# Should not create subject-of relationships when no metadata ID
|
||||||
subject_of_triples = [t for t in triples if t.p.value == SUBJECT_OF]
|
subject_of_triples = [t for t in triples if t.p.iri == SUBJECT_OF]
|
||||||
assert len(subject_of_triples) == 0
|
assert len(subject_of_triples) == 0
|
||||||
|
|
||||||
# Should still create entity contexts
|
# Should still create entity contexts
|
||||||
|
|
@ -330,9 +330,9 @@ This is not JSON at all
|
||||||
|
|
||||||
test_triples = [
|
test_triples = [
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="test:subject", is_uri=True),
|
s=Term(type=IRI, iri="test:subject"),
|
||||||
p=Value(value="test:predicate", is_uri=True),
|
p=Term(type=IRI, iri="test:predicate"),
|
||||||
o=Value(value="test object", is_uri=False)
|
o=Term(type=LITERAL, value="test object")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -348,7 +348,7 @@ This is not JSON at all
|
||||||
# Note: metadata.metadata is now empty array in the new implementation
|
# Note: metadata.metadata is now empty array in the new implementation
|
||||||
assert sent_triples.metadata.metadata == []
|
assert sent_triples.metadata.metadata == []
|
||||||
assert len(sent_triples.triples) == 1
|
assert len(sent_triples.triples) == 1
|
||||||
assert sent_triples.triples[0].s.value == "test:subject"
|
assert sent_triples.triples[0].s.iri == "test:subject"
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_emit_entity_contexts(self, agent_extractor, sample_metadata):
|
async def test_emit_entity_contexts(self, agent_extractor, sample_metadata):
|
||||||
|
|
@ -357,7 +357,7 @@ This is not JSON at all
|
||||||
|
|
||||||
test_contexts = [
|
test_contexts = [
|
||||||
EntityContext(
|
EntityContext(
|
||||||
entity=Value(value="test:entity", is_uri=True),
|
entity=Term(type=IRI, iri="test:entity"),
|
||||||
context="Test context"
|
context="Test context"
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
@ -374,7 +374,7 @@ This is not JSON at all
|
||||||
# Note: metadata.metadata is now empty array in the new implementation
|
# Note: metadata.metadata is now empty array in the new implementation
|
||||||
assert sent_contexts.metadata.metadata == []
|
assert sent_contexts.metadata.metadata == []
|
||||||
assert len(sent_contexts.entities) == 1
|
assert len(sent_contexts.entities) == 1
|
||||||
assert sent_contexts.entities[0].entity.value == "test:entity"
|
assert sent_contexts.entities[0].entity.iri == "test:entity"
|
||||||
|
|
||||||
def test_agent_extractor_initialization_params(self):
|
def test_agent_extractor_initialization_params(self):
|
||||||
"""Test agent extractor parameter validation"""
|
"""Test agent extractor parameter validation"""
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ import urllib.parse
|
||||||
from unittest.mock import AsyncMock, MagicMock
|
from unittest.mock import AsyncMock, MagicMock
|
||||||
|
|
||||||
from trustgraph.extract.kg.agent.extract import Processor as AgentKgExtractor
|
from trustgraph.extract.kg.agent.extract import Processor as AgentKgExtractor
|
||||||
from trustgraph.schema import Chunk, Triple, Triples, Metadata, Value
|
from trustgraph.schema import Chunk, Triple, Triples, Metadata, Term, IRI, LITERAL
|
||||||
from trustgraph.schema import EntityContext, EntityContexts
|
from trustgraph.schema import EntityContext, EntityContexts
|
||||||
from trustgraph.rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF
|
from trustgraph.rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF
|
||||||
|
|
||||||
|
|
@ -188,7 +188,7 @@ class TestAgentKgExtractionEdgeCases:
|
||||||
triples, contexts = agent_extractor.process_extraction_data(data, metadata)
|
triples, contexts = agent_extractor.process_extraction_data(data, metadata)
|
||||||
|
|
||||||
# Should not create subject-of triples when ID is empty string
|
# Should not create subject-of triples when ID is empty string
|
||||||
subject_of_triples = [t for t in triples if t.p.value == SUBJECT_OF]
|
subject_of_triples = [t for t in triples if t.p.iri == SUBJECT_OF]
|
||||||
assert len(subject_of_triples) == 0
|
assert len(subject_of_triples) == 0
|
||||||
|
|
||||||
def test_process_extraction_data_special_entity_names(self, agent_extractor):
|
def test_process_extraction_data_special_entity_names(self, agent_extractor):
|
||||||
|
|
@ -221,7 +221,7 @@ class TestAgentKgExtractionEdgeCases:
|
||||||
# Verify URIs were properly encoded
|
# Verify URIs were properly encoded
|
||||||
for i, entity in enumerate(special_entities):
|
for i, entity in enumerate(special_entities):
|
||||||
expected_uri = f"{TRUSTGRAPH_ENTITIES}{urllib.parse.quote(entity)}"
|
expected_uri = f"{TRUSTGRAPH_ENTITIES}{urllib.parse.quote(entity)}"
|
||||||
assert contexts[i].entity.value == expected_uri
|
assert contexts[i].entity.iri == expected_uri
|
||||||
|
|
||||||
def test_process_extraction_data_very_long_definitions(self, agent_extractor):
|
def test_process_extraction_data_very_long_definitions(self, agent_extractor):
|
||||||
"""Test processing with very long entity definitions"""
|
"""Test processing with very long entity definitions"""
|
||||||
|
|
@ -241,7 +241,7 @@ class TestAgentKgExtractionEdgeCases:
|
||||||
assert contexts[0].context == long_definition
|
assert contexts[0].context == long_definition
|
||||||
|
|
||||||
# Find definition triple
|
# Find definition triple
|
||||||
def_triple = next((t for t in triples if t.p.value == DEFINITION), None)
|
def_triple = next((t for t in triples if t.p.iri == DEFINITION), None)
|
||||||
assert def_triple is not None
|
assert def_triple is not None
|
||||||
assert def_triple.o.value == long_definition
|
assert def_triple.o.value == long_definition
|
||||||
|
|
||||||
|
|
@ -262,7 +262,7 @@ class TestAgentKgExtractionEdgeCases:
|
||||||
assert len(contexts) == 4
|
assert len(contexts) == 4
|
||||||
|
|
||||||
# Check that both definitions for "Machine Learning" are present
|
# Check that both definitions for "Machine Learning" are present
|
||||||
ml_contexts = [ec for ec in contexts if "Machine%20Learning" in ec.entity.value]
|
ml_contexts = [ec for ec in contexts if "Machine%20Learning" in ec.entity.iri]
|
||||||
assert len(ml_contexts) == 2
|
assert len(ml_contexts) == 2
|
||||||
assert ml_contexts[0].context == "First definition"
|
assert ml_contexts[0].context == "First definition"
|
||||||
assert ml_contexts[1].context == "Second definition"
|
assert ml_contexts[1].context == "Second definition"
|
||||||
|
|
@ -286,7 +286,7 @@ class TestAgentKgExtractionEdgeCases:
|
||||||
assert len(contexts) == 3
|
assert len(contexts) == 3
|
||||||
|
|
||||||
# Empty entity should create empty URI after encoding
|
# Empty entity should create empty URI after encoding
|
||||||
empty_entity_context = next((ec for ec in contexts if ec.entity.value == TRUSTGRAPH_ENTITIES), None)
|
empty_entity_context = next((ec for ec in contexts if ec.entity.iri == TRUSTGRAPH_ENTITIES), None)
|
||||||
assert empty_entity_context is not None
|
assert empty_entity_context is not None
|
||||||
|
|
||||||
def test_process_extraction_data_nested_json_in_strings(self, agent_extractor):
|
def test_process_extraction_data_nested_json_in_strings(self, agent_extractor):
|
||||||
|
|
@ -338,7 +338,7 @@ class TestAgentKgExtractionEdgeCases:
|
||||||
|
|
||||||
# Should process all relationships
|
# Should process all relationships
|
||||||
# Note: The current implementation has some logic issues that these tests document
|
# Note: The current implementation has some logic issues that these tests document
|
||||||
assert len([t for t in triples if t.p.value != RDF_LABEL and t.p.value != SUBJECT_OF]) >= 7
|
assert len([t for t in triples if t.p.iri != RDF_LABEL and t.p.iri != SUBJECT_OF]) >= 7
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_emit_empty_collections(self, agent_extractor):
|
async def test_emit_empty_collections(self, agent_extractor):
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ processing graph structures, and performing graph operations.
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from unittest.mock import Mock
|
from unittest.mock import Mock
|
||||||
from .conftest import Triple, Value, Metadata
|
from .conftest import Triple, Metadata
|
||||||
from collections import defaultdict, deque
|
from collections import defaultdict, deque
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,13 +2,13 @@
|
||||||
Unit tests for triple construction logic
|
Unit tests for triple construction logic
|
||||||
|
|
||||||
Tests the core business logic for constructing RDF triples from extracted
|
Tests the core business logic for constructing RDF triples from extracted
|
||||||
entities and relationships, including URI generation, Value object creation,
|
entities and relationships, including URI generation, Term object creation,
|
||||||
and triple validation.
|
and triple validation.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from unittest.mock import Mock
|
from unittest.mock import Mock
|
||||||
from .conftest import Triple, Triples, Value, Metadata
|
from .conftest import Triple, Triples, Term, Metadata, IRI, LITERAL
|
||||||
import re
|
import re
|
||||||
import hashlib
|
import hashlib
|
||||||
|
|
||||||
|
|
@ -48,15 +48,14 @@ class TestTripleConstructionLogic:
|
||||||
generated_uri = generate_uri(text, entity_type)
|
generated_uri = generate_uri(text, entity_type)
|
||||||
assert generated_uri == expected_uri, f"URI generation failed for '{text}'"
|
assert generated_uri == expected_uri, f"URI generation failed for '{text}'"
|
||||||
|
|
||||||
def test_value_object_creation(self):
|
def test_term_object_creation(self):
|
||||||
"""Test creation of Value objects for subjects, predicates, and objects"""
|
"""Test creation of Term objects for subjects, predicates, and objects"""
|
||||||
# Arrange
|
# Arrange
|
||||||
def create_value_object(text, is_uri, value_type=""):
|
def create_term_object(text, is_uri, datatype=""):
|
||||||
return Value(
|
if is_uri:
|
||||||
value=text,
|
return Term(type=IRI, iri=text)
|
||||||
is_uri=is_uri,
|
else:
|
||||||
type=value_type
|
return Term(type=LITERAL, value=text, datatype=datatype if datatype else None)
|
||||||
)
|
|
||||||
|
|
||||||
test_cases = [
|
test_cases = [
|
||||||
("http://trustgraph.ai/kg/person/john-smith", True, ""),
|
("http://trustgraph.ai/kg/person/john-smith", True, ""),
|
||||||
|
|
@ -66,13 +65,16 @@ class TestTripleConstructionLogic:
|
||||||
]
|
]
|
||||||
|
|
||||||
# Act & Assert
|
# Act & Assert
|
||||||
for value_text, is_uri, value_type in test_cases:
|
for value_text, is_uri, datatype in test_cases:
|
||||||
value_obj = create_value_object(value_text, is_uri, value_type)
|
term_obj = create_term_object(value_text, is_uri, datatype)
|
||||||
|
|
||||||
assert isinstance(value_obj, Value)
|
assert isinstance(term_obj, Term)
|
||||||
assert value_obj.value == value_text
|
if is_uri:
|
||||||
assert value_obj.is_uri == is_uri
|
assert term_obj.type == IRI
|
||||||
assert value_obj.type == value_type
|
assert term_obj.iri == value_text
|
||||||
|
else:
|
||||||
|
assert term_obj.type == LITERAL
|
||||||
|
assert term_obj.value == value_text
|
||||||
|
|
||||||
def test_triple_construction_from_relationship(self):
|
def test_triple_construction_from_relationship(self):
|
||||||
"""Test constructing Triple objects from relationships"""
|
"""Test constructing Triple objects from relationships"""
|
||||||
|
|
@ -99,16 +101,16 @@ class TestTripleConstructionLogic:
|
||||||
predicate_uri = predicate_mappings.get(relationship["predicate"],
|
predicate_uri = predicate_mappings.get(relationship["predicate"],
|
||||||
f"{uri_base}/predicate/{relationship['predicate']}")
|
f"{uri_base}/predicate/{relationship['predicate']}")
|
||||||
|
|
||||||
# Create Value objects
|
# Create Term objects
|
||||||
subject_value = Value(value=subject_uri, is_uri=True, type="")
|
subject_term = Term(type=IRI, iri=subject_uri)
|
||||||
predicate_value = Value(value=predicate_uri, is_uri=True, type="")
|
predicate_term = Term(type=IRI, iri=predicate_uri)
|
||||||
object_value = Value(value=object_uri, is_uri=True, type="")
|
object_term = Term(type=IRI, iri=object_uri)
|
||||||
|
|
||||||
# Create Triple
|
# Create Triple
|
||||||
return Triple(
|
return Triple(
|
||||||
s=subject_value,
|
s=subject_term,
|
||||||
p=predicate_value,
|
p=predicate_term,
|
||||||
o=object_value
|
o=object_term
|
||||||
)
|
)
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
|
|
@ -116,12 +118,12 @@ class TestTripleConstructionLogic:
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert isinstance(triple, Triple)
|
assert isinstance(triple, Triple)
|
||||||
assert triple.s.value == "http://trustgraph.ai/kg/person/john-smith"
|
assert triple.s.iri == "http://trustgraph.ai/kg/person/john-smith"
|
||||||
assert triple.s.is_uri is True
|
assert triple.s.type == IRI
|
||||||
assert triple.p.value == "http://schema.org/worksFor"
|
assert triple.p.iri == "http://schema.org/worksFor"
|
||||||
assert triple.p.is_uri is True
|
assert triple.p.type == IRI
|
||||||
assert triple.o.value == "http://trustgraph.ai/kg/org/openai"
|
assert triple.o.iri == "http://trustgraph.ai/kg/org/openai"
|
||||||
assert triple.o.is_uri is True
|
assert triple.o.type == IRI
|
||||||
|
|
||||||
def test_literal_value_handling(self):
|
def test_literal_value_handling(self):
|
||||||
"""Test handling of literal values vs URI values"""
|
"""Test handling of literal values vs URI values"""
|
||||||
|
|
@ -134,7 +136,7 @@ class TestTripleConstructionLogic:
|
||||||
]
|
]
|
||||||
|
|
||||||
def create_triple_with_literal(subject_uri, predicate, object_value, object_is_uri):
|
def create_triple_with_literal(subject_uri, predicate, object_value, object_is_uri):
|
||||||
subject_val = Value(value=subject_uri, is_uri=True, type="")
|
subject_term = Term(type=IRI, iri=subject_uri)
|
||||||
|
|
||||||
# Determine predicate URI
|
# Determine predicate URI
|
||||||
predicate_mappings = {
|
predicate_mappings = {
|
||||||
|
|
@ -144,32 +146,37 @@ class TestTripleConstructionLogic:
|
||||||
"worksFor": "http://schema.org/worksFor"
|
"worksFor": "http://schema.org/worksFor"
|
||||||
}
|
}
|
||||||
predicate_uri = predicate_mappings.get(predicate, f"http://trustgraph.ai/kg/predicate/{predicate}")
|
predicate_uri = predicate_mappings.get(predicate, f"http://trustgraph.ai/kg/predicate/{predicate}")
|
||||||
predicate_val = Value(value=predicate_uri, is_uri=True, type="")
|
predicate_term = Term(type=IRI, iri=predicate_uri)
|
||||||
|
|
||||||
# Create object value with appropriate type
|
# Create object term with appropriate type
|
||||||
object_type = ""
|
if object_is_uri:
|
||||||
if not object_is_uri:
|
object_term = Term(type=IRI, iri=object_value)
|
||||||
|
else:
|
||||||
|
datatype = None
|
||||||
if predicate == "age":
|
if predicate == "age":
|
||||||
object_type = "integer"
|
datatype = "integer"
|
||||||
elif predicate in ["name", "email"]:
|
elif predicate in ["name", "email"]:
|
||||||
object_type = "string"
|
datatype = "string"
|
||||||
|
object_term = Term(type=LITERAL, value=object_value, datatype=datatype)
|
||||||
|
|
||||||
object_val = Value(value=object_value, is_uri=object_is_uri, type=object_type)
|
return Triple(s=subject_term, p=predicate_term, o=object_term)
|
||||||
|
|
||||||
return Triple(s=subject_val, p=predicate_val, o=object_val)
|
|
||||||
|
|
||||||
# Act & Assert
|
# Act & Assert
|
||||||
for subject_uri, predicate, object_value, object_is_uri in test_data:
|
for subject_uri, predicate, object_value, object_is_uri in test_data:
|
||||||
subject_full_uri = "http://trustgraph.ai/kg/person/john-smith"
|
subject_full_uri = "http://trustgraph.ai/kg/person/john-smith"
|
||||||
triple = create_triple_with_literal(subject_full_uri, predicate, object_value, object_is_uri)
|
triple = create_triple_with_literal(subject_full_uri, predicate, object_value, object_is_uri)
|
||||||
|
|
||||||
assert triple.o.is_uri == object_is_uri
|
if object_is_uri:
|
||||||
|
assert triple.o.type == IRI
|
||||||
|
assert triple.o.iri == object_value
|
||||||
|
else:
|
||||||
|
assert triple.o.type == LITERAL
|
||||||
assert triple.o.value == object_value
|
assert triple.o.value == object_value
|
||||||
|
|
||||||
if predicate == "age":
|
if predicate == "age":
|
||||||
assert triple.o.type == "integer"
|
assert triple.o.datatype == "integer"
|
||||||
elif predicate in ["name", "email"]:
|
elif predicate in ["name", "email"]:
|
||||||
assert triple.o.type == "string"
|
assert triple.o.datatype == "string"
|
||||||
|
|
||||||
def test_namespace_management(self):
|
def test_namespace_management(self):
|
||||||
"""Test namespace prefix management and expansion"""
|
"""Test namespace prefix management and expansion"""
|
||||||
|
|
@ -216,57 +223,68 @@ class TestTripleConstructionLogic:
|
||||||
def test_triple_validation(self):
|
def test_triple_validation(self):
|
||||||
"""Test triple validation rules"""
|
"""Test triple validation rules"""
|
||||||
# Arrange
|
# Arrange
|
||||||
|
def get_term_value(term):
|
||||||
|
"""Extract value from a Term"""
|
||||||
|
if term.type == IRI:
|
||||||
|
return term.iri
|
||||||
|
else:
|
||||||
|
return term.value
|
||||||
|
|
||||||
def validate_triple(triple):
|
def validate_triple(triple):
|
||||||
errors = []
|
errors = []
|
||||||
|
|
||||||
# Check required components
|
# Check required components
|
||||||
if not triple.s or not triple.s.value:
|
s_val = get_term_value(triple.s) if triple.s else None
|
||||||
|
p_val = get_term_value(triple.p) if triple.p else None
|
||||||
|
o_val = get_term_value(triple.o) if triple.o else None
|
||||||
|
|
||||||
|
if not triple.s or not s_val:
|
||||||
errors.append("Missing or empty subject")
|
errors.append("Missing or empty subject")
|
||||||
|
|
||||||
if not triple.p or not triple.p.value:
|
if not triple.p or not p_val:
|
||||||
errors.append("Missing or empty predicate")
|
errors.append("Missing or empty predicate")
|
||||||
|
|
||||||
if not triple.o or not triple.o.value:
|
if not triple.o or not o_val:
|
||||||
errors.append("Missing or empty object")
|
errors.append("Missing or empty object")
|
||||||
|
|
||||||
# Check URI validity for URI values
|
# Check URI validity for URI values
|
||||||
uri_pattern = r'^https?://[^\s/$.?#].[^\s]*$'
|
uri_pattern = r'^https?://[^\s/$.?#].[^\s]*$'
|
||||||
|
|
||||||
if triple.s.is_uri and not re.match(uri_pattern, triple.s.value):
|
if triple.s.type == IRI and not re.match(uri_pattern, triple.s.iri or ""):
|
||||||
errors.append("Invalid subject URI format")
|
errors.append("Invalid subject URI format")
|
||||||
|
|
||||||
if triple.p.is_uri and not re.match(uri_pattern, triple.p.value):
|
if triple.p.type == IRI and not re.match(uri_pattern, triple.p.iri or ""):
|
||||||
errors.append("Invalid predicate URI format")
|
errors.append("Invalid predicate URI format")
|
||||||
|
|
||||||
if triple.o.is_uri and not re.match(uri_pattern, triple.o.value):
|
if triple.o.type == IRI and not re.match(uri_pattern, triple.o.iri or ""):
|
||||||
errors.append("Invalid object URI format")
|
errors.append("Invalid object URI format")
|
||||||
|
|
||||||
# Predicates should typically be URIs
|
# Predicates should typically be URIs
|
||||||
if not triple.p.is_uri:
|
if triple.p.type != IRI:
|
||||||
errors.append("Predicate should be a URI")
|
errors.append("Predicate should be a URI")
|
||||||
|
|
||||||
return len(errors) == 0, errors
|
return len(errors) == 0, errors
|
||||||
|
|
||||||
# Test valid triple
|
# Test valid triple
|
||||||
valid_triple = Triple(
|
valid_triple = Triple(
|
||||||
s=Value(value="http://trustgraph.ai/kg/person/john", is_uri=True, type=""),
|
s=Term(type=IRI, iri="http://trustgraph.ai/kg/person/john"),
|
||||||
p=Value(value="http://schema.org/name", is_uri=True, type=""),
|
p=Term(type=IRI, iri="http://schema.org/name"),
|
||||||
o=Value(value="John Smith", is_uri=False, type="string")
|
o=Term(type=LITERAL, value="John Smith", datatype="string")
|
||||||
)
|
)
|
||||||
|
|
||||||
# Test invalid triples
|
# Test invalid triples
|
||||||
invalid_triples = [
|
invalid_triples = [
|
||||||
Triple(s=Value(value="", is_uri=True, type=""),
|
Triple(s=Term(type=IRI, iri=""),
|
||||||
p=Value(value="http://schema.org/name", is_uri=True, type=""),
|
p=Term(type=IRI, iri="http://schema.org/name"),
|
||||||
o=Value(value="John", is_uri=False, type="")), # Empty subject
|
o=Term(type=LITERAL, value="John")), # Empty subject
|
||||||
|
|
||||||
Triple(s=Value(value="http://trustgraph.ai/kg/person/john", is_uri=True, type=""),
|
Triple(s=Term(type=IRI, iri="http://trustgraph.ai/kg/person/john"),
|
||||||
p=Value(value="name", is_uri=False, type=""), # Non-URI predicate
|
p=Term(type=LITERAL, value="name"), # Non-URI predicate
|
||||||
o=Value(value="John", is_uri=False, type="")),
|
o=Term(type=LITERAL, value="John")),
|
||||||
|
|
||||||
Triple(s=Value(value="invalid-uri", is_uri=True, type=""),
|
Triple(s=Term(type=IRI, iri="invalid-uri"),
|
||||||
p=Value(value="http://schema.org/name", is_uri=True, type=""),
|
p=Term(type=IRI, iri="http://schema.org/name"),
|
||||||
o=Value(value="John", is_uri=False, type="")) # Invalid URI format
|
o=Term(type=LITERAL, value="John")) # Invalid URI format
|
||||||
]
|
]
|
||||||
|
|
||||||
# Act & Assert
|
# Act & Assert
|
||||||
|
|
@ -301,9 +319,9 @@ class TestTripleConstructionLogic:
|
||||||
type_uri = f"http://trustgraph.ai/kg/type/{entity['type']}"
|
type_uri = f"http://trustgraph.ai/kg/type/{entity['type']}"
|
||||||
|
|
||||||
type_triple = Triple(
|
type_triple = Triple(
|
||||||
s=Value(value=entity_uri, is_uri=True, type=""),
|
s=Term(type=IRI, iri=entity_uri),
|
||||||
p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True, type=""),
|
p=Term(type=IRI, iri="http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
|
||||||
o=Value(value=type_uri, is_uri=True, type="")
|
o=Term(type=IRI, iri=type_uri)
|
||||||
)
|
)
|
||||||
triples.append(type_triple)
|
triples.append(type_triple)
|
||||||
|
|
||||||
|
|
@ -314,9 +332,9 @@ class TestTripleConstructionLogic:
|
||||||
predicate_uri = f"http://schema.org/{rel['predicate'].replace('_', '')}"
|
predicate_uri = f"http://schema.org/{rel['predicate'].replace('_', '')}"
|
||||||
|
|
||||||
rel_triple = Triple(
|
rel_triple = Triple(
|
||||||
s=Value(value=subject_uri, is_uri=True, type=""),
|
s=Term(type=IRI, iri=subject_uri),
|
||||||
p=Value(value=predicate_uri, is_uri=True, type=""),
|
p=Term(type=IRI, iri=predicate_uri),
|
||||||
o=Value(value=object_uri, is_uri=True, type="")
|
o=Term(type=IRI, iri=object_uri)
|
||||||
)
|
)
|
||||||
triples.append(rel_triple)
|
triples.append(rel_triple)
|
||||||
|
|
||||||
|
|
@ -331,23 +349,23 @@ class TestTripleConstructionLogic:
|
||||||
# Check that all triples are valid Triple objects
|
# Check that all triples are valid Triple objects
|
||||||
for triple in triples:
|
for triple in triples:
|
||||||
assert isinstance(triple, Triple)
|
assert isinstance(triple, Triple)
|
||||||
assert triple.s.value != ""
|
assert triple.s.iri != ""
|
||||||
assert triple.p.value != ""
|
assert triple.p.iri != ""
|
||||||
assert triple.o.value != ""
|
assert triple.o.iri != ""
|
||||||
|
|
||||||
def test_triples_batch_object_creation(self):
|
def test_triples_batch_object_creation(self):
|
||||||
"""Test creating Triples batch objects with metadata"""
|
"""Test creating Triples batch objects with metadata"""
|
||||||
# Arrange
|
# Arrange
|
||||||
sample_triples = [
|
sample_triples = [
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="http://trustgraph.ai/kg/person/john", is_uri=True, type=""),
|
s=Term(type=IRI, iri="http://trustgraph.ai/kg/person/john"),
|
||||||
p=Value(value="http://schema.org/name", is_uri=True, type=""),
|
p=Term(type=IRI, iri="http://schema.org/name"),
|
||||||
o=Value(value="John Smith", is_uri=False, type="string")
|
o=Term(type=LITERAL, value="John Smith", datatype="string")
|
||||||
),
|
),
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="http://trustgraph.ai/kg/person/john", is_uri=True, type=""),
|
s=Term(type=IRI, iri="http://trustgraph.ai/kg/person/john"),
|
||||||
p=Value(value="http://schema.org/worksFor", is_uri=True, type=""),
|
p=Term(type=IRI, iri="http://schema.org/worksFor"),
|
||||||
o=Value(value="http://trustgraph.ai/kg/org/openai", is_uri=True, type="")
|
o=Term(type=IRI, iri="http://trustgraph.ai/kg/org/openai")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -374,9 +392,9 @@ class TestTripleConstructionLogic:
|
||||||
# Check that triples are properly embedded
|
# Check that triples are properly embedded
|
||||||
for triple in triples_batch.triples:
|
for triple in triples_batch.triples:
|
||||||
assert isinstance(triple, Triple)
|
assert isinstance(triple, Triple)
|
||||||
assert isinstance(triple.s, Value)
|
assert isinstance(triple.s, Term)
|
||||||
assert isinstance(triple.p, Value)
|
assert isinstance(triple.p, Term)
|
||||||
assert isinstance(triple.o, Value)
|
assert isinstance(triple.o, Term)
|
||||||
|
|
||||||
def test_uri_collision_handling(self):
|
def test_uri_collision_handling(self):
|
||||||
"""Test handling of URI collisions and duplicate detection"""
|
"""Test handling of URI collisions and duplicate detection"""
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ import pytest
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
from trustgraph.query.graph_embeddings.milvus.service import Processor
|
from trustgraph.query.graph_embeddings.milvus.service import Processor
|
||||||
from trustgraph.schema import Value, GraphEmbeddingsRequest
|
from trustgraph.schema import Term, GraphEmbeddingsRequest, IRI, LITERAL
|
||||||
|
|
||||||
|
|
||||||
class TestMilvusGraphEmbeddingsQueryProcessor:
|
class TestMilvusGraphEmbeddingsQueryProcessor:
|
||||||
|
|
@ -69,49 +69,49 @@ class TestMilvusGraphEmbeddingsQueryProcessor:
|
||||||
"""Test create_value with HTTP URI"""
|
"""Test create_value with HTTP URI"""
|
||||||
result = processor.create_value("http://example.com/resource")
|
result = processor.create_value("http://example.com/resource")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "http://example.com/resource"
|
assert result.iri == "http://example.com/resource"
|
||||||
assert result.is_uri is True
|
assert result.type == IRI
|
||||||
|
|
||||||
def test_create_value_with_https_uri(self, processor):
|
def test_create_value_with_https_uri(self, processor):
|
||||||
"""Test create_value with HTTPS URI"""
|
"""Test create_value with HTTPS URI"""
|
||||||
result = processor.create_value("https://example.com/resource")
|
result = processor.create_value("https://example.com/resource")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "https://example.com/resource"
|
assert result.iri == "https://example.com/resource"
|
||||||
assert result.is_uri is True
|
assert result.type == IRI
|
||||||
|
|
||||||
def test_create_value_with_literal(self, processor):
|
def test_create_value_with_literal(self, processor):
|
||||||
"""Test create_value with literal value"""
|
"""Test create_value with literal value"""
|
||||||
result = processor.create_value("just a literal string")
|
result = processor.create_value("just a literal string")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "just a literal string"
|
assert result.value == "just a literal string"
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
def test_create_value_with_empty_string(self, processor):
|
def test_create_value_with_empty_string(self, processor):
|
||||||
"""Test create_value with empty string"""
|
"""Test create_value with empty string"""
|
||||||
result = processor.create_value("")
|
result = processor.create_value("")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == ""
|
assert result.value == ""
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
def test_create_value_with_partial_uri(self, processor):
|
def test_create_value_with_partial_uri(self, processor):
|
||||||
"""Test create_value with string that looks like URI but isn't complete"""
|
"""Test create_value with string that looks like URI but isn't complete"""
|
||||||
result = processor.create_value("http")
|
result = processor.create_value("http")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "http"
|
assert result.value == "http"
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
def test_create_value_with_ftp_uri(self, processor):
|
def test_create_value_with_ftp_uri(self, processor):
|
||||||
"""Test create_value with FTP URI (should not be detected as URI)"""
|
"""Test create_value with FTP URI (should not be detected as URI)"""
|
||||||
result = processor.create_value("ftp://example.com/file")
|
result = processor.create_value("ftp://example.com/file")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "ftp://example.com/file"
|
assert result.value == "ftp://example.com/file"
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_query_graph_embeddings_single_vector(self, processor):
|
async def test_query_graph_embeddings_single_vector(self, processor):
|
||||||
|
|
@ -138,17 +138,17 @@ class TestMilvusGraphEmbeddingsQueryProcessor:
|
||||||
[0.1, 0.2, 0.3], 'test_user', 'test_collection', limit=10
|
[0.1, 0.2, 0.3], 'test_user', 'test_collection', limit=10
|
||||||
)
|
)
|
||||||
|
|
||||||
# Verify results are converted to Value objects
|
# Verify results are converted to Term objects
|
||||||
assert len(result) == 3
|
assert len(result) == 3
|
||||||
assert isinstance(result[0], Value)
|
assert isinstance(result[0], Term)
|
||||||
assert result[0].value == "http://example.com/entity1"
|
assert result[0].iri == "http://example.com/entity1"
|
||||||
assert result[0].is_uri is True
|
assert result[0].type == IRI
|
||||||
assert isinstance(result[1], Value)
|
assert isinstance(result[1], Term)
|
||||||
assert result[1].value == "http://example.com/entity2"
|
assert result[1].iri == "http://example.com/entity2"
|
||||||
assert result[1].is_uri is True
|
assert result[1].type == IRI
|
||||||
assert isinstance(result[2], Value)
|
assert isinstance(result[2], Term)
|
||||||
assert result[2].value == "literal entity"
|
assert result[2].value == "literal entity"
|
||||||
assert result[2].is_uri is False
|
assert result[2].type == LITERAL
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_query_graph_embeddings_multiple_vectors(self, processor):
|
async def test_query_graph_embeddings_multiple_vectors(self, processor):
|
||||||
|
|
@ -186,7 +186,7 @@ class TestMilvusGraphEmbeddingsQueryProcessor:
|
||||||
|
|
||||||
# Verify results are deduplicated and limited
|
# Verify results are deduplicated and limited
|
||||||
assert len(result) == 3
|
assert len(result) == 3
|
||||||
entity_values = [r.value for r in result]
|
entity_values = [r.iri if r.type == IRI else r.value for r in result]
|
||||||
assert "http://example.com/entity1" in entity_values
|
assert "http://example.com/entity1" in entity_values
|
||||||
assert "http://example.com/entity2" in entity_values
|
assert "http://example.com/entity2" in entity_values
|
||||||
assert "http://example.com/entity3" in entity_values
|
assert "http://example.com/entity3" in entity_values
|
||||||
|
|
@ -246,7 +246,7 @@ class TestMilvusGraphEmbeddingsQueryProcessor:
|
||||||
|
|
||||||
# Verify duplicates are removed
|
# Verify duplicates are removed
|
||||||
assert len(result) == 3
|
assert len(result) == 3
|
||||||
entity_values = [r.value for r in result]
|
entity_values = [r.iri if r.type == IRI else r.value for r in result]
|
||||||
assert len(set(entity_values)) == 3 # All unique
|
assert len(set(entity_values)) == 3 # All unique
|
||||||
assert "http://example.com/entity1" in entity_values
|
assert "http://example.com/entity1" in entity_values
|
||||||
assert "http://example.com/entity2" in entity_values
|
assert "http://example.com/entity2" in entity_values
|
||||||
|
|
@ -346,14 +346,14 @@ class TestMilvusGraphEmbeddingsQueryProcessor:
|
||||||
assert len(result) == 4
|
assert len(result) == 4
|
||||||
|
|
||||||
# Check URI entities
|
# Check URI entities
|
||||||
uri_results = [r for r in result if r.is_uri]
|
uri_results = [r for r in result if r.type == IRI]
|
||||||
assert len(uri_results) == 2
|
assert len(uri_results) == 2
|
||||||
uri_values = [r.value for r in uri_results]
|
uri_values = [r.iri for r in uri_results]
|
||||||
assert "http://example.com/uri_entity" in uri_values
|
assert "http://example.com/uri_entity" in uri_values
|
||||||
assert "https://example.com/another_uri" in uri_values
|
assert "https://example.com/another_uri" in uri_values
|
||||||
|
|
||||||
# Check literal entities
|
# Check literal entities
|
||||||
literal_results = [r for r in result if not r.is_uri]
|
literal_results = [r for r in result if not r.type == IRI]
|
||||||
assert len(literal_results) == 2
|
assert len(literal_results) == 2
|
||||||
literal_values = [r.value for r in literal_results]
|
literal_values = [r.value for r in literal_results]
|
||||||
assert "literal entity text" in literal_values
|
assert "literal entity text" in literal_values
|
||||||
|
|
@ -486,7 +486,7 @@ class TestMilvusGraphEmbeddingsQueryProcessor:
|
||||||
|
|
||||||
# Verify results from all dimensions
|
# Verify results from all dimensions
|
||||||
assert len(result) == 3
|
assert len(result) == 3
|
||||||
entity_values = [r.value for r in result]
|
entity_values = [r.iri if r.type == IRI else r.value for r in result]
|
||||||
assert "entity_2d" in entity_values
|
assert "entity_2d" in entity_values
|
||||||
assert "entity_4d" in entity_values
|
assert "entity_4d" in entity_values
|
||||||
assert "entity_3d" in entity_values
|
assert "entity_3d" in entity_values
|
||||||
|
|
@ -9,7 +9,7 @@ from unittest.mock import MagicMock, patch
|
||||||
pytest.skip("Pinecone library missing protoc_gen_openapiv2 dependency", allow_module_level=True)
|
pytest.skip("Pinecone library missing protoc_gen_openapiv2 dependency", allow_module_level=True)
|
||||||
|
|
||||||
from trustgraph.query.graph_embeddings.pinecone.service import Processor
|
from trustgraph.query.graph_embeddings.pinecone.service import Processor
|
||||||
from trustgraph.schema import Value
|
from trustgraph.schema import Term, IRI, LITERAL
|
||||||
|
|
||||||
|
|
||||||
class TestPineconeGraphEmbeddingsQueryProcessor:
|
class TestPineconeGraphEmbeddingsQueryProcessor:
|
||||||
|
|
@ -105,27 +105,27 @@ class TestPineconeGraphEmbeddingsQueryProcessor:
|
||||||
uri_entity = "http://example.org/entity"
|
uri_entity = "http://example.org/entity"
|
||||||
value = processor.create_value(uri_entity)
|
value = processor.create_value(uri_entity)
|
||||||
|
|
||||||
assert isinstance(value, Value)
|
assert isinstance(value, Term)
|
||||||
assert value.value == uri_entity
|
assert value.value == uri_entity
|
||||||
assert value.is_uri == True
|
assert value.type == IRI
|
||||||
|
|
||||||
def test_create_value_https_uri(self, processor):
|
def test_create_value_https_uri(self, processor):
|
||||||
"""Test create_value method for HTTPS URI entities"""
|
"""Test create_value method for HTTPS URI entities"""
|
||||||
uri_entity = "https://example.org/entity"
|
uri_entity = "https://example.org/entity"
|
||||||
value = processor.create_value(uri_entity)
|
value = processor.create_value(uri_entity)
|
||||||
|
|
||||||
assert isinstance(value, Value)
|
assert isinstance(value, Term)
|
||||||
assert value.value == uri_entity
|
assert value.value == uri_entity
|
||||||
assert value.is_uri == True
|
assert value.type == IRI
|
||||||
|
|
||||||
def test_create_value_literal(self, processor):
|
def test_create_value_literal(self, processor):
|
||||||
"""Test create_value method for literal entities"""
|
"""Test create_value method for literal entities"""
|
||||||
literal_entity = "literal_entity"
|
literal_entity = "literal_entity"
|
||||||
value = processor.create_value(literal_entity)
|
value = processor.create_value(literal_entity)
|
||||||
|
|
||||||
assert isinstance(value, Value)
|
assert isinstance(value, Term)
|
||||||
assert value.value == literal_entity
|
assert value.value == literal_entity
|
||||||
assert value.is_uri == False
|
assert value.type == LITERAL
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_query_graph_embeddings_single_vector(self, processor):
|
async def test_query_graph_embeddings_single_vector(self, processor):
|
||||||
|
|
@ -165,11 +165,11 @@ class TestPineconeGraphEmbeddingsQueryProcessor:
|
||||||
# Verify results
|
# Verify results
|
||||||
assert len(entities) == 3
|
assert len(entities) == 3
|
||||||
assert entities[0].value == 'http://example.org/entity1'
|
assert entities[0].value == 'http://example.org/entity1'
|
||||||
assert entities[0].is_uri == True
|
assert entities[0].type == IRI
|
||||||
assert entities[1].value == 'entity2'
|
assert entities[1].value == 'entity2'
|
||||||
assert entities[1].is_uri == False
|
assert entities[1].type == LITERAL
|
||||||
assert entities[2].value == 'http://example.org/entity3'
|
assert entities[2].value == 'http://example.org/entity3'
|
||||||
assert entities[2].is_uri == True
|
assert entities[2].type == IRI
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_query_graph_embeddings_multiple_vectors(self, processor, mock_query_message):
|
async def test_query_graph_embeddings_multiple_vectors(self, processor, mock_query_message):
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ from unittest import IsolatedAsyncioTestCase
|
||||||
|
|
||||||
# Import the service under test
|
# Import the service under test
|
||||||
from trustgraph.query.graph_embeddings.qdrant.service import Processor
|
from trustgraph.query.graph_embeddings.qdrant.service import Processor
|
||||||
|
from trustgraph.schema import IRI, LITERAL
|
||||||
|
|
||||||
|
|
||||||
class TestQdrantGraphEmbeddingsQuery(IsolatedAsyncioTestCase):
|
class TestQdrantGraphEmbeddingsQuery(IsolatedAsyncioTestCase):
|
||||||
|
|
@ -85,10 +86,10 @@ class TestQdrantGraphEmbeddingsQuery(IsolatedAsyncioTestCase):
|
||||||
value = processor.create_value('http://example.com/entity')
|
value = processor.create_value('http://example.com/entity')
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert hasattr(value, 'value')
|
assert hasattr(value, 'iri')
|
||||||
assert value.value == 'http://example.com/entity'
|
assert value.iri == 'http://example.com/entity'
|
||||||
assert hasattr(value, 'is_uri')
|
assert hasattr(value, 'type')
|
||||||
assert value.is_uri == True
|
assert value.type == IRI
|
||||||
|
|
||||||
@patch('trustgraph.query.graph_embeddings.qdrant.service.QdrantClient')
|
@patch('trustgraph.query.graph_embeddings.qdrant.service.QdrantClient')
|
||||||
@patch('trustgraph.base.GraphEmbeddingsQueryService.__init__')
|
@patch('trustgraph.base.GraphEmbeddingsQueryService.__init__')
|
||||||
|
|
@ -109,10 +110,10 @@ class TestQdrantGraphEmbeddingsQuery(IsolatedAsyncioTestCase):
|
||||||
value = processor.create_value('https://secure.example.com/entity')
|
value = processor.create_value('https://secure.example.com/entity')
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert hasattr(value, 'value')
|
assert hasattr(value, 'iri')
|
||||||
assert value.value == 'https://secure.example.com/entity'
|
assert value.iri == 'https://secure.example.com/entity'
|
||||||
assert hasattr(value, 'is_uri')
|
assert hasattr(value, 'type')
|
||||||
assert value.is_uri == True
|
assert value.type == IRI
|
||||||
|
|
||||||
@patch('trustgraph.query.graph_embeddings.qdrant.service.QdrantClient')
|
@patch('trustgraph.query.graph_embeddings.qdrant.service.QdrantClient')
|
||||||
@patch('trustgraph.base.GraphEmbeddingsQueryService.__init__')
|
@patch('trustgraph.base.GraphEmbeddingsQueryService.__init__')
|
||||||
|
|
@ -135,8 +136,8 @@ class TestQdrantGraphEmbeddingsQuery(IsolatedAsyncioTestCase):
|
||||||
# Assert
|
# Assert
|
||||||
assert hasattr(value, 'value')
|
assert hasattr(value, 'value')
|
||||||
assert value.value == 'regular entity name'
|
assert value.value == 'regular entity name'
|
||||||
assert hasattr(value, 'is_uri')
|
assert hasattr(value, 'type')
|
||||||
assert value.is_uri == False
|
assert value.type == LITERAL
|
||||||
|
|
||||||
@patch('trustgraph.query.graph_embeddings.qdrant.service.QdrantClient')
|
@patch('trustgraph.query.graph_embeddings.qdrant.service.QdrantClient')
|
||||||
@patch('trustgraph.base.GraphEmbeddingsQueryService.__init__')
|
@patch('trustgraph.base.GraphEmbeddingsQueryService.__init__')
|
||||||
|
|
@ -428,14 +429,14 @@ class TestQdrantGraphEmbeddingsQuery(IsolatedAsyncioTestCase):
|
||||||
assert len(result) == 3
|
assert len(result) == 3
|
||||||
|
|
||||||
# Check URI entities
|
# Check URI entities
|
||||||
uri_entities = [entity for entity in result if hasattr(entity, 'is_uri') and entity.is_uri]
|
uri_entities = [entity for entity in result if entity.type == IRI]
|
||||||
assert len(uri_entities) == 2
|
assert len(uri_entities) == 2
|
||||||
uri_values = [entity.value for entity in uri_entities]
|
uri_values = [entity.iri for entity in uri_entities]
|
||||||
assert 'http://example.com/entity1' in uri_values
|
assert 'http://example.com/entity1' in uri_values
|
||||||
assert 'https://secure.example.com/entity2' in uri_values
|
assert 'https://secure.example.com/entity2' in uri_values
|
||||||
|
|
||||||
# Check regular entities
|
# Check regular entities
|
||||||
regular_entities = [entity for entity in result if hasattr(entity, 'is_uri') and not entity.is_uri]
|
regular_entities = [entity for entity in result if entity.type == LITERAL]
|
||||||
assert len(regular_entities) == 1
|
assert len(regular_entities) == 1
|
||||||
assert regular_entities[0].value == 'regular entity'
|
assert regular_entities[0].value == 'regular entity'
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ import pytest
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
from trustgraph.query.triples.memgraph.service import Processor
|
from trustgraph.query.triples.memgraph.service import Processor
|
||||||
from trustgraph.schema import TriplesQueryRequest, Value
|
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
|
||||||
|
|
||||||
|
|
||||||
class TestMemgraphQueryUserCollectionIsolation:
|
class TestMemgraphQueryUserCollectionIsolation:
|
||||||
|
|
@ -24,9 +24,9 @@ class TestMemgraphQueryUserCollectionIsolation:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user="test_user",
|
user="test_user",
|
||||||
collection="test_collection",
|
collection="test_collection",
|
||||||
s=Value(value="http://example.com/s", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/s"),
|
||||||
p=Value(value="http://example.com/p", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/p"),
|
||||||
o=Value(value="test_object", is_uri=False),
|
o=Term(type=LITERAL, value="test_object"),
|
||||||
limit=1000
|
limit=1000
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -65,8 +65,8 @@ class TestMemgraphQueryUserCollectionIsolation:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user="test_user",
|
user="test_user",
|
||||||
collection="test_collection",
|
collection="test_collection",
|
||||||
s=Value(value="http://example.com/s", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/s"),
|
||||||
p=Value(value="http://example.com/p", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/p"),
|
||||||
o=None,
|
o=None,
|
||||||
limit=1000
|
limit=1000
|
||||||
)
|
)
|
||||||
|
|
@ -105,9 +105,9 @@ class TestMemgraphQueryUserCollectionIsolation:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user="test_user",
|
user="test_user",
|
||||||
collection="test_collection",
|
collection="test_collection",
|
||||||
s=Value(value="http://example.com/s", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/s"),
|
||||||
p=None,
|
p=None,
|
||||||
o=Value(value="http://example.com/o", is_uri=True),
|
o=Term(type=IRI, iri="http://example.com/o"),
|
||||||
limit=1000
|
limit=1000
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -145,7 +145,7 @@ class TestMemgraphQueryUserCollectionIsolation:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user="test_user",
|
user="test_user",
|
||||||
collection="test_collection",
|
collection="test_collection",
|
||||||
s=Value(value="http://example.com/s", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/s"),
|
||||||
p=None,
|
p=None,
|
||||||
o=None,
|
o=None,
|
||||||
limit=1000
|
limit=1000
|
||||||
|
|
@ -185,8 +185,8 @@ class TestMemgraphQueryUserCollectionIsolation:
|
||||||
user="test_user",
|
user="test_user",
|
||||||
collection="test_collection",
|
collection="test_collection",
|
||||||
s=None,
|
s=None,
|
||||||
p=Value(value="http://example.com/p", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/p"),
|
||||||
o=Value(value="literal", is_uri=False),
|
o=Term(type=LITERAL, value="literal"),
|
||||||
limit=1000
|
limit=1000
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -225,7 +225,7 @@ class TestMemgraphQueryUserCollectionIsolation:
|
||||||
user="test_user",
|
user="test_user",
|
||||||
collection="test_collection",
|
collection="test_collection",
|
||||||
s=None,
|
s=None,
|
||||||
p=Value(value="http://example.com/p", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/p"),
|
||||||
o=None,
|
o=None,
|
||||||
limit=1000
|
limit=1000
|
||||||
)
|
)
|
||||||
|
|
@ -265,7 +265,7 @@ class TestMemgraphQueryUserCollectionIsolation:
|
||||||
collection="test_collection",
|
collection="test_collection",
|
||||||
s=None,
|
s=None,
|
||||||
p=None,
|
p=None,
|
||||||
o=Value(value="test_value", is_uri=False),
|
o=Term(type=LITERAL, value="test_value"),
|
||||||
limit=1000
|
limit=1000
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -355,7 +355,7 @@ class TestMemgraphQueryUserCollectionIsolation:
|
||||||
|
|
||||||
# Query without user/collection fields
|
# Query without user/collection fields
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
s=Value(value="http://example.com/s", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/s"),
|
||||||
p=None,
|
p=None,
|
||||||
o=None,
|
o=None,
|
||||||
limit=1000
|
limit=1000
|
||||||
|
|
@ -385,7 +385,7 @@ class TestMemgraphQueryUserCollectionIsolation:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user="test_user",
|
user="test_user",
|
||||||
collection="test_collection",
|
collection="test_collection",
|
||||||
s=Value(value="http://example.com/s", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/s"),
|
||||||
p=None,
|
p=None,
|
||||||
o=None,
|
o=None,
|
||||||
limit=1000
|
limit=1000
|
||||||
|
|
@ -416,17 +416,17 @@ class TestMemgraphQueryUserCollectionIsolation:
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
|
|
||||||
# First triple (literal object)
|
# First triple (literal object)
|
||||||
assert result[0].s.value == "http://example.com/s"
|
assert result[0].s.iri == "http://example.com/s"
|
||||||
assert result[0].s.is_uri == True
|
assert result[0].s.type == IRI
|
||||||
assert result[0].p.value == "http://example.com/p1"
|
assert result[0].p.iri == "http://example.com/p1"
|
||||||
assert result[0].p.is_uri == True
|
assert result[0].p.type == IRI
|
||||||
assert result[0].o.value == "literal_value"
|
assert result[0].o.value == "literal_value"
|
||||||
assert result[0].o.is_uri == False
|
assert result[0].o.type == LITERAL
|
||||||
|
|
||||||
# Second triple (URI object)
|
# Second triple (URI object)
|
||||||
assert result[1].s.value == "http://example.com/s"
|
assert result[1].s.iri == "http://example.com/s"
|
||||||
assert result[1].s.is_uri == True
|
assert result[1].s.type == IRI
|
||||||
assert result[1].p.value == "http://example.com/p2"
|
assert result[1].p.iri == "http://example.com/p2"
|
||||||
assert result[1].p.is_uri == True
|
assert result[1].p.type == IRI
|
||||||
assert result[1].o.value == "http://example.com/o"
|
assert result[1].o.iri == "http://example.com/o"
|
||||||
assert result[1].o.is_uri == True
|
assert result[1].o.type == IRI
|
||||||
|
|
@ -6,7 +6,7 @@ import pytest
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
from trustgraph.query.triples.neo4j.service import Processor
|
from trustgraph.query.triples.neo4j.service import Processor
|
||||||
from trustgraph.schema import TriplesQueryRequest, Value
|
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
|
||||||
|
|
||||||
|
|
||||||
class TestNeo4jQueryUserCollectionIsolation:
|
class TestNeo4jQueryUserCollectionIsolation:
|
||||||
|
|
@ -24,9 +24,9 @@ class TestNeo4jQueryUserCollectionIsolation:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user="test_user",
|
user="test_user",
|
||||||
collection="test_collection",
|
collection="test_collection",
|
||||||
s=Value(value="http://example.com/s", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/s"),
|
||||||
p=Value(value="http://example.com/p", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/p"),
|
||||||
o=Value(value="test_object", is_uri=False),
|
o=Term(type=LITERAL, value="test_object"),
|
||||||
limit=10
|
limit=10
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -65,8 +65,8 @@ class TestNeo4jQueryUserCollectionIsolation:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user="test_user",
|
user="test_user",
|
||||||
collection="test_collection",
|
collection="test_collection",
|
||||||
s=Value(value="http://example.com/s", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/s"),
|
||||||
p=Value(value="http://example.com/p", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/p"),
|
||||||
o=None,
|
o=None,
|
||||||
limit=10
|
limit=10
|
||||||
)
|
)
|
||||||
|
|
@ -123,9 +123,9 @@ class TestNeo4jQueryUserCollectionIsolation:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user="test_user",
|
user="test_user",
|
||||||
collection="test_collection",
|
collection="test_collection",
|
||||||
s=Value(value="http://example.com/s", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/s"),
|
||||||
p=None,
|
p=None,
|
||||||
o=Value(value="http://example.com/o", is_uri=True),
|
o=Term(type=IRI, iri="http://example.com/o"),
|
||||||
limit=10
|
limit=10
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -163,7 +163,7 @@ class TestNeo4jQueryUserCollectionIsolation:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user="test_user",
|
user="test_user",
|
||||||
collection="test_collection",
|
collection="test_collection",
|
||||||
s=Value(value="http://example.com/s", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/s"),
|
||||||
p=None,
|
p=None,
|
||||||
o=None,
|
o=None,
|
||||||
limit=10
|
limit=10
|
||||||
|
|
@ -203,8 +203,8 @@ class TestNeo4jQueryUserCollectionIsolation:
|
||||||
user="test_user",
|
user="test_user",
|
||||||
collection="test_collection",
|
collection="test_collection",
|
||||||
s=None,
|
s=None,
|
||||||
p=Value(value="http://example.com/p", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/p"),
|
||||||
o=Value(value="literal", is_uri=False),
|
o=Term(type=LITERAL, value="literal"),
|
||||||
limit=10
|
limit=10
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -243,7 +243,7 @@ class TestNeo4jQueryUserCollectionIsolation:
|
||||||
user="test_user",
|
user="test_user",
|
||||||
collection="test_collection",
|
collection="test_collection",
|
||||||
s=None,
|
s=None,
|
||||||
p=Value(value="http://example.com/p", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/p"),
|
||||||
o=None,
|
o=None,
|
||||||
limit=10
|
limit=10
|
||||||
)
|
)
|
||||||
|
|
@ -283,7 +283,7 @@ class TestNeo4jQueryUserCollectionIsolation:
|
||||||
collection="test_collection",
|
collection="test_collection",
|
||||||
s=None,
|
s=None,
|
||||||
p=None,
|
p=None,
|
||||||
o=Value(value="test_value", is_uri=False),
|
o=Term(type=LITERAL, value="test_value"),
|
||||||
limit=10
|
limit=10
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -373,7 +373,7 @@ class TestNeo4jQueryUserCollectionIsolation:
|
||||||
|
|
||||||
# Query without user/collection fields
|
# Query without user/collection fields
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
s=Value(value="http://example.com/s", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/s"),
|
||||||
p=None,
|
p=None,
|
||||||
o=None,
|
o=None,
|
||||||
limit=10
|
limit=10
|
||||||
|
|
@ -403,7 +403,7 @@ class TestNeo4jQueryUserCollectionIsolation:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user="test_user",
|
user="test_user",
|
||||||
collection="test_collection",
|
collection="test_collection",
|
||||||
s=Value(value="http://example.com/s", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/s"),
|
||||||
p=None,
|
p=None,
|
||||||
o=None,
|
o=None,
|
||||||
limit=10
|
limit=10
|
||||||
|
|
@ -434,17 +434,17 @@ class TestNeo4jQueryUserCollectionIsolation:
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
|
|
||||||
# First triple (literal object)
|
# First triple (literal object)
|
||||||
assert result[0].s.value == "http://example.com/s"
|
assert result[0].s.iri == "http://example.com/s"
|
||||||
assert result[0].s.is_uri == True
|
assert result[0].s.type == IRI
|
||||||
assert result[0].p.value == "http://example.com/p1"
|
assert result[0].p.iri == "http://example.com/p1"
|
||||||
assert result[0].p.is_uri == True
|
assert result[0].p.type == IRI
|
||||||
assert result[0].o.value == "literal_value"
|
assert result[0].o.value == "literal_value"
|
||||||
assert result[0].o.is_uri == False
|
assert result[0].o.type == LITERAL
|
||||||
|
|
||||||
# Second triple (URI object)
|
# Second triple (URI object)
|
||||||
assert result[1].s.value == "http://example.com/s"
|
assert result[1].s.iri == "http://example.com/s"
|
||||||
assert result[1].s.is_uri == True
|
assert result[1].s.type == IRI
|
||||||
assert result[1].p.value == "http://example.com/p2"
|
assert result[1].p.iri == "http://example.com/p2"
|
||||||
assert result[1].p.is_uri == True
|
assert result[1].p.type == IRI
|
||||||
assert result[1].o.value == "http://example.com/o"
|
assert result[1].o.iri == "http://example.com/o"
|
||||||
assert result[1].o.is_uri == True
|
assert result[1].o.type == IRI
|
||||||
|
|
@ -5,8 +5,8 @@ Tests for Cassandra triples query service
|
||||||
import pytest
|
import pytest
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
from trustgraph.query.triples.cassandra.service import Processor
|
from trustgraph.query.triples.cassandra.service import Processor, create_term
|
||||||
from trustgraph.schema import Value
|
from trustgraph.schema import Term, IRI, LITERAL
|
||||||
|
|
||||||
|
|
||||||
class TestCassandraQueryProcessor:
|
class TestCassandraQueryProcessor:
|
||||||
|
|
@ -21,64 +21,67 @@ class TestCassandraQueryProcessor:
|
||||||
graph_host='localhost'
|
graph_host='localhost'
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_create_value_with_http_uri(self, processor):
|
def test_create_term_with_http_uri(self, processor):
|
||||||
"""Test create_value with HTTP URI"""
|
"""Test create_term with HTTP URI"""
|
||||||
result = processor.create_value("http://example.com/resource")
|
result = create_term("http://example.com/resource")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "http://example.com/resource"
|
assert result.iri == "http://example.com/resource"
|
||||||
assert result.is_uri is True
|
assert result.type == IRI
|
||||||
|
|
||||||
def test_create_value_with_https_uri(self, processor):
|
def test_create_term_with_https_uri(self, processor):
|
||||||
"""Test create_value with HTTPS URI"""
|
"""Test create_term with HTTPS URI"""
|
||||||
result = processor.create_value("https://example.com/resource")
|
result = create_term("https://example.com/resource")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "https://example.com/resource"
|
assert result.iri == "https://example.com/resource"
|
||||||
assert result.is_uri is True
|
assert result.type == IRI
|
||||||
|
|
||||||
def test_create_value_with_literal(self, processor):
|
def test_create_term_with_literal(self, processor):
|
||||||
"""Test create_value with literal value"""
|
"""Test create_term with literal value"""
|
||||||
result = processor.create_value("just a literal string")
|
result = create_term("just a literal string")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "just a literal string"
|
assert result.value == "just a literal string"
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
def test_create_value_with_empty_string(self, processor):
|
def test_create_term_with_empty_string(self, processor):
|
||||||
"""Test create_value with empty string"""
|
"""Test create_term with empty string"""
|
||||||
result = processor.create_value("")
|
result = create_term("")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == ""
|
assert result.value == ""
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
def test_create_value_with_partial_uri(self, processor):
|
def test_create_term_with_partial_uri(self, processor):
|
||||||
"""Test create_value with string that looks like URI but isn't complete"""
|
"""Test create_term with string that looks like URI but isn't complete"""
|
||||||
result = processor.create_value("http")
|
result = create_term("http")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "http"
|
assert result.value == "http"
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
def test_create_value_with_ftp_uri(self, processor):
|
def test_create_term_with_ftp_uri(self, processor):
|
||||||
"""Test create_value with FTP URI (should not be detected as URI)"""
|
"""Test create_term with FTP URI (should not be detected as URI)"""
|
||||||
result = processor.create_value("ftp://example.com/file")
|
result = create_term("ftp://example.com/file")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "ftp://example.com/file"
|
assert result.value == "ftp://example.com/file"
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||||
async def test_query_triples_spo_query(self, mock_trustgraph):
|
async def test_query_triples_spo_query(self, mock_trustgraph):
|
||||||
"""Test querying triples with subject, predicate, and object specified"""
|
"""Test querying triples with subject, predicate, and object specified"""
|
||||||
from trustgraph.schema import TriplesQueryRequest, Value
|
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
|
||||||
|
|
||||||
# Setup mock TrustGraph
|
# Setup mock TrustGraph
|
||||||
mock_tg_instance = MagicMock()
|
mock_tg_instance = MagicMock()
|
||||||
mock_trustgraph.return_value = mock_tg_instance
|
mock_trustgraph.return_value = mock_tg_instance
|
||||||
mock_tg_instance.get_spo.return_value = None # SPO query returns None if found
|
# SPO query returns a list of results (with mock graph attribute)
|
||||||
|
mock_result = MagicMock()
|
||||||
|
mock_result.g = None
|
||||||
|
mock_tg_instance.get_spo.return_value = [mock_result]
|
||||||
|
|
||||||
processor = Processor(
|
processor = Processor(
|
||||||
taskgroup=MagicMock(),
|
taskgroup=MagicMock(),
|
||||||
|
|
@ -90,9 +93,9 @@ class TestCassandraQueryProcessor:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value='test_subject', is_uri=False),
|
s=Term(type=LITERAL, value='test_subject'),
|
||||||
p=Value(value='test_predicate', is_uri=False),
|
p=Term(type=LITERAL, value='test_predicate'),
|
||||||
o=Value(value='test_object', is_uri=False),
|
o=Term(type=LITERAL, value='test_object'),
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -106,7 +109,7 @@ class TestCassandraQueryProcessor:
|
||||||
|
|
||||||
# Verify get_spo was called with correct parameters
|
# Verify get_spo was called with correct parameters
|
||||||
mock_tg_instance.get_spo.assert_called_once_with(
|
mock_tg_instance.get_spo.assert_called_once_with(
|
||||||
'test_collection', 'test_subject', 'test_predicate', 'test_object', limit=100
|
'test_collection', 'test_subject', 'test_predicate', 'test_object', g=None, limit=100
|
||||||
)
|
)
|
||||||
|
|
||||||
# Verify result contains the queried triple
|
# Verify result contains the queried triple
|
||||||
|
|
@ -146,7 +149,7 @@ class TestCassandraQueryProcessor:
|
||||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||||
async def test_query_triples_sp_pattern(self, mock_trustgraph):
|
async def test_query_triples_sp_pattern(self, mock_trustgraph):
|
||||||
"""Test SP query pattern (subject and predicate, no object)"""
|
"""Test SP query pattern (subject and predicate, no object)"""
|
||||||
from trustgraph.schema import TriplesQueryRequest, Value
|
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
|
||||||
|
|
||||||
# Setup mock TrustGraph and response
|
# Setup mock TrustGraph and response
|
||||||
mock_tg_instance = MagicMock()
|
mock_tg_instance = MagicMock()
|
||||||
|
|
@ -161,15 +164,15 @@ class TestCassandraQueryProcessor:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value='test_subject', is_uri=False),
|
s=Term(type=LITERAL, value='test_subject'),
|
||||||
p=Value(value='test_predicate', is_uri=False),
|
p=Term(type=LITERAL, value='test_predicate'),
|
||||||
o=None,
|
o=None,
|
||||||
limit=50
|
limit=50
|
||||||
)
|
)
|
||||||
|
|
||||||
result = await processor.query_triples(query)
|
result = await processor.query_triples(query)
|
||||||
|
|
||||||
mock_tg_instance.get_sp.assert_called_once_with('test_collection', 'test_subject', 'test_predicate', limit=50)
|
mock_tg_instance.get_sp.assert_called_once_with('test_collection', 'test_subject', 'test_predicate', g=None, limit=50)
|
||||||
assert len(result) == 1
|
assert len(result) == 1
|
||||||
assert result[0].s.value == 'test_subject'
|
assert result[0].s.value == 'test_subject'
|
||||||
assert result[0].p.value == 'test_predicate'
|
assert result[0].p.value == 'test_predicate'
|
||||||
|
|
@ -179,7 +182,7 @@ class TestCassandraQueryProcessor:
|
||||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||||
async def test_query_triples_s_pattern(self, mock_trustgraph):
|
async def test_query_triples_s_pattern(self, mock_trustgraph):
|
||||||
"""Test S query pattern (subject only)"""
|
"""Test S query pattern (subject only)"""
|
||||||
from trustgraph.schema import TriplesQueryRequest, Value
|
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
|
||||||
|
|
||||||
mock_tg_instance = MagicMock()
|
mock_tg_instance = MagicMock()
|
||||||
mock_trustgraph.return_value = mock_tg_instance
|
mock_trustgraph.return_value = mock_tg_instance
|
||||||
|
|
@ -194,7 +197,7 @@ class TestCassandraQueryProcessor:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value='test_subject', is_uri=False),
|
s=Term(type=LITERAL, value='test_subject'),
|
||||||
p=None,
|
p=None,
|
||||||
o=None,
|
o=None,
|
||||||
limit=25
|
limit=25
|
||||||
|
|
@ -202,7 +205,7 @@ class TestCassandraQueryProcessor:
|
||||||
|
|
||||||
result = await processor.query_triples(query)
|
result = await processor.query_triples(query)
|
||||||
|
|
||||||
mock_tg_instance.get_s.assert_called_once_with('test_collection', 'test_subject', limit=25)
|
mock_tg_instance.get_s.assert_called_once_with('test_collection', 'test_subject', g=None, limit=25)
|
||||||
assert len(result) == 1
|
assert len(result) == 1
|
||||||
assert result[0].s.value == 'test_subject'
|
assert result[0].s.value == 'test_subject'
|
||||||
assert result[0].p.value == 'result_predicate'
|
assert result[0].p.value == 'result_predicate'
|
||||||
|
|
@ -212,7 +215,7 @@ class TestCassandraQueryProcessor:
|
||||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||||
async def test_query_triples_p_pattern(self, mock_trustgraph):
|
async def test_query_triples_p_pattern(self, mock_trustgraph):
|
||||||
"""Test P query pattern (predicate only)"""
|
"""Test P query pattern (predicate only)"""
|
||||||
from trustgraph.schema import TriplesQueryRequest, Value
|
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
|
||||||
|
|
||||||
mock_tg_instance = MagicMock()
|
mock_tg_instance = MagicMock()
|
||||||
mock_trustgraph.return_value = mock_tg_instance
|
mock_trustgraph.return_value = mock_tg_instance
|
||||||
|
|
@ -228,14 +231,14 @@ class TestCassandraQueryProcessor:
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=None,
|
s=None,
|
||||||
p=Value(value='test_predicate', is_uri=False),
|
p=Term(type=LITERAL, value='test_predicate'),
|
||||||
o=None,
|
o=None,
|
||||||
limit=10
|
limit=10
|
||||||
)
|
)
|
||||||
|
|
||||||
result = await processor.query_triples(query)
|
result = await processor.query_triples(query)
|
||||||
|
|
||||||
mock_tg_instance.get_p.assert_called_once_with('test_collection', 'test_predicate', limit=10)
|
mock_tg_instance.get_p.assert_called_once_with('test_collection', 'test_predicate', g=None, limit=10)
|
||||||
assert len(result) == 1
|
assert len(result) == 1
|
||||||
assert result[0].s.value == 'result_subject'
|
assert result[0].s.value == 'result_subject'
|
||||||
assert result[0].p.value == 'test_predicate'
|
assert result[0].p.value == 'test_predicate'
|
||||||
|
|
@ -245,7 +248,7 @@ class TestCassandraQueryProcessor:
|
||||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||||
async def test_query_triples_o_pattern(self, mock_trustgraph):
|
async def test_query_triples_o_pattern(self, mock_trustgraph):
|
||||||
"""Test O query pattern (object only)"""
|
"""Test O query pattern (object only)"""
|
||||||
from trustgraph.schema import TriplesQueryRequest, Value
|
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
|
||||||
|
|
||||||
mock_tg_instance = MagicMock()
|
mock_tg_instance = MagicMock()
|
||||||
mock_trustgraph.return_value = mock_tg_instance
|
mock_trustgraph.return_value = mock_tg_instance
|
||||||
|
|
@ -262,13 +265,13 @@ class TestCassandraQueryProcessor:
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=None,
|
s=None,
|
||||||
p=None,
|
p=None,
|
||||||
o=Value(value='test_object', is_uri=False),
|
o=Term(type=LITERAL, value='test_object'),
|
||||||
limit=75
|
limit=75
|
||||||
)
|
)
|
||||||
|
|
||||||
result = await processor.query_triples(query)
|
result = await processor.query_triples(query)
|
||||||
|
|
||||||
mock_tg_instance.get_o.assert_called_once_with('test_collection', 'test_object', limit=75)
|
mock_tg_instance.get_o.assert_called_once_with('test_collection', 'test_object', g=None, limit=75)
|
||||||
assert len(result) == 1
|
assert len(result) == 1
|
||||||
assert result[0].s.value == 'result_subject'
|
assert result[0].s.value == 'result_subject'
|
||||||
assert result[0].p.value == 'result_predicate'
|
assert result[0].p.value == 'result_predicate'
|
||||||
|
|
@ -372,17 +375,20 @@ class TestCassandraQueryProcessor:
|
||||||
|
|
||||||
run()
|
run()
|
||||||
|
|
||||||
mock_launch.assert_called_once_with(default_ident, '\nTriples query service. Input is a (s, p, o) triple, some values may be\nnull. Output is a list of triples.\n')
|
mock_launch.assert_called_once_with(default_ident, '\nTriples query service. Input is a (s, p, o, g) quad pattern, some values may be\nnull. Output is a list of quads.\n')
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||||
async def test_query_triples_with_authentication(self, mock_trustgraph):
|
async def test_query_triples_with_authentication(self, mock_trustgraph):
|
||||||
"""Test querying with username and password authentication"""
|
"""Test querying with username and password authentication"""
|
||||||
from trustgraph.schema import TriplesQueryRequest, Value
|
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
|
||||||
|
|
||||||
mock_tg_instance = MagicMock()
|
mock_tg_instance = MagicMock()
|
||||||
mock_trustgraph.return_value = mock_tg_instance
|
mock_trustgraph.return_value = mock_tg_instance
|
||||||
mock_tg_instance.get_spo.return_value = None
|
# SPO query returns a list of results
|
||||||
|
mock_result = MagicMock()
|
||||||
|
mock_result.g = None
|
||||||
|
mock_tg_instance.get_spo.return_value = [mock_result]
|
||||||
|
|
||||||
processor = Processor(
|
processor = Processor(
|
||||||
taskgroup=MagicMock(),
|
taskgroup=MagicMock(),
|
||||||
|
|
@ -393,9 +399,9 @@ class TestCassandraQueryProcessor:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value='test_subject', is_uri=False),
|
s=Term(type=LITERAL, value='test_subject'),
|
||||||
p=Value(value='test_predicate', is_uri=False),
|
p=Term(type=LITERAL, value='test_predicate'),
|
||||||
o=Value(value='test_object', is_uri=False),
|
o=Term(type=LITERAL, value='test_object'),
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -413,20 +419,23 @@ class TestCassandraQueryProcessor:
|
||||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||||
async def test_query_triples_table_reuse(self, mock_trustgraph):
|
async def test_query_triples_table_reuse(self, mock_trustgraph):
|
||||||
"""Test that TrustGraph is reused for same table"""
|
"""Test that TrustGraph is reused for same table"""
|
||||||
from trustgraph.schema import TriplesQueryRequest, Value
|
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
|
||||||
|
|
||||||
mock_tg_instance = MagicMock()
|
mock_tg_instance = MagicMock()
|
||||||
mock_trustgraph.return_value = mock_tg_instance
|
mock_trustgraph.return_value = mock_tg_instance
|
||||||
mock_tg_instance.get_spo.return_value = None
|
# SPO query returns a list of results
|
||||||
|
mock_result = MagicMock()
|
||||||
|
mock_result.g = None
|
||||||
|
mock_tg_instance.get_spo.return_value = [mock_result]
|
||||||
|
|
||||||
processor = Processor(taskgroup=MagicMock())
|
processor = Processor(taskgroup=MagicMock())
|
||||||
|
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value='test_subject', is_uri=False),
|
s=Term(type=LITERAL, value='test_subject'),
|
||||||
p=Value(value='test_predicate', is_uri=False),
|
p=Term(type=LITERAL, value='test_predicate'),
|
||||||
o=Value(value='test_object', is_uri=False),
|
o=Term(type=LITERAL, value='test_object'),
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -442,7 +451,7 @@ class TestCassandraQueryProcessor:
|
||||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||||
async def test_query_triples_table_switching(self, mock_trustgraph):
|
async def test_query_triples_table_switching(self, mock_trustgraph):
|
||||||
"""Test table switching creates new TrustGraph"""
|
"""Test table switching creates new TrustGraph"""
|
||||||
from trustgraph.schema import TriplesQueryRequest, Value
|
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
|
||||||
|
|
||||||
mock_tg_instance1 = MagicMock()
|
mock_tg_instance1 = MagicMock()
|
||||||
mock_tg_instance2 = MagicMock()
|
mock_tg_instance2 = MagicMock()
|
||||||
|
|
@ -454,7 +463,7 @@ class TestCassandraQueryProcessor:
|
||||||
query1 = TriplesQueryRequest(
|
query1 = TriplesQueryRequest(
|
||||||
user='user1',
|
user='user1',
|
||||||
collection='collection1',
|
collection='collection1',
|
||||||
s=Value(value='test_subject', is_uri=False),
|
s=Term(type=LITERAL, value='test_subject'),
|
||||||
p=None,
|
p=None,
|
||||||
o=None,
|
o=None,
|
||||||
limit=100
|
limit=100
|
||||||
|
|
@ -467,7 +476,7 @@ class TestCassandraQueryProcessor:
|
||||||
query2 = TriplesQueryRequest(
|
query2 = TriplesQueryRequest(
|
||||||
user='user2',
|
user='user2',
|
||||||
collection='collection2',
|
collection='collection2',
|
||||||
s=Value(value='test_subject', is_uri=False),
|
s=Term(type=LITERAL, value='test_subject'),
|
||||||
p=None,
|
p=None,
|
||||||
o=None,
|
o=None,
|
||||||
limit=100
|
limit=100
|
||||||
|
|
@ -483,7 +492,7 @@ class TestCassandraQueryProcessor:
|
||||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||||
async def test_query_triples_exception_handling(self, mock_trustgraph):
|
async def test_query_triples_exception_handling(self, mock_trustgraph):
|
||||||
"""Test exception handling during query execution"""
|
"""Test exception handling during query execution"""
|
||||||
from trustgraph.schema import TriplesQueryRequest, Value
|
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
|
||||||
|
|
||||||
mock_tg_instance = MagicMock()
|
mock_tg_instance = MagicMock()
|
||||||
mock_trustgraph.return_value = mock_tg_instance
|
mock_trustgraph.return_value = mock_tg_instance
|
||||||
|
|
@ -494,9 +503,9 @@ class TestCassandraQueryProcessor:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value='test_subject', is_uri=False),
|
s=Term(type=LITERAL, value='test_subject'),
|
||||||
p=Value(value='test_predicate', is_uri=False),
|
p=Term(type=LITERAL, value='test_predicate'),
|
||||||
o=Value(value='test_object', is_uri=False),
|
o=Term(type=LITERAL, value='test_object'),
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -507,7 +516,7 @@ class TestCassandraQueryProcessor:
|
||||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||||
async def test_query_triples_multiple_results(self, mock_trustgraph):
|
async def test_query_triples_multiple_results(self, mock_trustgraph):
|
||||||
"""Test query returning multiple results"""
|
"""Test query returning multiple results"""
|
||||||
from trustgraph.schema import TriplesQueryRequest, Value
|
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
|
||||||
|
|
||||||
mock_tg_instance = MagicMock()
|
mock_tg_instance = MagicMock()
|
||||||
mock_trustgraph.return_value = mock_tg_instance
|
mock_trustgraph.return_value = mock_tg_instance
|
||||||
|
|
@ -524,8 +533,8 @@ class TestCassandraQueryProcessor:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value='test_subject', is_uri=False),
|
s=Term(type=LITERAL, value='test_subject'),
|
||||||
p=Value(value='test_predicate', is_uri=False),
|
p=Term(type=LITERAL, value='test_predicate'),
|
||||||
o=None,
|
o=None,
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
|
|
@ -544,7 +553,7 @@ class TestCassandraQueryPerformanceOptimizations:
|
||||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||||
async def test_get_po_query_optimization(self, mock_trustgraph):
|
async def test_get_po_query_optimization(self, mock_trustgraph):
|
||||||
"""Test that get_po queries use optimized table (no ALLOW FILTERING)"""
|
"""Test that get_po queries use optimized table (no ALLOW FILTERING)"""
|
||||||
from trustgraph.schema import TriplesQueryRequest, Value
|
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
|
||||||
|
|
||||||
mock_tg_instance = MagicMock()
|
mock_tg_instance = MagicMock()
|
||||||
mock_trustgraph.return_value = mock_tg_instance
|
mock_trustgraph.return_value = mock_tg_instance
|
||||||
|
|
@ -560,8 +569,8 @@ class TestCassandraQueryPerformanceOptimizations:
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=None,
|
s=None,
|
||||||
p=Value(value='test_predicate', is_uri=False),
|
p=Term(type=LITERAL, value='test_predicate'),
|
||||||
o=Value(value='test_object', is_uri=False),
|
o=Term(type=LITERAL, value='test_object'),
|
||||||
limit=50
|
limit=50
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -569,7 +578,7 @@ class TestCassandraQueryPerformanceOptimizations:
|
||||||
|
|
||||||
# Verify get_po was called (should use optimized po_table)
|
# Verify get_po was called (should use optimized po_table)
|
||||||
mock_tg_instance.get_po.assert_called_once_with(
|
mock_tg_instance.get_po.assert_called_once_with(
|
||||||
'test_collection', 'test_predicate', 'test_object', limit=50
|
'test_collection', 'test_predicate', 'test_object', g=None, limit=50
|
||||||
)
|
)
|
||||||
|
|
||||||
assert len(result) == 1
|
assert len(result) == 1
|
||||||
|
|
@ -581,7 +590,7 @@ class TestCassandraQueryPerformanceOptimizations:
|
||||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||||
async def test_get_os_query_optimization(self, mock_trustgraph):
|
async def test_get_os_query_optimization(self, mock_trustgraph):
|
||||||
"""Test that get_os queries use optimized table (no ALLOW FILTERING)"""
|
"""Test that get_os queries use optimized table (no ALLOW FILTERING)"""
|
||||||
from trustgraph.schema import TriplesQueryRequest, Value
|
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
|
||||||
|
|
||||||
mock_tg_instance = MagicMock()
|
mock_tg_instance = MagicMock()
|
||||||
mock_trustgraph.return_value = mock_tg_instance
|
mock_trustgraph.return_value = mock_tg_instance
|
||||||
|
|
@ -596,9 +605,9 @@ class TestCassandraQueryPerformanceOptimizations:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value='test_subject', is_uri=False),
|
s=Term(type=LITERAL, value='test_subject'),
|
||||||
p=None,
|
p=None,
|
||||||
o=Value(value='test_object', is_uri=False),
|
o=Term(type=LITERAL, value='test_object'),
|
||||||
limit=25
|
limit=25
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -606,7 +615,7 @@ class TestCassandraQueryPerformanceOptimizations:
|
||||||
|
|
||||||
# Verify get_os was called (should use optimized subject_table with clustering)
|
# Verify get_os was called (should use optimized subject_table with clustering)
|
||||||
mock_tg_instance.get_os.assert_called_once_with(
|
mock_tg_instance.get_os.assert_called_once_with(
|
||||||
'test_collection', 'test_object', 'test_subject', limit=25
|
'test_collection', 'test_object', 'test_subject', g=None, limit=25
|
||||||
)
|
)
|
||||||
|
|
||||||
assert len(result) == 1
|
assert len(result) == 1
|
||||||
|
|
@ -618,7 +627,7 @@ class TestCassandraQueryPerformanceOptimizations:
|
||||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||||
async def test_all_query_patterns_use_correct_tables(self, mock_trustgraph):
|
async def test_all_query_patterns_use_correct_tables(self, mock_trustgraph):
|
||||||
"""Test that all query patterns route to their optimal tables"""
|
"""Test that all query patterns route to their optimal tables"""
|
||||||
from trustgraph.schema import TriplesQueryRequest, Value
|
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
|
||||||
|
|
||||||
mock_tg_instance = MagicMock()
|
mock_tg_instance = MagicMock()
|
||||||
mock_trustgraph.return_value = mock_tg_instance
|
mock_trustgraph.return_value = mock_tg_instance
|
||||||
|
|
@ -655,9 +664,9 @@ class TestCassandraQueryPerformanceOptimizations:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value=s, is_uri=False) if s else None,
|
s=Term(type=LITERAL, value=s) if s else None,
|
||||||
p=Value(value=p, is_uri=False) if p else None,
|
p=Term(type=LITERAL, value=p) if p else None,
|
||||||
o=Value(value=o, is_uri=False) if o else None,
|
o=Term(type=LITERAL, value=o) if o else None,
|
||||||
limit=10
|
limit=10
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -690,7 +699,7 @@ class TestCassandraQueryPerformanceOptimizations:
|
||||||
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
@patch('trustgraph.query.triples.cassandra.service.KnowledgeGraph')
|
||||||
async def test_performance_critical_po_query_no_filtering(self, mock_trustgraph):
|
async def test_performance_critical_po_query_no_filtering(self, mock_trustgraph):
|
||||||
"""Test the performance-critical PO query that eliminates ALLOW FILTERING"""
|
"""Test the performance-critical PO query that eliminates ALLOW FILTERING"""
|
||||||
from trustgraph.schema import TriplesQueryRequest, Value
|
from trustgraph.schema import TriplesQueryRequest, Term, IRI, LITERAL
|
||||||
|
|
||||||
mock_tg_instance = MagicMock()
|
mock_tg_instance = MagicMock()
|
||||||
mock_trustgraph.return_value = mock_tg_instance
|
mock_trustgraph.return_value = mock_tg_instance
|
||||||
|
|
@ -711,8 +720,8 @@ class TestCassandraQueryPerformanceOptimizations:
|
||||||
user='large_dataset_user',
|
user='large_dataset_user',
|
||||||
collection='massive_collection',
|
collection='massive_collection',
|
||||||
s=None,
|
s=None,
|
||||||
p=Value(value='http://www.w3.org/1999/02/22-rdf-syntax-ns#type', is_uri=True),
|
p=Term(type=IRI, iri='http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
|
||||||
o=Value(value='http://example.com/Person', is_uri=True),
|
o=Term(type=IRI, iri='http://example.com/Person'),
|
||||||
limit=1000
|
limit=1000
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -723,14 +732,15 @@ class TestCassandraQueryPerformanceOptimizations:
|
||||||
'massive_collection',
|
'massive_collection',
|
||||||
'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
|
'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
|
||||||
'http://example.com/Person',
|
'http://example.com/Person',
|
||||||
|
g=None,
|
||||||
limit=1000
|
limit=1000
|
||||||
)
|
)
|
||||||
|
|
||||||
# Verify all results were returned
|
# Verify all results were returned
|
||||||
assert len(result) == 5
|
assert len(result) == 5
|
||||||
for i, triple in enumerate(result):
|
for i, triple in enumerate(result):
|
||||||
assert triple.s.value == f'subject_{i}'
|
assert triple.s.value == f'subject_{i}' # Mock returns literal values
|
||||||
assert triple.p.value == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
|
assert triple.p.iri == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
|
||||||
assert triple.p.is_uri is True
|
assert triple.p.type == IRI
|
||||||
assert triple.o.value == 'http://example.com/Person'
|
assert triple.o.iri == 'http://example.com/Person' # URIs use .iri
|
||||||
assert triple.o.is_uri is True
|
assert triple.o.type == IRI
|
||||||
|
|
@ -6,7 +6,7 @@ import pytest
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
from trustgraph.query.triples.falkordb.service import Processor
|
from trustgraph.query.triples.falkordb.service import Processor
|
||||||
from trustgraph.schema import Value, TriplesQueryRequest
|
from trustgraph.schema import Term, TriplesQueryRequest, IRI, LITERAL
|
||||||
|
|
||||||
|
|
||||||
class TestFalkorDBQueryProcessor:
|
class TestFalkorDBQueryProcessor:
|
||||||
|
|
@ -26,49 +26,49 @@ class TestFalkorDBQueryProcessor:
|
||||||
"""Test create_value with HTTP URI"""
|
"""Test create_value with HTTP URI"""
|
||||||
result = processor.create_value("http://example.com/resource")
|
result = processor.create_value("http://example.com/resource")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "http://example.com/resource"
|
assert result.iri == "http://example.com/resource"
|
||||||
assert result.is_uri is True
|
assert result.type == IRI
|
||||||
|
|
||||||
def test_create_value_with_https_uri(self, processor):
|
def test_create_value_with_https_uri(self, processor):
|
||||||
"""Test create_value with HTTPS URI"""
|
"""Test create_value with HTTPS URI"""
|
||||||
result = processor.create_value("https://example.com/resource")
|
result = processor.create_value("https://example.com/resource")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "https://example.com/resource"
|
assert result.iri == "https://example.com/resource"
|
||||||
assert result.is_uri is True
|
assert result.type == IRI
|
||||||
|
|
||||||
def test_create_value_with_literal(self, processor):
|
def test_create_value_with_literal(self, processor):
|
||||||
"""Test create_value with literal value"""
|
"""Test create_value with literal value"""
|
||||||
result = processor.create_value("just a literal string")
|
result = processor.create_value("just a literal string")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "just a literal string"
|
assert result.value == "just a literal string"
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
def test_create_value_with_empty_string(self, processor):
|
def test_create_value_with_empty_string(self, processor):
|
||||||
"""Test create_value with empty string"""
|
"""Test create_value with empty string"""
|
||||||
result = processor.create_value("")
|
result = processor.create_value("")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == ""
|
assert result.value == ""
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
def test_create_value_with_partial_uri(self, processor):
|
def test_create_value_with_partial_uri(self, processor):
|
||||||
"""Test create_value with string that looks like URI but isn't complete"""
|
"""Test create_value with string that looks like URI but isn't complete"""
|
||||||
result = processor.create_value("http")
|
result = processor.create_value("http")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "http"
|
assert result.value == "http"
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
def test_create_value_with_ftp_uri(self, processor):
|
def test_create_value_with_ftp_uri(self, processor):
|
||||||
"""Test create_value with FTP URI (should not be detected as URI)"""
|
"""Test create_value with FTP URI (should not be detected as URI)"""
|
||||||
result = processor.create_value("ftp://example.com/file")
|
result = processor.create_value("ftp://example.com/file")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "ftp://example.com/file"
|
assert result.value == "ftp://example.com/file"
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
|
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
|
||||||
def test_processor_initialization_with_defaults(self, mock_falkordb):
|
def test_processor_initialization_with_defaults(self, mock_falkordb):
|
||||||
|
|
@ -125,9 +125,9 @@ class TestFalkorDBQueryProcessor:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value="http://example.com/subject", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/subject"),
|
||||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/predicate"),
|
||||||
o=Value(value="literal object", is_uri=False),
|
o=Term(type=LITERAL, value="literal object"),
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -138,8 +138,8 @@ class TestFalkorDBQueryProcessor:
|
||||||
|
|
||||||
# Verify result contains the queried triple (appears twice - once from each query)
|
# Verify result contains the queried triple (appears twice - once from each query)
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result[0].s.value == "http://example.com/subject"
|
assert result[0].s.iri == "http://example.com/subject"
|
||||||
assert result[0].p.value == "http://example.com/predicate"
|
assert result[0].p.iri == "http://example.com/predicate"
|
||||||
assert result[0].o.value == "literal object"
|
assert result[0].o.value == "literal object"
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
|
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
|
||||||
|
|
@ -166,8 +166,8 @@ class TestFalkorDBQueryProcessor:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value="http://example.com/subject", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/subject"),
|
||||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/predicate"),
|
||||||
o=None,
|
o=None,
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
|
|
@ -179,13 +179,13 @@ class TestFalkorDBQueryProcessor:
|
||||||
|
|
||||||
# Verify results contain different objects
|
# Verify results contain different objects
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result[0].s.value == "http://example.com/subject"
|
assert result[0].s.iri == "http://example.com/subject"
|
||||||
assert result[0].p.value == "http://example.com/predicate"
|
assert result[0].p.iri == "http://example.com/predicate"
|
||||||
assert result[0].o.value == "literal result"
|
assert result[0].o.value == "literal result"
|
||||||
|
|
||||||
assert result[1].s.value == "http://example.com/subject"
|
assert result[1].s.iri == "http://example.com/subject"
|
||||||
assert result[1].p.value == "http://example.com/predicate"
|
assert result[1].p.iri == "http://example.com/predicate"
|
||||||
assert result[1].o.value == "http://example.com/uri_result"
|
assert result[1].o.iri == "http://example.com/uri_result"
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
|
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|
@ -211,9 +211,9 @@ class TestFalkorDBQueryProcessor:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value="http://example.com/subject", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/subject"),
|
||||||
p=None,
|
p=None,
|
||||||
o=Value(value="literal object", is_uri=False),
|
o=Term(type=LITERAL, value="literal object"),
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -224,12 +224,12 @@ class TestFalkorDBQueryProcessor:
|
||||||
|
|
||||||
# Verify results contain different predicates
|
# Verify results contain different predicates
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result[0].s.value == "http://example.com/subject"
|
assert result[0].s.iri == "http://example.com/subject"
|
||||||
assert result[0].p.value == "http://example.com/pred1"
|
assert result[0].p.iri == "http://example.com/pred1"
|
||||||
assert result[0].o.value == "literal object"
|
assert result[0].o.value == "literal object"
|
||||||
|
|
||||||
assert result[1].s.value == "http://example.com/subject"
|
assert result[1].s.iri == "http://example.com/subject"
|
||||||
assert result[1].p.value == "http://example.com/pred2"
|
assert result[1].p.iri == "http://example.com/pred2"
|
||||||
assert result[1].o.value == "literal object"
|
assert result[1].o.value == "literal object"
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
|
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
|
||||||
|
|
@ -256,7 +256,7 @@ class TestFalkorDBQueryProcessor:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value="http://example.com/subject", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/subject"),
|
||||||
p=None,
|
p=None,
|
||||||
o=None,
|
o=None,
|
||||||
limit=100
|
limit=100
|
||||||
|
|
@ -269,13 +269,13 @@ class TestFalkorDBQueryProcessor:
|
||||||
|
|
||||||
# Verify results contain different predicate-object pairs
|
# Verify results contain different predicate-object pairs
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result[0].s.value == "http://example.com/subject"
|
assert result[0].s.iri == "http://example.com/subject"
|
||||||
assert result[0].p.value == "http://example.com/pred1"
|
assert result[0].p.iri == "http://example.com/pred1"
|
||||||
assert result[0].o.value == "literal1"
|
assert result[0].o.value == "literal1"
|
||||||
|
|
||||||
assert result[1].s.value == "http://example.com/subject"
|
assert result[1].s.iri == "http://example.com/subject"
|
||||||
assert result[1].p.value == "http://example.com/pred2"
|
assert result[1].p.iri == "http://example.com/pred2"
|
||||||
assert result[1].o.value == "http://example.com/uri2"
|
assert result[1].o.iri == "http://example.com/uri2"
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
|
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|
@ -302,8 +302,8 @@ class TestFalkorDBQueryProcessor:
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=None,
|
s=None,
|
||||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/predicate"),
|
||||||
o=Value(value="literal object", is_uri=False),
|
o=Term(type=LITERAL, value="literal object"),
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -314,12 +314,12 @@ class TestFalkorDBQueryProcessor:
|
||||||
|
|
||||||
# Verify results contain different subjects
|
# Verify results contain different subjects
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result[0].s.value == "http://example.com/subj1"
|
assert result[0].s.iri == "http://example.com/subj1"
|
||||||
assert result[0].p.value == "http://example.com/predicate"
|
assert result[0].p.iri == "http://example.com/predicate"
|
||||||
assert result[0].o.value == "literal object"
|
assert result[0].o.value == "literal object"
|
||||||
|
|
||||||
assert result[1].s.value == "http://example.com/subj2"
|
assert result[1].s.iri == "http://example.com/subj2"
|
||||||
assert result[1].p.value == "http://example.com/predicate"
|
assert result[1].p.iri == "http://example.com/predicate"
|
||||||
assert result[1].o.value == "literal object"
|
assert result[1].o.value == "literal object"
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
|
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
|
||||||
|
|
@ -347,7 +347,7 @@ class TestFalkorDBQueryProcessor:
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=None,
|
s=None,
|
||||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/predicate"),
|
||||||
o=None,
|
o=None,
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
|
|
@ -359,13 +359,13 @@ class TestFalkorDBQueryProcessor:
|
||||||
|
|
||||||
# Verify results contain different subject-object pairs
|
# Verify results contain different subject-object pairs
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result[0].s.value == "http://example.com/subj1"
|
assert result[0].s.iri == "http://example.com/subj1"
|
||||||
assert result[0].p.value == "http://example.com/predicate"
|
assert result[0].p.iri == "http://example.com/predicate"
|
||||||
assert result[0].o.value == "literal1"
|
assert result[0].o.value == "literal1"
|
||||||
|
|
||||||
assert result[1].s.value == "http://example.com/subj2"
|
assert result[1].s.iri == "http://example.com/subj2"
|
||||||
assert result[1].p.value == "http://example.com/predicate"
|
assert result[1].p.iri == "http://example.com/predicate"
|
||||||
assert result[1].o.value == "http://example.com/uri2"
|
assert result[1].o.iri == "http://example.com/uri2"
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
|
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|
@ -393,7 +393,7 @@ class TestFalkorDBQueryProcessor:
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=None,
|
s=None,
|
||||||
p=None,
|
p=None,
|
||||||
o=Value(value="literal object", is_uri=False),
|
o=Term(type=LITERAL, value="literal object"),
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -404,12 +404,12 @@ class TestFalkorDBQueryProcessor:
|
||||||
|
|
||||||
# Verify results contain different subject-predicate pairs
|
# Verify results contain different subject-predicate pairs
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result[0].s.value == "http://example.com/subj1"
|
assert result[0].s.iri == "http://example.com/subj1"
|
||||||
assert result[0].p.value == "http://example.com/pred1"
|
assert result[0].p.iri == "http://example.com/pred1"
|
||||||
assert result[0].o.value == "literal object"
|
assert result[0].o.value == "literal object"
|
||||||
|
|
||||||
assert result[1].s.value == "http://example.com/subj2"
|
assert result[1].s.iri == "http://example.com/subj2"
|
||||||
assert result[1].p.value == "http://example.com/pred2"
|
assert result[1].p.iri == "http://example.com/pred2"
|
||||||
assert result[1].o.value == "literal object"
|
assert result[1].o.value == "literal object"
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
|
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
|
||||||
|
|
@ -449,13 +449,13 @@ class TestFalkorDBQueryProcessor:
|
||||||
|
|
||||||
# Verify results contain different triples
|
# Verify results contain different triples
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result[0].s.value == "http://example.com/s1"
|
assert result[0].s.iri == "http://example.com/s1"
|
||||||
assert result[0].p.value == "http://example.com/p1"
|
assert result[0].p.iri == "http://example.com/p1"
|
||||||
assert result[0].o.value == "literal1"
|
assert result[0].o.value == "literal1"
|
||||||
|
|
||||||
assert result[1].s.value == "http://example.com/s2"
|
assert result[1].s.iri == "http://example.com/s2"
|
||||||
assert result[1].p.value == "http://example.com/p2"
|
assert result[1].p.iri == "http://example.com/p2"
|
||||||
assert result[1].o.value == "http://example.com/o2"
|
assert result[1].o.iri == "http://example.com/o2"
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
|
@patch('trustgraph.query.triples.falkordb.service.FalkorDB')
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|
@ -476,7 +476,7 @@ class TestFalkorDBQueryProcessor:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value="http://example.com/subject", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/subject"),
|
||||||
p=None,
|
p=None,
|
||||||
o=None,
|
o=None,
|
||||||
limit=100
|
limit=100
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ import pytest
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
from trustgraph.query.triples.memgraph.service import Processor
|
from trustgraph.query.triples.memgraph.service import Processor
|
||||||
from trustgraph.schema import Value, TriplesQueryRequest
|
from trustgraph.schema import Term, TriplesQueryRequest, IRI, LITERAL
|
||||||
|
|
||||||
|
|
||||||
class TestMemgraphQueryProcessor:
|
class TestMemgraphQueryProcessor:
|
||||||
|
|
@ -26,49 +26,49 @@ class TestMemgraphQueryProcessor:
|
||||||
"""Test create_value with HTTP URI"""
|
"""Test create_value with HTTP URI"""
|
||||||
result = processor.create_value("http://example.com/resource")
|
result = processor.create_value("http://example.com/resource")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "http://example.com/resource"
|
assert result.iri == "http://example.com/resource"
|
||||||
assert result.is_uri is True
|
assert result.type == IRI
|
||||||
|
|
||||||
def test_create_value_with_https_uri(self, processor):
|
def test_create_value_with_https_uri(self, processor):
|
||||||
"""Test create_value with HTTPS URI"""
|
"""Test create_value with HTTPS URI"""
|
||||||
result = processor.create_value("https://example.com/resource")
|
result = processor.create_value("https://example.com/resource")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "https://example.com/resource"
|
assert result.iri == "https://example.com/resource"
|
||||||
assert result.is_uri is True
|
assert result.type == IRI
|
||||||
|
|
||||||
def test_create_value_with_literal(self, processor):
|
def test_create_value_with_literal(self, processor):
|
||||||
"""Test create_value with literal value"""
|
"""Test create_value with literal value"""
|
||||||
result = processor.create_value("just a literal string")
|
result = processor.create_value("just a literal string")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "just a literal string"
|
assert result.value == "just a literal string"
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
def test_create_value_with_empty_string(self, processor):
|
def test_create_value_with_empty_string(self, processor):
|
||||||
"""Test create_value with empty string"""
|
"""Test create_value with empty string"""
|
||||||
result = processor.create_value("")
|
result = processor.create_value("")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == ""
|
assert result.value == ""
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
def test_create_value_with_partial_uri(self, processor):
|
def test_create_value_with_partial_uri(self, processor):
|
||||||
"""Test create_value with string that looks like URI but isn't complete"""
|
"""Test create_value with string that looks like URI but isn't complete"""
|
||||||
result = processor.create_value("http")
|
result = processor.create_value("http")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "http"
|
assert result.value == "http"
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
def test_create_value_with_ftp_uri(self, processor):
|
def test_create_value_with_ftp_uri(self, processor):
|
||||||
"""Test create_value with FTP URI (should not be detected as URI)"""
|
"""Test create_value with FTP URI (should not be detected as URI)"""
|
||||||
result = processor.create_value("ftp://example.com/file")
|
result = processor.create_value("ftp://example.com/file")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "ftp://example.com/file"
|
assert result.value == "ftp://example.com/file"
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
|
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
|
||||||
def test_processor_initialization_with_defaults(self, mock_graph_db):
|
def test_processor_initialization_with_defaults(self, mock_graph_db):
|
||||||
|
|
@ -124,9 +124,9 @@ class TestMemgraphQueryProcessor:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value="http://example.com/subject", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/subject"),
|
||||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/predicate"),
|
||||||
o=Value(value="literal object", is_uri=False),
|
o=Term(type=LITERAL, value="literal object"),
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -137,8 +137,8 @@ class TestMemgraphQueryProcessor:
|
||||||
|
|
||||||
# Verify result contains the queried triple (appears twice - once from each query)
|
# Verify result contains the queried triple (appears twice - once from each query)
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result[0].s.value == "http://example.com/subject"
|
assert result[0].s.iri == "http://example.com/subject"
|
||||||
assert result[0].p.value == "http://example.com/predicate"
|
assert result[0].p.iri == "http://example.com/predicate"
|
||||||
assert result[0].o.value == "literal object"
|
assert result[0].o.value == "literal object"
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
|
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
|
||||||
|
|
@ -166,8 +166,8 @@ class TestMemgraphQueryProcessor:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value="http://example.com/subject", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/subject"),
|
||||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/predicate"),
|
||||||
o=None,
|
o=None,
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
|
|
@ -179,13 +179,13 @@ class TestMemgraphQueryProcessor:
|
||||||
|
|
||||||
# Verify results contain different objects
|
# Verify results contain different objects
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result[0].s.value == "http://example.com/subject"
|
assert result[0].s.iri == "http://example.com/subject"
|
||||||
assert result[0].p.value == "http://example.com/predicate"
|
assert result[0].p.iri == "http://example.com/predicate"
|
||||||
assert result[0].o.value == "literal result"
|
assert result[0].o.value == "literal result"
|
||||||
|
|
||||||
assert result[1].s.value == "http://example.com/subject"
|
assert result[1].s.iri == "http://example.com/subject"
|
||||||
assert result[1].p.value == "http://example.com/predicate"
|
assert result[1].p.iri == "http://example.com/predicate"
|
||||||
assert result[1].o.value == "http://example.com/uri_result"
|
assert result[1].o.iri == "http://example.com/uri_result"
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
|
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|
@ -212,9 +212,9 @@ class TestMemgraphQueryProcessor:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value="http://example.com/subject", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/subject"),
|
||||||
p=None,
|
p=None,
|
||||||
o=Value(value="literal object", is_uri=False),
|
o=Term(type=LITERAL, value="literal object"),
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -225,12 +225,12 @@ class TestMemgraphQueryProcessor:
|
||||||
|
|
||||||
# Verify results contain different predicates
|
# Verify results contain different predicates
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result[0].s.value == "http://example.com/subject"
|
assert result[0].s.iri == "http://example.com/subject"
|
||||||
assert result[0].p.value == "http://example.com/pred1"
|
assert result[0].p.iri == "http://example.com/pred1"
|
||||||
assert result[0].o.value == "literal object"
|
assert result[0].o.value == "literal object"
|
||||||
|
|
||||||
assert result[1].s.value == "http://example.com/subject"
|
assert result[1].s.iri == "http://example.com/subject"
|
||||||
assert result[1].p.value == "http://example.com/pred2"
|
assert result[1].p.iri == "http://example.com/pred2"
|
||||||
assert result[1].o.value == "literal object"
|
assert result[1].o.value == "literal object"
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
|
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
|
||||||
|
|
@ -258,7 +258,7 @@ class TestMemgraphQueryProcessor:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value="http://example.com/subject", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/subject"),
|
||||||
p=None,
|
p=None,
|
||||||
o=None,
|
o=None,
|
||||||
limit=100
|
limit=100
|
||||||
|
|
@ -271,13 +271,13 @@ class TestMemgraphQueryProcessor:
|
||||||
|
|
||||||
# Verify results contain different predicate-object pairs
|
# Verify results contain different predicate-object pairs
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result[0].s.value == "http://example.com/subject"
|
assert result[0].s.iri == "http://example.com/subject"
|
||||||
assert result[0].p.value == "http://example.com/pred1"
|
assert result[0].p.iri == "http://example.com/pred1"
|
||||||
assert result[0].o.value == "literal1"
|
assert result[0].o.value == "literal1"
|
||||||
|
|
||||||
assert result[1].s.value == "http://example.com/subject"
|
assert result[1].s.iri == "http://example.com/subject"
|
||||||
assert result[1].p.value == "http://example.com/pred2"
|
assert result[1].p.iri == "http://example.com/pred2"
|
||||||
assert result[1].o.value == "http://example.com/uri2"
|
assert result[1].o.iri == "http://example.com/uri2"
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
|
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|
@ -305,8 +305,8 @@ class TestMemgraphQueryProcessor:
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=None,
|
s=None,
|
||||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/predicate"),
|
||||||
o=Value(value="literal object", is_uri=False),
|
o=Term(type=LITERAL, value="literal object"),
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -317,12 +317,12 @@ class TestMemgraphQueryProcessor:
|
||||||
|
|
||||||
# Verify results contain different subjects
|
# Verify results contain different subjects
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result[0].s.value == "http://example.com/subj1"
|
assert result[0].s.iri == "http://example.com/subj1"
|
||||||
assert result[0].p.value == "http://example.com/predicate"
|
assert result[0].p.iri == "http://example.com/predicate"
|
||||||
assert result[0].o.value == "literal object"
|
assert result[0].o.value == "literal object"
|
||||||
|
|
||||||
assert result[1].s.value == "http://example.com/subj2"
|
assert result[1].s.iri == "http://example.com/subj2"
|
||||||
assert result[1].p.value == "http://example.com/predicate"
|
assert result[1].p.iri == "http://example.com/predicate"
|
||||||
assert result[1].o.value == "literal object"
|
assert result[1].o.value == "literal object"
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
|
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
|
||||||
|
|
@ -351,7 +351,7 @@ class TestMemgraphQueryProcessor:
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=None,
|
s=None,
|
||||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/predicate"),
|
||||||
o=None,
|
o=None,
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
|
|
@ -363,13 +363,13 @@ class TestMemgraphQueryProcessor:
|
||||||
|
|
||||||
# Verify results contain different subject-object pairs
|
# Verify results contain different subject-object pairs
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result[0].s.value == "http://example.com/subj1"
|
assert result[0].s.iri == "http://example.com/subj1"
|
||||||
assert result[0].p.value == "http://example.com/predicate"
|
assert result[0].p.iri == "http://example.com/predicate"
|
||||||
assert result[0].o.value == "literal1"
|
assert result[0].o.value == "literal1"
|
||||||
|
|
||||||
assert result[1].s.value == "http://example.com/subj2"
|
assert result[1].s.iri == "http://example.com/subj2"
|
||||||
assert result[1].p.value == "http://example.com/predicate"
|
assert result[1].p.iri == "http://example.com/predicate"
|
||||||
assert result[1].o.value == "http://example.com/uri2"
|
assert result[1].o.iri == "http://example.com/uri2"
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
|
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|
@ -398,7 +398,7 @@ class TestMemgraphQueryProcessor:
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=None,
|
s=None,
|
||||||
p=None,
|
p=None,
|
||||||
o=Value(value="literal object", is_uri=False),
|
o=Term(type=LITERAL, value="literal object"),
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -409,12 +409,12 @@ class TestMemgraphQueryProcessor:
|
||||||
|
|
||||||
# Verify results contain different subject-predicate pairs
|
# Verify results contain different subject-predicate pairs
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result[0].s.value == "http://example.com/subj1"
|
assert result[0].s.iri == "http://example.com/subj1"
|
||||||
assert result[0].p.value == "http://example.com/pred1"
|
assert result[0].p.iri == "http://example.com/pred1"
|
||||||
assert result[0].o.value == "literal object"
|
assert result[0].o.value == "literal object"
|
||||||
|
|
||||||
assert result[1].s.value == "http://example.com/subj2"
|
assert result[1].s.iri == "http://example.com/subj2"
|
||||||
assert result[1].p.value == "http://example.com/pred2"
|
assert result[1].p.iri == "http://example.com/pred2"
|
||||||
assert result[1].o.value == "literal object"
|
assert result[1].o.value == "literal object"
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
|
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
|
||||||
|
|
@ -455,13 +455,13 @@ class TestMemgraphQueryProcessor:
|
||||||
|
|
||||||
# Verify results contain different triples
|
# Verify results contain different triples
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result[0].s.value == "http://example.com/s1"
|
assert result[0].s.iri == "http://example.com/s1"
|
||||||
assert result[0].p.value == "http://example.com/p1"
|
assert result[0].p.iri == "http://example.com/p1"
|
||||||
assert result[0].o.value == "literal1"
|
assert result[0].o.value == "literal1"
|
||||||
|
|
||||||
assert result[1].s.value == "http://example.com/s2"
|
assert result[1].s.iri == "http://example.com/s2"
|
||||||
assert result[1].p.value == "http://example.com/p2"
|
assert result[1].p.iri == "http://example.com/p2"
|
||||||
assert result[1].o.value == "http://example.com/o2"
|
assert result[1].o.iri == "http://example.com/o2"
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
|
@patch('trustgraph.query.triples.memgraph.service.GraphDatabase')
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|
@ -480,7 +480,7 @@ class TestMemgraphQueryProcessor:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value="http://example.com/subject", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/subject"),
|
||||||
p=None,
|
p=None,
|
||||||
o=None,
|
o=None,
|
||||||
limit=100
|
limit=100
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ import pytest
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
from trustgraph.query.triples.neo4j.service import Processor
|
from trustgraph.query.triples.neo4j.service import Processor
|
||||||
from trustgraph.schema import Value, TriplesQueryRequest
|
from trustgraph.schema import Term, TriplesQueryRequest, IRI, LITERAL
|
||||||
|
|
||||||
|
|
||||||
class TestNeo4jQueryProcessor:
|
class TestNeo4jQueryProcessor:
|
||||||
|
|
@ -26,49 +26,49 @@ class TestNeo4jQueryProcessor:
|
||||||
"""Test create_value with HTTP URI"""
|
"""Test create_value with HTTP URI"""
|
||||||
result = processor.create_value("http://example.com/resource")
|
result = processor.create_value("http://example.com/resource")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "http://example.com/resource"
|
assert result.iri == "http://example.com/resource"
|
||||||
assert result.is_uri is True
|
assert result.type == IRI
|
||||||
|
|
||||||
def test_create_value_with_https_uri(self, processor):
|
def test_create_value_with_https_uri(self, processor):
|
||||||
"""Test create_value with HTTPS URI"""
|
"""Test create_value with HTTPS URI"""
|
||||||
result = processor.create_value("https://example.com/resource")
|
result = processor.create_value("https://example.com/resource")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "https://example.com/resource"
|
assert result.iri == "https://example.com/resource"
|
||||||
assert result.is_uri is True
|
assert result.type == IRI
|
||||||
|
|
||||||
def test_create_value_with_literal(self, processor):
|
def test_create_value_with_literal(self, processor):
|
||||||
"""Test create_value with literal value"""
|
"""Test create_value with literal value"""
|
||||||
result = processor.create_value("just a literal string")
|
result = processor.create_value("just a literal string")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "just a literal string"
|
assert result.value == "just a literal string"
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
def test_create_value_with_empty_string(self, processor):
|
def test_create_value_with_empty_string(self, processor):
|
||||||
"""Test create_value with empty string"""
|
"""Test create_value with empty string"""
|
||||||
result = processor.create_value("")
|
result = processor.create_value("")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == ""
|
assert result.value == ""
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
def test_create_value_with_partial_uri(self, processor):
|
def test_create_value_with_partial_uri(self, processor):
|
||||||
"""Test create_value with string that looks like URI but isn't complete"""
|
"""Test create_value with string that looks like URI but isn't complete"""
|
||||||
result = processor.create_value("http")
|
result = processor.create_value("http")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "http"
|
assert result.value == "http"
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
def test_create_value_with_ftp_uri(self, processor):
|
def test_create_value_with_ftp_uri(self, processor):
|
||||||
"""Test create_value with FTP URI (should not be detected as URI)"""
|
"""Test create_value with FTP URI (should not be detected as URI)"""
|
||||||
result = processor.create_value("ftp://example.com/file")
|
result = processor.create_value("ftp://example.com/file")
|
||||||
|
|
||||||
assert isinstance(result, Value)
|
assert isinstance(result, Term)
|
||||||
assert result.value == "ftp://example.com/file"
|
assert result.value == "ftp://example.com/file"
|
||||||
assert result.is_uri is False
|
assert result.type == LITERAL
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
|
@patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
|
||||||
def test_processor_initialization_with_defaults(self, mock_graph_db):
|
def test_processor_initialization_with_defaults(self, mock_graph_db):
|
||||||
|
|
@ -124,9 +124,9 @@ class TestNeo4jQueryProcessor:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value="http://example.com/subject", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/subject"),
|
||||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/predicate"),
|
||||||
o=Value(value="literal object", is_uri=False),
|
o=Term(type=LITERAL, value="literal object"),
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -137,8 +137,8 @@ class TestNeo4jQueryProcessor:
|
||||||
|
|
||||||
# Verify result contains the queried triple (appears twice - once from each query)
|
# Verify result contains the queried triple (appears twice - once from each query)
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result[0].s.value == "http://example.com/subject"
|
assert result[0].s.iri == "http://example.com/subject"
|
||||||
assert result[0].p.value == "http://example.com/predicate"
|
assert result[0].p.iri == "http://example.com/predicate"
|
||||||
assert result[0].o.value == "literal object"
|
assert result[0].o.value == "literal object"
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
|
@patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
|
||||||
|
|
@ -166,8 +166,8 @@ class TestNeo4jQueryProcessor:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value="http://example.com/subject", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/subject"),
|
||||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/predicate"),
|
||||||
o=None,
|
o=None,
|
||||||
limit=100
|
limit=100
|
||||||
)
|
)
|
||||||
|
|
@ -179,13 +179,13 @@ class TestNeo4jQueryProcessor:
|
||||||
|
|
||||||
# Verify results contain different objects
|
# Verify results contain different objects
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result[0].s.value == "http://example.com/subject"
|
assert result[0].s.iri == "http://example.com/subject"
|
||||||
assert result[0].p.value == "http://example.com/predicate"
|
assert result[0].p.iri == "http://example.com/predicate"
|
||||||
assert result[0].o.value == "literal result"
|
assert result[0].o.value == "literal result"
|
||||||
|
|
||||||
assert result[1].s.value == "http://example.com/subject"
|
assert result[1].s.iri == "http://example.com/subject"
|
||||||
assert result[1].p.value == "http://example.com/predicate"
|
assert result[1].p.iri == "http://example.com/predicate"
|
||||||
assert result[1].o.value == "http://example.com/uri_result"
|
assert result[1].o.iri == "http://example.com/uri_result"
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
|
@patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|
@ -225,13 +225,13 @@ class TestNeo4jQueryProcessor:
|
||||||
|
|
||||||
# Verify results contain different triples
|
# Verify results contain different triples
|
||||||
assert len(result) == 2
|
assert len(result) == 2
|
||||||
assert result[0].s.value == "http://example.com/s1"
|
assert result[0].s.iri == "http://example.com/s1"
|
||||||
assert result[0].p.value == "http://example.com/p1"
|
assert result[0].p.iri == "http://example.com/p1"
|
||||||
assert result[0].o.value == "literal1"
|
assert result[0].o.value == "literal1"
|
||||||
|
|
||||||
assert result[1].s.value == "http://example.com/s2"
|
assert result[1].s.iri == "http://example.com/s2"
|
||||||
assert result[1].p.value == "http://example.com/p2"
|
assert result[1].p.iri == "http://example.com/p2"
|
||||||
assert result[1].o.value == "http://example.com/o2"
|
assert result[1].o.iri == "http://example.com/o2"
|
||||||
|
|
||||||
@patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
|
@patch('trustgraph.query.triples.neo4j.service.GraphDatabase')
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|
@ -250,7 +250,7 @@ class TestNeo4jQueryProcessor:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user='test_user',
|
user='test_user',
|
||||||
collection='test_collection',
|
collection='test_collection',
|
||||||
s=Value(value="http://example.com/subject", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/subject"),
|
||||||
p=None,
|
p=None,
|
||||||
o=None,
|
o=None,
|
||||||
limit=100
|
limit=100
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ import pytest
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
from trustgraph.storage.graph_embeddings.milvus.write import Processor
|
from trustgraph.storage.graph_embeddings.milvus.write import Processor
|
||||||
from trustgraph.schema import Value, EntityEmbeddings
|
from trustgraph.schema import Term, EntityEmbeddings, IRI, LITERAL
|
||||||
|
|
||||||
|
|
||||||
class TestMilvusGraphEmbeddingsStorageProcessor:
|
class TestMilvusGraphEmbeddingsStorageProcessor:
|
||||||
|
|
@ -22,11 +22,11 @@ class TestMilvusGraphEmbeddingsStorageProcessor:
|
||||||
|
|
||||||
# Create test entities with embeddings
|
# Create test entities with embeddings
|
||||||
entity1 = EntityEmbeddings(
|
entity1 = EntityEmbeddings(
|
||||||
entity=Value(value='http://example.com/entity1', is_uri=True),
|
entity=Term(type=IRI, iri='http://example.com/entity1'),
|
||||||
vectors=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
|
vectors=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
|
||||||
)
|
)
|
||||||
entity2 = EntityEmbeddings(
|
entity2 = EntityEmbeddings(
|
||||||
entity=Value(value='literal entity', is_uri=False),
|
entity=Term(type=LITERAL, value='literal entity'),
|
||||||
vectors=[[0.7, 0.8, 0.9]]
|
vectors=[[0.7, 0.8, 0.9]]
|
||||||
)
|
)
|
||||||
message.entities = [entity1, entity2]
|
message.entities = [entity1, entity2]
|
||||||
|
|
@ -84,7 +84,7 @@ class TestMilvusGraphEmbeddingsStorageProcessor:
|
||||||
message.metadata.collection = 'test_collection'
|
message.metadata.collection = 'test_collection'
|
||||||
|
|
||||||
entity = EntityEmbeddings(
|
entity = EntityEmbeddings(
|
||||||
entity=Value(value='http://example.com/entity', is_uri=True),
|
entity=Term(type=IRI, iri='http://example.com/entity'),
|
||||||
vectors=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
|
vectors=[[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
|
||||||
)
|
)
|
||||||
message.entities = [entity]
|
message.entities = [entity]
|
||||||
|
|
@ -136,7 +136,7 @@ class TestMilvusGraphEmbeddingsStorageProcessor:
|
||||||
message.metadata.collection = 'test_collection'
|
message.metadata.collection = 'test_collection'
|
||||||
|
|
||||||
entity = EntityEmbeddings(
|
entity = EntityEmbeddings(
|
||||||
entity=Value(value='', is_uri=False),
|
entity=Term(type=LITERAL, value=''),
|
||||||
vectors=[[0.1, 0.2, 0.3]]
|
vectors=[[0.1, 0.2, 0.3]]
|
||||||
)
|
)
|
||||||
message.entities = [entity]
|
message.entities = [entity]
|
||||||
|
|
@ -155,7 +155,7 @@ class TestMilvusGraphEmbeddingsStorageProcessor:
|
||||||
message.metadata.collection = 'test_collection'
|
message.metadata.collection = 'test_collection'
|
||||||
|
|
||||||
entity = EntityEmbeddings(
|
entity = EntityEmbeddings(
|
||||||
entity=Value(value=None, is_uri=False),
|
entity=Term(type=LITERAL, value=None),
|
||||||
vectors=[[0.1, 0.2, 0.3]]
|
vectors=[[0.1, 0.2, 0.3]]
|
||||||
)
|
)
|
||||||
message.entities = [entity]
|
message.entities = [entity]
|
||||||
|
|
@ -174,15 +174,15 @@ class TestMilvusGraphEmbeddingsStorageProcessor:
|
||||||
message.metadata.collection = 'test_collection'
|
message.metadata.collection = 'test_collection'
|
||||||
|
|
||||||
valid_entity = EntityEmbeddings(
|
valid_entity = EntityEmbeddings(
|
||||||
entity=Value(value='http://example.com/valid', is_uri=True),
|
entity=Term(type=IRI, iri='http://example.com/valid'),
|
||||||
vectors=[[0.1, 0.2, 0.3]]
|
vectors=[[0.1, 0.2, 0.3]]
|
||||||
)
|
)
|
||||||
empty_entity = EntityEmbeddings(
|
empty_entity = EntityEmbeddings(
|
||||||
entity=Value(value='', is_uri=False),
|
entity=Term(type=LITERAL, value=''),
|
||||||
vectors=[[0.4, 0.5, 0.6]]
|
vectors=[[0.4, 0.5, 0.6]]
|
||||||
)
|
)
|
||||||
none_entity = EntityEmbeddings(
|
none_entity = EntityEmbeddings(
|
||||||
entity=Value(value=None, is_uri=False),
|
entity=Term(type=LITERAL, value=None),
|
||||||
vectors=[[0.7, 0.8, 0.9]]
|
vectors=[[0.7, 0.8, 0.9]]
|
||||||
)
|
)
|
||||||
message.entities = [valid_entity, empty_entity, none_entity]
|
message.entities = [valid_entity, empty_entity, none_entity]
|
||||||
|
|
@ -217,7 +217,7 @@ class TestMilvusGraphEmbeddingsStorageProcessor:
|
||||||
message.metadata.collection = 'test_collection'
|
message.metadata.collection = 'test_collection'
|
||||||
|
|
||||||
entity = EntityEmbeddings(
|
entity = EntityEmbeddings(
|
||||||
entity=Value(value='http://example.com/entity', is_uri=True),
|
entity=Term(type=IRI, iri='http://example.com/entity'),
|
||||||
vectors=[]
|
vectors=[]
|
||||||
)
|
)
|
||||||
message.entities = [entity]
|
message.entities = [entity]
|
||||||
|
|
@ -236,7 +236,7 @@ class TestMilvusGraphEmbeddingsStorageProcessor:
|
||||||
message.metadata.collection = 'test_collection'
|
message.metadata.collection = 'test_collection'
|
||||||
|
|
||||||
entity = EntityEmbeddings(
|
entity = EntityEmbeddings(
|
||||||
entity=Value(value='http://example.com/entity', is_uri=True),
|
entity=Term(type=IRI, iri='http://example.com/entity'),
|
||||||
vectors=[
|
vectors=[
|
||||||
[0.1, 0.2], # 2D vector
|
[0.1, 0.2], # 2D vector
|
||||||
[0.3, 0.4, 0.5, 0.6], # 4D vector
|
[0.3, 0.4, 0.5, 0.6], # 4D vector
|
||||||
|
|
@ -269,11 +269,11 @@ class TestMilvusGraphEmbeddingsStorageProcessor:
|
||||||
message.metadata.collection = 'test_collection'
|
message.metadata.collection = 'test_collection'
|
||||||
|
|
||||||
uri_entity = EntityEmbeddings(
|
uri_entity = EntityEmbeddings(
|
||||||
entity=Value(value='http://example.com/uri_entity', is_uri=True),
|
entity=Term(type=IRI, iri='http://example.com/uri_entity'),
|
||||||
vectors=[[0.1, 0.2, 0.3]]
|
vectors=[[0.1, 0.2, 0.3]]
|
||||||
)
|
)
|
||||||
literal_entity = EntityEmbeddings(
|
literal_entity = EntityEmbeddings(
|
||||||
entity=Value(value='literal entity text', is_uri=False),
|
entity=Term(type=LITERAL, value='literal entity text'),
|
||||||
vectors=[[0.4, 0.5, 0.6]]
|
vectors=[[0.4, 0.5, 0.6]]
|
||||||
)
|
)
|
||||||
message.entities = [uri_entity, literal_entity]
|
message.entities = [uri_entity, literal_entity]
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ from unittest import IsolatedAsyncioTestCase
|
||||||
|
|
||||||
# Import the service under test
|
# Import the service under test
|
||||||
from trustgraph.storage.graph_embeddings.qdrant.write import Processor
|
from trustgraph.storage.graph_embeddings.qdrant.write import Processor
|
||||||
|
from trustgraph.schema import IRI, LITERAL
|
||||||
|
|
||||||
|
|
||||||
class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
|
class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
|
||||||
|
|
@ -67,7 +68,8 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
|
||||||
mock_message.metadata.collection = 'test_collection'
|
mock_message.metadata.collection = 'test_collection'
|
||||||
|
|
||||||
mock_entity = MagicMock()
|
mock_entity = MagicMock()
|
||||||
mock_entity.entity.value = 'test_entity'
|
mock_entity.entity.type = IRI
|
||||||
|
mock_entity.entity.iri = 'test_entity'
|
||||||
mock_entity.vectors = [[0.1, 0.2, 0.3]] # Single vector with 3 dimensions
|
mock_entity.vectors = [[0.1, 0.2, 0.3]] # Single vector with 3 dimensions
|
||||||
|
|
||||||
mock_message.entities = [mock_entity]
|
mock_message.entities = [mock_entity]
|
||||||
|
|
@ -120,11 +122,13 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
|
||||||
mock_message.metadata.collection = 'multi_collection'
|
mock_message.metadata.collection = 'multi_collection'
|
||||||
|
|
||||||
mock_entity1 = MagicMock()
|
mock_entity1 = MagicMock()
|
||||||
mock_entity1.entity.value = 'entity_one'
|
mock_entity1.entity.type = IRI
|
||||||
|
mock_entity1.entity.iri = 'entity_one'
|
||||||
mock_entity1.vectors = [[0.1, 0.2]]
|
mock_entity1.vectors = [[0.1, 0.2]]
|
||||||
|
|
||||||
mock_entity2 = MagicMock()
|
mock_entity2 = MagicMock()
|
||||||
mock_entity2.entity.value = 'entity_two'
|
mock_entity2.entity.type = IRI
|
||||||
|
mock_entity2.entity.iri = 'entity_two'
|
||||||
mock_entity2.vectors = [[0.3, 0.4]]
|
mock_entity2.vectors = [[0.3, 0.4]]
|
||||||
|
|
||||||
mock_message.entities = [mock_entity1, mock_entity2]
|
mock_message.entities = [mock_entity1, mock_entity2]
|
||||||
|
|
@ -179,7 +183,8 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
|
||||||
mock_message.metadata.collection = 'vector_collection'
|
mock_message.metadata.collection = 'vector_collection'
|
||||||
|
|
||||||
mock_entity = MagicMock()
|
mock_entity = MagicMock()
|
||||||
mock_entity.entity.value = 'multi_vector_entity'
|
mock_entity.entity.type = IRI
|
||||||
|
mock_entity.entity.iri = 'multi_vector_entity'
|
||||||
mock_entity.vectors = [
|
mock_entity.vectors = [
|
||||||
[0.1, 0.2, 0.3],
|
[0.1, 0.2, 0.3],
|
||||||
[0.4, 0.5, 0.6],
|
[0.4, 0.5, 0.6],
|
||||||
|
|
@ -231,11 +236,12 @@ class TestQdrantGraphEmbeddingsStorage(IsolatedAsyncioTestCase):
|
||||||
mock_message.metadata.collection = 'empty_collection'
|
mock_message.metadata.collection = 'empty_collection'
|
||||||
|
|
||||||
mock_entity_empty = MagicMock()
|
mock_entity_empty = MagicMock()
|
||||||
|
mock_entity_empty.entity.type = LITERAL
|
||||||
mock_entity_empty.entity.value = "" # Empty string
|
mock_entity_empty.entity.value = "" # Empty string
|
||||||
mock_entity_empty.vectors = [[0.1, 0.2]]
|
mock_entity_empty.vectors = [[0.1, 0.2]]
|
||||||
|
|
||||||
mock_entity_none = MagicMock()
|
mock_entity_none = MagicMock()
|
||||||
mock_entity_none.entity.value = None # None value
|
mock_entity_none.entity = None # None entity
|
||||||
mock_entity_none.vectors = [[0.3, 0.4]]
|
mock_entity_none.vectors = [[0.3, 0.4]]
|
||||||
|
|
||||||
mock_message.entities = [mock_entity_empty, mock_entity_none]
|
mock_message.entities = [mock_entity_empty, mock_entity_none]
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ from unittest.mock import MagicMock, patch, call
|
||||||
|
|
||||||
from trustgraph.storage.triples.neo4j.write import Processor as StorageProcessor
|
from trustgraph.storage.triples.neo4j.write import Processor as StorageProcessor
|
||||||
from trustgraph.query.triples.neo4j.service import Processor as QueryProcessor
|
from trustgraph.query.triples.neo4j.service import Processor as QueryProcessor
|
||||||
from trustgraph.schema import Triples, Triple, Value, Metadata
|
from trustgraph.schema import Triples, Triple, Term, Metadata, IRI, LITERAL
|
||||||
from trustgraph.schema import TriplesQueryRequest
|
from trustgraph.schema import TriplesQueryRequest
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -60,9 +60,9 @@ class TestNeo4jUserCollectionIsolation:
|
||||||
)
|
)
|
||||||
|
|
||||||
triple = Triple(
|
triple = Triple(
|
||||||
s=Value(value="http://example.com/subject", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/subject"),
|
||||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/predicate"),
|
||||||
o=Value(value="literal_value", is_uri=False)
|
o=Term(type=LITERAL, value="literal_value")
|
||||||
)
|
)
|
||||||
|
|
||||||
message = Triples(
|
message = Triples(
|
||||||
|
|
@ -128,9 +128,9 @@ class TestNeo4jUserCollectionIsolation:
|
||||||
metadata = Metadata(id="test-id")
|
metadata = Metadata(id="test-id")
|
||||||
|
|
||||||
triple = Triple(
|
triple = Triple(
|
||||||
s=Value(value="http://example.com/subject", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/subject"),
|
||||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/predicate"),
|
||||||
o=Value(value="http://example.com/object", is_uri=True)
|
o=Term(type=IRI, iri="http://example.com/object")
|
||||||
)
|
)
|
||||||
|
|
||||||
message = Triples(
|
message = Triples(
|
||||||
|
|
@ -170,8 +170,8 @@ class TestNeo4jUserCollectionIsolation:
|
||||||
query = TriplesQueryRequest(
|
query = TriplesQueryRequest(
|
||||||
user="test_user",
|
user="test_user",
|
||||||
collection="test_collection",
|
collection="test_collection",
|
||||||
s=Value(value="http://example.com/subject", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/subject"),
|
||||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/predicate"),
|
||||||
o=None
|
o=None
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -254,9 +254,9 @@ class TestNeo4jUserCollectionIsolation:
|
||||||
metadata=Metadata(user="user1", collection="coll1"),
|
metadata=Metadata(user="user1", collection="coll1"),
|
||||||
triples=[
|
triples=[
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="http://example.com/user1/subject", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/user1/subject"),
|
||||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/predicate"),
|
||||||
o=Value(value="user1_data", is_uri=False)
|
o=Term(type=LITERAL, value="user1_data")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
@ -265,9 +265,9 @@ class TestNeo4jUserCollectionIsolation:
|
||||||
metadata=Metadata(user="user2", collection="coll2"),
|
metadata=Metadata(user="user2", collection="coll2"),
|
||||||
triples=[
|
triples=[
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value="http://example.com/user2/subject", is_uri=True),
|
s=Term(type=IRI, iri="http://example.com/user2/subject"),
|
||||||
p=Value(value="http://example.com/predicate", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/predicate"),
|
||||||
o=Value(value="user2_data", is_uri=False)
|
o=Term(type=LITERAL, value="user2_data")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
@ -429,9 +429,9 @@ class TestNeo4jUserCollectionRegression:
|
||||||
metadata=Metadata(user="user1", collection="coll1"),
|
metadata=Metadata(user="user1", collection="coll1"),
|
||||||
triples=[
|
triples=[
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value=shared_uri, is_uri=True),
|
s=Term(type=IRI, iri=shared_uri),
|
||||||
p=Value(value="http://example.com/p", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/p"),
|
||||||
o=Value(value="user1_value", is_uri=False)
|
o=Term(type=LITERAL, value="user1_value")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
@ -440,9 +440,9 @@ class TestNeo4jUserCollectionRegression:
|
||||||
metadata=Metadata(user="user2", collection="coll2"),
|
metadata=Metadata(user="user2", collection="coll2"),
|
||||||
triples=[
|
triples=[
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value=shared_uri, is_uri=True),
|
s=Term(type=IRI, iri=shared_uri),
|
||||||
p=Value(value="http://example.com/p", is_uri=True),
|
p=Term(type=IRI, iri="http://example.com/p"),
|
||||||
o=Value(value="user2_value", is_uri=False)
|
o=Term(type=LITERAL, value="user2_value")
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,8 @@ import pytest
|
||||||
from unittest.mock import MagicMock, patch, AsyncMock
|
from unittest.mock import MagicMock, patch, AsyncMock
|
||||||
|
|
||||||
from trustgraph.storage.triples.cassandra.write import Processor
|
from trustgraph.storage.triples.cassandra.write import Processor
|
||||||
from trustgraph.schema import Value, Triple
|
from trustgraph.schema import Triple, LITERAL
|
||||||
|
from trustgraph.direct.cassandra_kg import DEFAULT_GRAPH
|
||||||
|
|
||||||
|
|
||||||
class TestCassandraStorageProcessor:
|
class TestCassandraStorageProcessor:
|
||||||
|
|
@ -175,16 +176,24 @@ class TestCassandraStorageProcessor:
|
||||||
|
|
||||||
processor = Processor(taskgroup=taskgroup_mock)
|
processor = Processor(taskgroup=taskgroup_mock)
|
||||||
|
|
||||||
# Create mock triples
|
# Create mock triples with proper Term structure
|
||||||
triple1 = MagicMock()
|
triple1 = MagicMock()
|
||||||
|
triple1.s.type = LITERAL
|
||||||
triple1.s.value = 'subject1'
|
triple1.s.value = 'subject1'
|
||||||
|
triple1.p.type = LITERAL
|
||||||
triple1.p.value = 'predicate1'
|
triple1.p.value = 'predicate1'
|
||||||
|
triple1.o.type = LITERAL
|
||||||
triple1.o.value = 'object1'
|
triple1.o.value = 'object1'
|
||||||
|
triple1.g = None
|
||||||
|
|
||||||
triple2 = MagicMock()
|
triple2 = MagicMock()
|
||||||
|
triple2.s.type = LITERAL
|
||||||
triple2.s.value = 'subject2'
|
triple2.s.value = 'subject2'
|
||||||
|
triple2.p.type = LITERAL
|
||||||
triple2.p.value = 'predicate2'
|
triple2.p.value = 'predicate2'
|
||||||
|
triple2.o.type = LITERAL
|
||||||
triple2.o.value = 'object2'
|
triple2.o.value = 'object2'
|
||||||
|
triple2.g = None
|
||||||
|
|
||||||
# Create mock message
|
# Create mock message
|
||||||
mock_message = MagicMock()
|
mock_message = MagicMock()
|
||||||
|
|
@ -194,10 +203,10 @@ class TestCassandraStorageProcessor:
|
||||||
|
|
||||||
await processor.store_triples(mock_message)
|
await processor.store_triples(mock_message)
|
||||||
|
|
||||||
# Verify both triples were inserted
|
# Verify both triples were inserted (with g= parameter)
|
||||||
assert mock_tg_instance.insert.call_count == 2
|
assert mock_tg_instance.insert.call_count == 2
|
||||||
mock_tg_instance.insert.assert_any_call('collection1', 'subject1', 'predicate1', 'object1')
|
mock_tg_instance.insert.assert_any_call('collection1', 'subject1', 'predicate1', 'object1', g=DEFAULT_GRAPH)
|
||||||
mock_tg_instance.insert.assert_any_call('collection1', 'subject2', 'predicate2', 'object2')
|
mock_tg_instance.insert.assert_any_call('collection1', 'subject2', 'predicate2', 'object2', g=DEFAULT_GRAPH)
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
@patch('trustgraph.storage.triples.cassandra.write.KnowledgeGraph')
|
||||||
|
|
@ -369,11 +378,15 @@ class TestCassandraStorageProcessor:
|
||||||
|
|
||||||
processor = Processor(taskgroup=taskgroup_mock)
|
processor = Processor(taskgroup=taskgroup_mock)
|
||||||
|
|
||||||
# Create triple with special characters
|
# Create triple with special characters and proper Term structure
|
||||||
triple = MagicMock()
|
triple = MagicMock()
|
||||||
|
triple.s.type = LITERAL
|
||||||
triple.s.value = 'subject with spaces & symbols'
|
triple.s.value = 'subject with spaces & symbols'
|
||||||
|
triple.p.type = LITERAL
|
||||||
triple.p.value = 'predicate:with/colons'
|
triple.p.value = 'predicate:with/colons'
|
||||||
|
triple.o.type = LITERAL
|
||||||
triple.o.value = 'object with "quotes" and unicode: ñáéíóú'
|
triple.o.value = 'object with "quotes" and unicode: ñáéíóú'
|
||||||
|
triple.g = None
|
||||||
|
|
||||||
mock_message = MagicMock()
|
mock_message = MagicMock()
|
||||||
mock_message.metadata.user = 'test_user'
|
mock_message.metadata.user = 'test_user'
|
||||||
|
|
@ -387,7 +400,8 @@ class TestCassandraStorageProcessor:
|
||||||
'test_collection',
|
'test_collection',
|
||||||
'subject with spaces & symbols',
|
'subject with spaces & symbols',
|
||||||
'predicate:with/colons',
|
'predicate:with/colons',
|
||||||
'object with "quotes" and unicode: ñáéíóú'
|
'object with "quotes" and unicode: ñáéíóú',
|
||||||
|
g=DEFAULT_GRAPH
|
||||||
)
|
)
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|
@ -475,11 +489,15 @@ class TestCassandraPerformanceOptimizations:
|
||||||
|
|
||||||
processor = Processor(taskgroup=taskgroup_mock)
|
processor = Processor(taskgroup=taskgroup_mock)
|
||||||
|
|
||||||
# Create test triple
|
# Create test triple with proper Term structure
|
||||||
triple = MagicMock()
|
triple = MagicMock()
|
||||||
|
triple.s.type = LITERAL
|
||||||
triple.s.value = 'test_subject'
|
triple.s.value = 'test_subject'
|
||||||
|
triple.p.type = LITERAL
|
||||||
triple.p.value = 'test_predicate'
|
triple.p.value = 'test_predicate'
|
||||||
|
triple.o.type = LITERAL
|
||||||
triple.o.value = 'test_object'
|
triple.o.value = 'test_object'
|
||||||
|
triple.g = None
|
||||||
|
|
||||||
mock_message = MagicMock()
|
mock_message = MagicMock()
|
||||||
mock_message.metadata.user = 'user1'
|
mock_message.metadata.user = 'user1'
|
||||||
|
|
@ -490,7 +508,8 @@ class TestCassandraPerformanceOptimizations:
|
||||||
|
|
||||||
# Verify insert was called for the triple (implementation details tested in KnowledgeGraph)
|
# Verify insert was called for the triple (implementation details tested in KnowledgeGraph)
|
||||||
mock_tg_instance.insert.assert_called_once_with(
|
mock_tg_instance.insert.assert_called_once_with(
|
||||||
'collection1', 'test_subject', 'test_predicate', 'test_object'
|
'collection1', 'test_subject', 'test_predicate', 'test_object',
|
||||||
|
g=DEFAULT_GRAPH
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_environment_variable_controls_mode(self):
|
def test_environment_variable_controls_mode(self):
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ import pytest
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
from trustgraph.storage.triples.falkordb.write import Processor
|
from trustgraph.storage.triples.falkordb.write import Processor
|
||||||
from trustgraph.schema import Value, Triple
|
from trustgraph.schema import Term, Triple, IRI, LITERAL
|
||||||
|
|
||||||
|
|
||||||
class TestFalkorDBStorageProcessor:
|
class TestFalkorDBStorageProcessor:
|
||||||
|
|
@ -22,9 +22,9 @@ class TestFalkorDBStorageProcessor:
|
||||||
|
|
||||||
# Create a test triple
|
# Create a test triple
|
||||||
triple = Triple(
|
triple = Triple(
|
||||||
s=Value(value='http://example.com/subject', is_uri=True),
|
s=Term(type=IRI, iri='http://example.com/subject'),
|
||||||
p=Value(value='http://example.com/predicate', is_uri=True),
|
p=Term(type=IRI, iri='http://example.com/predicate'),
|
||||||
o=Value(value='literal object', is_uri=False)
|
o=Term(type=LITERAL, value='literal object')
|
||||||
)
|
)
|
||||||
message.triples = [triple]
|
message.triples = [triple]
|
||||||
|
|
||||||
|
|
@ -183,9 +183,9 @@ class TestFalkorDBStorageProcessor:
|
||||||
message.metadata.collection = 'test_collection'
|
message.metadata.collection = 'test_collection'
|
||||||
|
|
||||||
triple = Triple(
|
triple = Triple(
|
||||||
s=Value(value='http://example.com/subject', is_uri=True),
|
s=Term(type=IRI, iri='http://example.com/subject'),
|
||||||
p=Value(value='http://example.com/predicate', is_uri=True),
|
p=Term(type=IRI, iri='http://example.com/predicate'),
|
||||||
o=Value(value='http://example.com/object', is_uri=True)
|
o=Term(type=IRI, iri='http://example.com/object')
|
||||||
)
|
)
|
||||||
message.triples = [triple]
|
message.triples = [triple]
|
||||||
|
|
||||||
|
|
@ -269,14 +269,14 @@ class TestFalkorDBStorageProcessor:
|
||||||
message.metadata.collection = 'test_collection'
|
message.metadata.collection = 'test_collection'
|
||||||
|
|
||||||
triple1 = Triple(
|
triple1 = Triple(
|
||||||
s=Value(value='http://example.com/subject1', is_uri=True),
|
s=Term(type=IRI, iri='http://example.com/subject1'),
|
||||||
p=Value(value='http://example.com/predicate1', is_uri=True),
|
p=Term(type=IRI, iri='http://example.com/predicate1'),
|
||||||
o=Value(value='literal object1', is_uri=False)
|
o=Term(type=LITERAL, value='literal object1')
|
||||||
)
|
)
|
||||||
triple2 = Triple(
|
triple2 = Triple(
|
||||||
s=Value(value='http://example.com/subject2', is_uri=True),
|
s=Term(type=IRI, iri='http://example.com/subject2'),
|
||||||
p=Value(value='http://example.com/predicate2', is_uri=True),
|
p=Term(type=IRI, iri='http://example.com/predicate2'),
|
||||||
o=Value(value='http://example.com/object2', is_uri=True)
|
o=Term(type=IRI, iri='http://example.com/object2')
|
||||||
)
|
)
|
||||||
message.triples = [triple1, triple2]
|
message.triples = [triple1, triple2]
|
||||||
|
|
||||||
|
|
@ -337,14 +337,14 @@ class TestFalkorDBStorageProcessor:
|
||||||
message.metadata.collection = 'test_collection'
|
message.metadata.collection = 'test_collection'
|
||||||
|
|
||||||
triple1 = Triple(
|
triple1 = Triple(
|
||||||
s=Value(value='http://example.com/subject1', is_uri=True),
|
s=Term(type=IRI, iri='http://example.com/subject1'),
|
||||||
p=Value(value='http://example.com/predicate1', is_uri=True),
|
p=Term(type=IRI, iri='http://example.com/predicate1'),
|
||||||
o=Value(value='literal object', is_uri=False)
|
o=Term(type=LITERAL, value='literal object')
|
||||||
)
|
)
|
||||||
triple2 = Triple(
|
triple2 = Triple(
|
||||||
s=Value(value='http://example.com/subject2', is_uri=True),
|
s=Term(type=IRI, iri='http://example.com/subject2'),
|
||||||
p=Value(value='http://example.com/predicate2', is_uri=True),
|
p=Term(type=IRI, iri='http://example.com/predicate2'),
|
||||||
o=Value(value='http://example.com/object2', is_uri=True)
|
o=Term(type=IRI, iri='http://example.com/object2')
|
||||||
)
|
)
|
||||||
message.triples = [triple1, triple2]
|
message.triples = [triple1, triple2]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ import pytest
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
from trustgraph.storage.triples.memgraph.write import Processor
|
from trustgraph.storage.triples.memgraph.write import Processor
|
||||||
from trustgraph.schema import Value, Triple
|
from trustgraph.schema import Term, Triple, IRI, LITERAL
|
||||||
|
|
||||||
|
|
||||||
class TestMemgraphStorageProcessor:
|
class TestMemgraphStorageProcessor:
|
||||||
|
|
@ -22,9 +22,9 @@ class TestMemgraphStorageProcessor:
|
||||||
|
|
||||||
# Create a test triple
|
# Create a test triple
|
||||||
triple = Triple(
|
triple = Triple(
|
||||||
s=Value(value='http://example.com/subject', is_uri=True),
|
s=Term(type=IRI, iri='http://example.com/subject'),
|
||||||
p=Value(value='http://example.com/predicate', is_uri=True),
|
p=Term(type=IRI, iri='http://example.com/predicate'),
|
||||||
o=Value(value='literal object', is_uri=False)
|
o=Term(type=LITERAL, value='literal object')
|
||||||
)
|
)
|
||||||
message.triples = [triple]
|
message.triples = [triple]
|
||||||
|
|
||||||
|
|
@ -231,9 +231,9 @@ class TestMemgraphStorageProcessor:
|
||||||
mock_tx = MagicMock()
|
mock_tx = MagicMock()
|
||||||
|
|
||||||
triple = Triple(
|
triple = Triple(
|
||||||
s=Value(value='http://example.com/subject', is_uri=True),
|
s=Term(type=IRI, iri='http://example.com/subject'),
|
||||||
p=Value(value='http://example.com/predicate', is_uri=True),
|
p=Term(type=IRI, iri='http://example.com/predicate'),
|
||||||
o=Value(value='http://example.com/object', is_uri=True)
|
o=Term(type=IRI, iri='http://example.com/object')
|
||||||
)
|
)
|
||||||
|
|
||||||
processor.create_triple(mock_tx, triple, "test_user", "test_collection")
|
processor.create_triple(mock_tx, triple, "test_user", "test_collection")
|
||||||
|
|
@ -265,9 +265,9 @@ class TestMemgraphStorageProcessor:
|
||||||
mock_tx = MagicMock()
|
mock_tx = MagicMock()
|
||||||
|
|
||||||
triple = Triple(
|
triple = Triple(
|
||||||
s=Value(value='http://example.com/subject', is_uri=True),
|
s=Term(type=IRI, iri='http://example.com/subject'),
|
||||||
p=Value(value='http://example.com/predicate', is_uri=True),
|
p=Term(type=IRI, iri='http://example.com/predicate'),
|
||||||
o=Value(value='literal object', is_uri=False)
|
o=Term(type=LITERAL, value='literal object')
|
||||||
)
|
)
|
||||||
|
|
||||||
processor.create_triple(mock_tx, triple, "test_user", "test_collection")
|
processor.create_triple(mock_tx, triple, "test_user", "test_collection")
|
||||||
|
|
@ -347,14 +347,14 @@ class TestMemgraphStorageProcessor:
|
||||||
message.metadata.collection = 'test_collection'
|
message.metadata.collection = 'test_collection'
|
||||||
|
|
||||||
triple1 = Triple(
|
triple1 = Triple(
|
||||||
s=Value(value='http://example.com/subject1', is_uri=True),
|
s=Term(type=IRI, iri='http://example.com/subject1'),
|
||||||
p=Value(value='http://example.com/predicate1', is_uri=True),
|
p=Term(type=IRI, iri='http://example.com/predicate1'),
|
||||||
o=Value(value='literal object1', is_uri=False)
|
o=Term(type=LITERAL, value='literal object1')
|
||||||
)
|
)
|
||||||
triple2 = Triple(
|
triple2 = Triple(
|
||||||
s=Value(value='http://example.com/subject2', is_uri=True),
|
s=Term(type=IRI, iri='http://example.com/subject2'),
|
||||||
p=Value(value='http://example.com/predicate2', is_uri=True),
|
p=Term(type=IRI, iri='http://example.com/predicate2'),
|
||||||
o=Value(value='http://example.com/object2', is_uri=True)
|
o=Term(type=IRI, iri='http://example.com/object2')
|
||||||
)
|
)
|
||||||
message.triples = [triple1, triple2]
|
message.triples = [triple1, triple2]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import pytest
|
||||||
from unittest.mock import MagicMock, patch, AsyncMock
|
from unittest.mock import MagicMock, patch, AsyncMock
|
||||||
|
|
||||||
from trustgraph.storage.triples.neo4j.write import Processor
|
from trustgraph.storage.triples.neo4j.write import Processor
|
||||||
|
from trustgraph.schema import IRI, LITERAL
|
||||||
|
|
||||||
|
|
||||||
class TestNeo4jStorageProcessor:
|
class TestNeo4jStorageProcessor:
|
||||||
|
|
@ -257,10 +258,12 @@ class TestNeo4jStorageProcessor:
|
||||||
|
|
||||||
# Create mock triple with URI object
|
# Create mock triple with URI object
|
||||||
triple = MagicMock()
|
triple = MagicMock()
|
||||||
triple.s.value = "http://example.com/subject"
|
triple.s.type = IRI
|
||||||
triple.p.value = "http://example.com/predicate"
|
triple.s.iri = "http://example.com/subject"
|
||||||
triple.o.value = "http://example.com/object"
|
triple.p.type = IRI
|
||||||
triple.o.is_uri = True
|
triple.p.iri = "http://example.com/predicate"
|
||||||
|
triple.o.type = IRI
|
||||||
|
triple.o.iri = "http://example.com/object"
|
||||||
|
|
||||||
# Create mock message with metadata
|
# Create mock message with metadata
|
||||||
mock_message = MagicMock()
|
mock_message = MagicMock()
|
||||||
|
|
@ -327,10 +330,12 @@ class TestNeo4jStorageProcessor:
|
||||||
|
|
||||||
# Create mock triple with literal object
|
# Create mock triple with literal object
|
||||||
triple = MagicMock()
|
triple = MagicMock()
|
||||||
triple.s.value = "http://example.com/subject"
|
triple.s.type = IRI
|
||||||
triple.p.value = "http://example.com/predicate"
|
triple.s.iri = "http://example.com/subject"
|
||||||
|
triple.p.type = IRI
|
||||||
|
triple.p.iri = "http://example.com/predicate"
|
||||||
|
triple.o.type = LITERAL
|
||||||
triple.o.value = "literal value"
|
triple.o.value = "literal value"
|
||||||
triple.o.is_uri = False
|
|
||||||
|
|
||||||
# Create mock message with metadata
|
# Create mock message with metadata
|
||||||
mock_message = MagicMock()
|
mock_message = MagicMock()
|
||||||
|
|
@ -398,16 +403,20 @@ class TestNeo4jStorageProcessor:
|
||||||
|
|
||||||
# Create mock triples
|
# Create mock triples
|
||||||
triple1 = MagicMock()
|
triple1 = MagicMock()
|
||||||
triple1.s.value = "http://example.com/subject1"
|
triple1.s.type = IRI
|
||||||
triple1.p.value = "http://example.com/predicate1"
|
triple1.s.iri = "http://example.com/subject1"
|
||||||
triple1.o.value = "http://example.com/object1"
|
triple1.p.type = IRI
|
||||||
triple1.o.is_uri = True
|
triple1.p.iri = "http://example.com/predicate1"
|
||||||
|
triple1.o.type = IRI
|
||||||
|
triple1.o.iri = "http://example.com/object1"
|
||||||
|
|
||||||
triple2 = MagicMock()
|
triple2 = MagicMock()
|
||||||
triple2.s.value = "http://example.com/subject2"
|
triple2.s.type = IRI
|
||||||
triple2.p.value = "http://example.com/predicate2"
|
triple2.s.iri = "http://example.com/subject2"
|
||||||
|
triple2.p.type = IRI
|
||||||
|
triple2.p.iri = "http://example.com/predicate2"
|
||||||
|
triple2.o.type = LITERAL
|
||||||
triple2.o.value = "literal value"
|
triple2.o.value = "literal value"
|
||||||
triple2.o.is_uri = False
|
|
||||||
|
|
||||||
# Create mock message with metadata
|
# Create mock message with metadata
|
||||||
mock_message = MagicMock()
|
mock_message = MagicMock()
|
||||||
|
|
@ -550,10 +559,12 @@ class TestNeo4jStorageProcessor:
|
||||||
|
|
||||||
# Create triple with special characters
|
# Create triple with special characters
|
||||||
triple = MagicMock()
|
triple = MagicMock()
|
||||||
triple.s.value = "http://example.com/subject with spaces"
|
triple.s.type = IRI
|
||||||
triple.p.value = "http://example.com/predicate:with/symbols"
|
triple.s.iri = "http://example.com/subject with spaces"
|
||||||
|
triple.p.type = IRI
|
||||||
|
triple.p.iri = "http://example.com/predicate:with/symbols"
|
||||||
|
triple.o.type = LITERAL
|
||||||
triple.o.value = 'literal with "quotes" and unicode: ñáéíóú'
|
triple.o.value = 'literal with "quotes" and unicode: ñáéíóú'
|
||||||
triple.o.is_uri = False
|
|
||||||
|
|
||||||
mock_message = MagicMock()
|
mock_message = MagicMock()
|
||||||
mock_message.triples = [triple]
|
mock_message.triples = [triple]
|
||||||
|
|
|
||||||
|
|
@ -10,12 +10,27 @@ import json
|
||||||
import base64
|
import base64
|
||||||
|
|
||||||
from .. knowledge import hash, Uri, Literal
|
from .. knowledge import hash, Uri, Literal
|
||||||
|
from .. schema import IRI, LITERAL
|
||||||
from . types import Triple
|
from . types import Triple
|
||||||
from . exceptions import ProtocolException
|
from . exceptions import ProtocolException
|
||||||
|
|
||||||
|
|
||||||
def to_value(x):
|
def to_value(x):
|
||||||
if x["e"]: return Uri(x["v"])
|
"""Convert wire format to Uri or Literal."""
|
||||||
return Literal(x["v"])
|
if x.get("t") == IRI:
|
||||||
|
return Uri(x.get("i", ""))
|
||||||
|
elif x.get("t") == LITERAL:
|
||||||
|
return Literal(x.get("v", ""))
|
||||||
|
# Fallback for any other type
|
||||||
|
return Literal(x.get("v", x.get("i", "")))
|
||||||
|
|
||||||
|
|
||||||
|
def from_value(v):
|
||||||
|
"""Convert Uri or Literal to wire format."""
|
||||||
|
if isinstance(v, Uri):
|
||||||
|
return {"t": IRI, "i": str(v)}
|
||||||
|
else:
|
||||||
|
return {"t": LITERAL, "v": str(v)}
|
||||||
|
|
||||||
class Flow:
|
class Flow:
|
||||||
"""
|
"""
|
||||||
|
|
@ -751,17 +766,17 @@ class FlowInstance:
|
||||||
if s:
|
if s:
|
||||||
if not isinstance(s, Uri):
|
if not isinstance(s, Uri):
|
||||||
raise RuntimeError("s must be Uri")
|
raise RuntimeError("s must be Uri")
|
||||||
input["s"] = { "v": str(s), "e": isinstance(s, Uri), }
|
input["s"] = from_value(s)
|
||||||
|
|
||||||
if p:
|
if p:
|
||||||
if not isinstance(p, Uri):
|
if not isinstance(p, Uri):
|
||||||
raise RuntimeError("p must be Uri")
|
raise RuntimeError("p must be Uri")
|
||||||
input["p"] = { "v": str(p), "e": isinstance(p, Uri), }
|
input["p"] = from_value(p)
|
||||||
|
|
||||||
if o:
|
if o:
|
||||||
if not isinstance(o, Uri) and not isinstance(o, Literal):
|
if not isinstance(o, Uri) and not isinstance(o, Literal):
|
||||||
raise RuntimeError("o must be Uri or Literal")
|
raise RuntimeError("o must be Uri or Literal")
|
||||||
input["o"] = { "v": str(o), "e": isinstance(o, Uri), }
|
input["o"] = from_value(o)
|
||||||
|
|
||||||
object = self.request(
|
object = self.request(
|
||||||
"service/triples",
|
"service/triples",
|
||||||
|
|
@ -834,9 +849,9 @@ class FlowInstance:
|
||||||
if metadata:
|
if metadata:
|
||||||
metadata.emit(
|
metadata.emit(
|
||||||
lambda t: triples.append({
|
lambda t: triples.append({
|
||||||
"s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
|
"s": from_value(t["s"]),
|
||||||
"p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
|
"p": from_value(t["p"]),
|
||||||
"o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
|
"o": from_value(t["o"]),
|
||||||
})
|
})
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -913,9 +928,9 @@ class FlowInstance:
|
||||||
if metadata:
|
if metadata:
|
||||||
metadata.emit(
|
metadata.emit(
|
||||||
lambda t: triples.append({
|
lambda t: triples.append({
|
||||||
"s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
|
"s": from_value(t["s"]),
|
||||||
"p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
|
"p": from_value(t["p"]),
|
||||||
"o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
|
"o": from_value(t["o"]),
|
||||||
})
|
})
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,11 +10,18 @@ import json
|
||||||
import base64
|
import base64
|
||||||
|
|
||||||
from .. knowledge import hash, Uri, Literal
|
from .. knowledge import hash, Uri, Literal
|
||||||
|
from .. schema import IRI, LITERAL
|
||||||
from . types import Triple
|
from . types import Triple
|
||||||
|
|
||||||
|
|
||||||
def to_value(x):
|
def to_value(x):
|
||||||
if x["e"]: return Uri(x["v"])
|
"""Convert wire format to Uri or Literal."""
|
||||||
return Literal(x["v"])
|
if x.get("t") == IRI:
|
||||||
|
return Uri(x.get("i", ""))
|
||||||
|
elif x.get("t") == LITERAL:
|
||||||
|
return Literal(x.get("v", ""))
|
||||||
|
# Fallback for any other type
|
||||||
|
return Literal(x.get("v", x.get("i", "")))
|
||||||
|
|
||||||
class Knowledge:
|
class Knowledge:
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -12,13 +12,28 @@ import logging
|
||||||
|
|
||||||
from . types import DocumentMetadata, ProcessingMetadata, Triple
|
from . types import DocumentMetadata, ProcessingMetadata, Triple
|
||||||
from .. knowledge import hash, Uri, Literal
|
from .. knowledge import hash, Uri, Literal
|
||||||
|
from .. schema import IRI, LITERAL
|
||||||
from . exceptions import *
|
from . exceptions import *
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def to_value(x):
|
def to_value(x):
|
||||||
if x["e"]: return Uri(x["v"])
|
"""Convert wire format to Uri or Literal."""
|
||||||
return Literal(x["v"])
|
if x.get("t") == IRI:
|
||||||
|
return Uri(x.get("i", ""))
|
||||||
|
elif x.get("t") == LITERAL:
|
||||||
|
return Literal(x.get("v", ""))
|
||||||
|
# Fallback for any other type
|
||||||
|
return Literal(x.get("v", x.get("i", "")))
|
||||||
|
|
||||||
|
|
||||||
|
def from_value(v):
|
||||||
|
"""Convert Uri or Literal to wire format."""
|
||||||
|
if isinstance(v, Uri):
|
||||||
|
return {"t": IRI, "i": str(v)}
|
||||||
|
else:
|
||||||
|
return {"t": LITERAL, "v": str(v)}
|
||||||
|
|
||||||
class Library:
|
class Library:
|
||||||
"""
|
"""
|
||||||
|
|
@ -118,18 +133,18 @@ class Library:
|
||||||
if isinstance(metadata, list):
|
if isinstance(metadata, list):
|
||||||
triples = [
|
triples = [
|
||||||
{
|
{
|
||||||
"s": { "v": t.s, "e": isinstance(t.s, Uri) },
|
"s": from_value(t.s),
|
||||||
"p": { "v": t.p, "e": isinstance(t.p, Uri) },
|
"p": from_value(t.p),
|
||||||
"o": { "v": t.o, "e": isinstance(t.o, Uri) }
|
"o": from_value(t.o),
|
||||||
}
|
}
|
||||||
for t in metadata
|
for t in metadata
|
||||||
]
|
]
|
||||||
elif hasattr(metadata, "emit"):
|
elif hasattr(metadata, "emit"):
|
||||||
metadata.emit(
|
metadata.emit(
|
||||||
lambda t: triples.append({
|
lambda t: triples.append({
|
||||||
"s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
|
"s": from_value(t["s"]),
|
||||||
"p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
|
"p": from_value(t["p"]),
|
||||||
"o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
|
"o": from_value(t["o"]),
|
||||||
})
|
})
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
|
@ -315,9 +330,9 @@ class Library:
|
||||||
"comments": metadata.comments,
|
"comments": metadata.comments,
|
||||||
"metadata": [
|
"metadata": [
|
||||||
{
|
{
|
||||||
"s": { "v": t["s"], "e": isinstance(t["s"], Uri) },
|
"s": from_value(t["s"]),
|
||||||
"p": { "v": t["p"], "e": isinstance(t["p"], Uri) },
|
"p": from_value(t["p"]),
|
||||||
"o": { "v": t["o"], "e": isinstance(t["o"], Uri) }
|
"o": from_value(t["o"]),
|
||||||
}
|
}
|
||||||
for t in metadata.metadata
|
for t in metadata.metadata
|
||||||
],
|
],
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ embeddings.
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from .. schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse
|
from .. schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse
|
||||||
from .. schema import Error, Value
|
from .. schema import Error, Term
|
||||||
|
|
||||||
from . flow_processor import FlowProcessor
|
from . flow_processor import FlowProcessor
|
||||||
from . consumer_spec import ConsumerSpec
|
from . consumer_spec import ConsumerSpec
|
||||||
|
|
|
||||||
|
|
@ -2,15 +2,21 @@
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from . request_response_spec import RequestResponse, RequestResponseSpec
|
from . request_response_spec import RequestResponse, RequestResponseSpec
|
||||||
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
|
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse, IRI, LITERAL
|
||||||
from .. knowledge import Uri, Literal
|
from .. knowledge import Uri, Literal
|
||||||
|
|
||||||
# Module logger
|
# Module logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def to_value(x):
|
def to_value(x):
|
||||||
if x.is_uri: return Uri(x.value)
|
"""Convert schema Term to Uri or Literal."""
|
||||||
|
if x.type == IRI:
|
||||||
|
return Uri(x.iri)
|
||||||
|
elif x.type == LITERAL:
|
||||||
return Literal(x.value)
|
return Literal(x.value)
|
||||||
|
# Fallback
|
||||||
|
return Literal(x.value or x.iri)
|
||||||
|
|
||||||
class GraphEmbeddingsClient(RequestResponse):
|
class GraphEmbeddingsClient(RequestResponse):
|
||||||
async def query(self, vectors, limit=20, user="trustgraph",
|
async def query(self, vectors, limit=20, user="trustgraph",
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ embeddings.
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
|
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
|
||||||
from .. schema import Error, Value
|
from .. schema import Error, Term
|
||||||
|
|
||||||
from . flow_processor import FlowProcessor
|
from . flow_processor import FlowProcessor
|
||||||
from . consumer_spec import ConsumerSpec
|
from . consumer_spec import ConsumerSpec
|
||||||
|
|
|
||||||
|
|
@ -1,24 +1,34 @@
|
||||||
|
|
||||||
from . request_response_spec import RequestResponse, RequestResponseSpec
|
from . request_response_spec import RequestResponse, RequestResponseSpec
|
||||||
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Value
|
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Term, IRI, LITERAL
|
||||||
from .. knowledge import Uri, Literal
|
from .. knowledge import Uri, Literal
|
||||||
|
|
||||||
|
|
||||||
class Triple:
|
class Triple:
|
||||||
def __init__(self, s, p, o):
|
def __init__(self, s, p, o):
|
||||||
self.s = s
|
self.s = s
|
||||||
self.p = p
|
self.p = p
|
||||||
self.o = o
|
self.o = o
|
||||||
|
|
||||||
|
|
||||||
def to_value(x):
|
def to_value(x):
|
||||||
if x.is_uri: return Uri(x.value)
|
"""Convert schema Term to Uri or Literal."""
|
||||||
|
if x.type == IRI:
|
||||||
|
return Uri(x.iri)
|
||||||
|
elif x.type == LITERAL:
|
||||||
return Literal(x.value)
|
return Literal(x.value)
|
||||||
|
# Fallback
|
||||||
|
return Literal(x.value or x.iri)
|
||||||
|
|
||||||
|
|
||||||
def from_value(x):
|
def from_value(x):
|
||||||
if x is None: return None
|
"""Convert Uri or Literal to schema Term."""
|
||||||
|
if x is None:
|
||||||
|
return None
|
||||||
if isinstance(x, Uri):
|
if isinstance(x, Uri):
|
||||||
return Value(value=str(x), is_uri=True)
|
return Term(type=IRI, iri=str(x))
|
||||||
else:
|
else:
|
||||||
return Value(value=str(x), is_uri=False)
|
return Term(type=LITERAL, value=str(x))
|
||||||
|
|
||||||
class TriplesClient(RequestResponse):
|
class TriplesClient(RequestResponse):
|
||||||
async def query(self, s=None, p=None, o=None, limit=20,
|
async def query(self, s=None, p=None, o=None, limit=20,
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ null. Output is a list of triples.
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
||||||
from .. schema import Value, Triple
|
from .. schema import Term, Triple
|
||||||
|
|
||||||
from . flow_processor import FlowProcessor
|
from . flow_processor import FlowProcessor
|
||||||
from . consumer_spec import ConsumerSpec
|
from . consumer_spec import ConsumerSpec
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
import _pulsar
|
import _pulsar
|
||||||
|
|
||||||
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Value
|
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Term, IRI, LITERAL
|
||||||
from .. schema import triples_request_queue
|
from .. schema import triples_request_queue
|
||||||
from .. schema import triples_response_queue
|
from .. schema import triples_response_queue
|
||||||
from . base import BaseClient
|
from . base import BaseClient
|
||||||
|
|
@ -46,9 +46,9 @@ class TriplesQueryClient(BaseClient):
|
||||||
if ent == None: return None
|
if ent == None: return None
|
||||||
|
|
||||||
if ent.startswith("http://") or ent.startswith("https://"):
|
if ent.startswith("http://") or ent.startswith("https://"):
|
||||||
return Value(value=ent, is_uri=True)
|
return Term(type=IRI, iri=ent)
|
||||||
|
|
||||||
return Value(value=ent, is_uri=False)
|
return Term(type=LITERAL, value=ent)
|
||||||
|
|
||||||
def request(
|
def request(
|
||||||
self,
|
self,
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
from .base import Translator, MessageTranslator
|
from .base import Translator, MessageTranslator
|
||||||
from .primitives import ValueTranslator, TripleTranslator, SubgraphTranslator, RowSchemaTranslator, FieldTranslator, row_schema_translator, field_translator
|
from .primitives import TermTranslator, ValueTranslator, TripleTranslator, SubgraphTranslator, RowSchemaTranslator, FieldTranslator, row_schema_translator, field_translator
|
||||||
from .metadata import DocumentMetadataTranslator, ProcessingMetadataTranslator
|
from .metadata import DocumentMetadataTranslator, ProcessingMetadataTranslator
|
||||||
from .agent import AgentRequestTranslator, AgentResponseTranslator
|
from .agent import AgentRequestTranslator, AgentResponseTranslator
|
||||||
from .embeddings import EmbeddingsRequestTranslator, EmbeddingsResponseTranslator
|
from .embeddings import EmbeddingsRequestTranslator, EmbeddingsResponseTranslator
|
||||||
|
|
|
||||||
|
|
@ -1,37 +1,133 @@
|
||||||
from typing import Dict, Any, List
|
from typing import Dict, Any, List
|
||||||
from ...schema import Value, Triple, RowSchema, Field
|
from ...schema import Term, Triple, RowSchema, Field, IRI, BLANK, LITERAL, TRIPLE
|
||||||
from .base import Translator
|
from .base import Translator
|
||||||
|
|
||||||
|
|
||||||
class ValueTranslator(Translator):
|
class TermTranslator(Translator):
|
||||||
"""Translator for Value schema objects"""
|
"""
|
||||||
|
Translator for Term schema objects.
|
||||||
|
|
||||||
def to_pulsar(self, data: Dict[str, Any]) -> Value:
|
Wire format (compact keys):
|
||||||
return Value(value=data["v"], is_uri=data["e"])
|
- "t": type (i/b/l/t)
|
||||||
|
- "i": iri (for IRI type)
|
||||||
|
- "d": id (for BLANK type)
|
||||||
|
- "v": value (for LITERAL type)
|
||||||
|
- "dt": datatype (for LITERAL type)
|
||||||
|
- "ln": language (for LITERAL type)
|
||||||
|
- "tr": triple (for TRIPLE type, nested)
|
||||||
|
"""
|
||||||
|
|
||||||
def from_pulsar(self, obj: Value) -> Dict[str, Any]:
|
def to_pulsar(self, data: Dict[str, Any]) -> Term:
|
||||||
return {"v": obj.value, "e": obj.is_uri}
|
term_type = data.get("t", "")
|
||||||
|
|
||||||
|
if term_type == IRI:
|
||||||
|
return Term(type=IRI, iri=data.get("i", ""))
|
||||||
|
|
||||||
|
elif term_type == BLANK:
|
||||||
|
return Term(type=BLANK, id=data.get("d", ""))
|
||||||
|
|
||||||
|
elif term_type == LITERAL:
|
||||||
|
return Term(
|
||||||
|
type=LITERAL,
|
||||||
|
value=data.get("v", ""),
|
||||||
|
datatype=data.get("dt", ""),
|
||||||
|
language=data.get("ln", ""),
|
||||||
|
)
|
||||||
|
|
||||||
|
elif term_type == TRIPLE:
|
||||||
|
# Nested triple - use TripleTranslator
|
||||||
|
triple_data = data.get("tr")
|
||||||
|
if triple_data:
|
||||||
|
triple = _triple_translator_to_pulsar(triple_data)
|
||||||
|
else:
|
||||||
|
triple = None
|
||||||
|
return Term(type=TRIPLE, triple=triple)
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Unknown or empty type
|
||||||
|
return Term(type=term_type)
|
||||||
|
|
||||||
|
def from_pulsar(self, obj: Term) -> Dict[str, Any]:
|
||||||
|
result: Dict[str, Any] = {"t": obj.type}
|
||||||
|
|
||||||
|
if obj.type == IRI:
|
||||||
|
result["i"] = obj.iri
|
||||||
|
|
||||||
|
elif obj.type == BLANK:
|
||||||
|
result["d"] = obj.id
|
||||||
|
|
||||||
|
elif obj.type == LITERAL:
|
||||||
|
result["v"] = obj.value
|
||||||
|
if obj.datatype:
|
||||||
|
result["dt"] = obj.datatype
|
||||||
|
if obj.language:
|
||||||
|
result["ln"] = obj.language
|
||||||
|
|
||||||
|
elif obj.type == TRIPLE:
|
||||||
|
if obj.triple:
|
||||||
|
result["tr"] = _triple_translator_from_pulsar(obj.triple)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level helper functions to avoid circular instantiation
|
||||||
|
def _triple_translator_to_pulsar(data: Dict[str, Any]) -> Triple:
|
||||||
|
term_translator = TermTranslator()
|
||||||
|
return Triple(
|
||||||
|
s=term_translator.to_pulsar(data["s"]) if data.get("s") else None,
|
||||||
|
p=term_translator.to_pulsar(data["p"]) if data.get("p") else None,
|
||||||
|
o=term_translator.to_pulsar(data["o"]) if data.get("o") else None,
|
||||||
|
g=data.get("g"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _triple_translator_from_pulsar(obj: Triple) -> Dict[str, Any]:
|
||||||
|
term_translator = TermTranslator()
|
||||||
|
result: Dict[str, Any] = {}
|
||||||
|
|
||||||
|
if obj.s:
|
||||||
|
result["s"] = term_translator.from_pulsar(obj.s)
|
||||||
|
if obj.p:
|
||||||
|
result["p"] = term_translator.from_pulsar(obj.p)
|
||||||
|
if obj.o:
|
||||||
|
result["o"] = term_translator.from_pulsar(obj.o)
|
||||||
|
if obj.g:
|
||||||
|
result["g"] = obj.g
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
class TripleTranslator(Translator):
|
class TripleTranslator(Translator):
|
||||||
"""Translator for Triple schema objects"""
|
"""Translator for Triple schema objects (quads with optional graph)"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.value_translator = ValueTranslator()
|
self.term_translator = TermTranslator()
|
||||||
|
|
||||||
def to_pulsar(self, data: Dict[str, Any]) -> Triple:
|
def to_pulsar(self, data: Dict[str, Any]) -> Triple:
|
||||||
return Triple(
|
return Triple(
|
||||||
s=self.value_translator.to_pulsar(data["s"]),
|
s=self.term_translator.to_pulsar(data["s"]) if data.get("s") else None,
|
||||||
p=self.value_translator.to_pulsar(data["p"]),
|
p=self.term_translator.to_pulsar(data["p"]) if data.get("p") else None,
|
||||||
o=self.value_translator.to_pulsar(data["o"])
|
o=self.term_translator.to_pulsar(data["o"]) if data.get("o") else None,
|
||||||
|
g=data.get("g"),
|
||||||
)
|
)
|
||||||
|
|
||||||
def from_pulsar(self, obj: Triple) -> Dict[str, Any]:
|
def from_pulsar(self, obj: Triple) -> Dict[str, Any]:
|
||||||
return {
|
result: Dict[str, Any] = {}
|
||||||
"s": self.value_translator.from_pulsar(obj.s),
|
|
||||||
"p": self.value_translator.from_pulsar(obj.p),
|
if obj.s:
|
||||||
"o": self.value_translator.from_pulsar(obj.o)
|
result["s"] = self.term_translator.from_pulsar(obj.s)
|
||||||
}
|
if obj.p:
|
||||||
|
result["p"] = self.term_translator.from_pulsar(obj.p)
|
||||||
|
if obj.o:
|
||||||
|
result["o"] = self.term_translator.from_pulsar(obj.o)
|
||||||
|
if obj.g:
|
||||||
|
result["g"] = obj.g
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
# Backward compatibility alias
|
||||||
|
ValueTranslator = TermTranslator
|
||||||
|
|
||||||
|
|
||||||
class SubgraphTranslator(Translator):
|
class SubgraphTranslator(Translator):
|
||||||
|
|
|
||||||
|
|
@ -14,11 +14,13 @@ class TriplesQueryRequestTranslator(MessageTranslator):
|
||||||
s = self.value_translator.to_pulsar(data["s"]) if "s" in data else None
|
s = self.value_translator.to_pulsar(data["s"]) if "s" in data else None
|
||||||
p = self.value_translator.to_pulsar(data["p"]) if "p" in data else None
|
p = self.value_translator.to_pulsar(data["p"]) if "p" in data else None
|
||||||
o = self.value_translator.to_pulsar(data["o"]) if "o" in data else None
|
o = self.value_translator.to_pulsar(data["o"]) if "o" in data else None
|
||||||
|
g = data.get("g") # None=default graph, "*"=all graphs
|
||||||
|
|
||||||
return TriplesQueryRequest(
|
return TriplesQueryRequest(
|
||||||
s=s,
|
s=s,
|
||||||
p=p,
|
p=p,
|
||||||
o=o,
|
o=o,
|
||||||
|
g=g,
|
||||||
limit=int(data.get("limit", 10000)),
|
limit=int(data.get("limit", 10000)),
|
||||||
user=data.get("user", "trustgraph"),
|
user=data.get("user", "trustgraph"),
|
||||||
collection=data.get("collection", "default")
|
collection=data.get("collection", "default")
|
||||||
|
|
@ -37,6 +39,8 @@ class TriplesQueryRequestTranslator(MessageTranslator):
|
||||||
result["p"] = self.value_translator.from_pulsar(obj.p)
|
result["p"] = self.value_translator.from_pulsar(obj.p)
|
||||||
if obj.o:
|
if obj.o:
|
||||||
result["o"] = self.value_translator.from_pulsar(obj.o)
|
result["o"] = self.value_translator.from_pulsar(obj.o)
|
||||||
|
if obj.g is not None:
|
||||||
|
result["g"] = obj.g
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,22 +1,57 @@
|
||||||
|
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
# Term type constants
|
||||||
|
IRI = "i" # IRI/URI node
|
||||||
|
BLANK = "b" # Blank node
|
||||||
|
LITERAL = "l" # Literal value
|
||||||
|
TRIPLE = "t" # Quoted triple (RDF-star)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Error:
|
class Error:
|
||||||
type: str = ""
|
type: str = ""
|
||||||
message: str = ""
|
message: str = ""
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Value:
|
class Term:
|
||||||
|
"""
|
||||||
|
RDF Term - can represent an IRI, blank node, literal, or quoted triple.
|
||||||
|
|
||||||
|
The 'type' field determines which other fields are relevant:
|
||||||
|
- IRI: use 'iri' field
|
||||||
|
- BLANK: use 'id' field
|
||||||
|
- LITERAL: use 'value', 'datatype', 'language' fields
|
||||||
|
- TRIPLE: use 'triple' field
|
||||||
|
"""
|
||||||
|
type: str = "" # One of: IRI, BLANK, LITERAL, TRIPLE
|
||||||
|
|
||||||
|
# For IRI terms (type == IRI)
|
||||||
|
iri: str = ""
|
||||||
|
|
||||||
|
# For blank nodes (type == BLANK)
|
||||||
|
id: str = ""
|
||||||
|
|
||||||
|
# For literals (type == LITERAL)
|
||||||
value: str = ""
|
value: str = ""
|
||||||
is_uri: bool = False
|
datatype: str = "" # XSD datatype URI (mutually exclusive with language)
|
||||||
type: str = ""
|
language: str = "" # Language tag (mutually exclusive with datatype)
|
||||||
|
|
||||||
|
# For quoted triples (type == TRIPLE)
|
||||||
|
triple: "Triple | None" = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Triple:
|
class Triple:
|
||||||
s: Value | None = None
|
"""
|
||||||
p: Value | None = None
|
RDF Triple / Quad.
|
||||||
o: Value | None = None
|
|
||||||
|
The optional 'g' field specifies the named graph (None = default graph).
|
||||||
|
"""
|
||||||
|
s: Term | None = None # Subject
|
||||||
|
p: Term | None = None # Predicate
|
||||||
|
o: Term | None = None # Object
|
||||||
|
g: str | None = None # Graph name (IRI), None = default graph
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Field:
|
class Field:
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
from ..core.metadata import Metadata
|
from ..core.metadata import Metadata
|
||||||
from ..core.primitives import Value, RowSchema
|
from ..core.primitives import Term, RowSchema
|
||||||
from ..core.topic import topic
|
from ..core.topic import topic
|
||||||
|
|
||||||
############################################################################
|
############################################################################
|
||||||
|
|
@ -10,7 +10,7 @@ from ..core.topic import topic
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class EntityEmbeddings:
|
class EntityEmbeddings:
|
||||||
entity: Value | None = None
|
entity: Term | None = None
|
||||||
vectors: list[list[float]] = field(default_factory=list)
|
vectors: list[list[float]] = field(default_factory=list)
|
||||||
|
|
||||||
# This is a 'batching' mechanism for the above data
|
# This is a 'batching' mechanism for the above data
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
from ..core.primitives import Value, Triple
|
from ..core.primitives import Term, Triple
|
||||||
from ..core.metadata import Metadata
|
from ..core.metadata import Metadata
|
||||||
from ..core.topic import topic
|
from ..core.topic import topic
|
||||||
|
|
||||||
|
|
@ -10,7 +10,7 @@ from ..core.topic import topic
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class EntityContext:
|
class EntityContext:
|
||||||
entity: Value | None = None
|
entity: Term | None = None
|
||||||
context: str = ""
|
context: str = ""
|
||||||
|
|
||||||
# This is a 'batching' mechanism for the above data
|
# This is a 'batching' mechanism for the above data
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from ..core.primitives import Error, Value, Triple
|
from ..core.primitives import Error, Term, Triple
|
||||||
from ..core.topic import topic
|
from ..core.topic import topic
|
||||||
from ..core.metadata import Metadata
|
from ..core.metadata import Metadata
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
from ..core.primitives import Error, Value, Triple
|
from ..core.primitives import Error, Term, Triple
|
||||||
from ..core.topic import topic
|
from ..core.topic import topic
|
||||||
|
|
||||||
############################################################################
|
############################################################################
|
||||||
|
|
@ -17,7 +17,7 @@ class GraphEmbeddingsRequest:
|
||||||
@dataclass
|
@dataclass
|
||||||
class GraphEmbeddingsResponse:
|
class GraphEmbeddingsResponse:
|
||||||
error: Error | None = None
|
error: Error | None = None
|
||||||
entities: list[Value] = field(default_factory=list)
|
entities: list[Term] = field(default_factory=list)
|
||||||
|
|
||||||
############################################################################
|
############################################################################
|
||||||
|
|
||||||
|
|
@ -27,9 +27,10 @@ class GraphEmbeddingsResponse:
|
||||||
class TriplesQueryRequest:
|
class TriplesQueryRequest:
|
||||||
user: str = ""
|
user: str = ""
|
||||||
collection: str = ""
|
collection: str = ""
|
||||||
s: Value | None = None
|
s: Term | None = None
|
||||||
p: Value | None = None
|
p: Term | None = None
|
||||||
o: Value | None = None
|
o: Term | None = None
|
||||||
|
g: str | None = None # Graph IRI. None=default graph, "*"=all graphs
|
||||||
limit: int = 0
|
limit: int = 0
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from ..core.topic import topic
|
from ..core.topic import topic
|
||||||
from ..core.primitives import Error, Value
|
from ..core.primitives import Error, Term
|
||||||
|
|
||||||
############################################################################
|
############################################################################
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ description = "TrustGraph provides a means to run a pipeline of flexible AI proc
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.8"
|
requires-python = ">=3.8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"trustgraph-base>=1.9,<1.10",
|
"trustgraph-base>=2.0,<2.1",
|
||||||
"pulsar-client",
|
"pulsar-client",
|
||||||
"prometheus-client",
|
"prometheus-client",
|
||||||
"boto3",
|
"boto3",
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ description = "TrustGraph provides a means to run a pipeline of flexible AI proc
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.8"
|
requires-python = ">=3.8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"trustgraph-base>=1.9,<1.10",
|
"trustgraph-base>=2.0,<2.1",
|
||||||
"requests",
|
"requests",
|
||||||
"pulsar-client",
|
"pulsar-client",
|
||||||
"aiohttp",
|
"aiohttp",
|
||||||
|
|
|
||||||
|
|
@ -10,8 +10,8 @@ description = "HuggingFace embeddings support for TrustGraph."
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.8"
|
requires-python = ">=3.8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"trustgraph-base>=1.9,<1.10",
|
"trustgraph-base>=2.0,<2.1",
|
||||||
"trustgraph-flow>=1.9,<1.10",
|
"trustgraph-flow>=2.0,<2.1",
|
||||||
"torch",
|
"torch",
|
||||||
"urllib3",
|
"urllib3",
|
||||||
"transformers",
|
"transformers",
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ description = "TrustGraph provides a means to run a pipeline of flexible AI proc
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.8"
|
requires-python = ">=3.8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"trustgraph-base>=1.9,<1.10",
|
"trustgraph-base>=2.0,<2.1",
|
||||||
"aiohttp",
|
"aiohttp",
|
||||||
"anthropic",
|
"anthropic",
|
||||||
"scylla-driver",
|
"scylla-driver",
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,24 @@ _active_clusters = []
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Sentinel value for wildcard graph queries
|
||||||
|
GRAPH_WILDCARD = "*"
|
||||||
|
|
||||||
|
# Default graph stored as empty string
|
||||||
|
DEFAULT_GRAPH = ""
|
||||||
|
|
||||||
|
|
||||||
class KnowledgeGraph:
|
class KnowledgeGraph:
|
||||||
|
"""
|
||||||
|
Cassandra-backed knowledge graph supporting quads (s, p, o, g).
|
||||||
|
|
||||||
|
Uses 7 tables to support all 16 query patterns efficiently:
|
||||||
|
- Family A (g-wildcard): SPOG, POSG, OSPG
|
||||||
|
- Family B (g-specified): GSPO, GPOS, GOSP
|
||||||
|
- Collection table: COLL (for iteration/deletion)
|
||||||
|
|
||||||
|
Plus a metadata table for tracking collections.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, hosts=None,
|
self, hosts=None,
|
||||||
|
|
@ -24,12 +41,22 @@ class KnowledgeGraph:
|
||||||
self.keyspace = keyspace
|
self.keyspace = keyspace
|
||||||
self.username = username
|
self.username = username
|
||||||
|
|
||||||
# Optimized multi-table schema with collection deletion support
|
# 7-table schema for quads with full query pattern support
|
||||||
self.subject_table = "triples_s"
|
# Family A: g-wildcard queries (g in clustering columns)
|
||||||
self.po_table = "triples_p"
|
self.spog_table = "quads_spog" # partition (collection, s), cluster (p, o, g)
|
||||||
self.object_table = "triples_o"
|
self.posg_table = "quads_posg" # partition (collection, p), cluster (o, s, g)
|
||||||
self.collection_table = "triples_collection" # For SPO queries and deletion
|
self.ospg_table = "quads_ospg" # partition (collection, o), cluster (s, p, g)
|
||||||
self.collection_metadata_table = "collection_metadata" # For tracking which collections exist
|
|
||||||
|
# Family B: g-specified queries (g in partition key)
|
||||||
|
self.gspo_table = "quads_gspo" # partition (collection, g, s), cluster (p, o)
|
||||||
|
self.gpos_table = "quads_gpos" # partition (collection, g, p), cluster (o, s)
|
||||||
|
self.gosp_table = "quads_gosp" # partition (collection, g, o), cluster (s, p)
|
||||||
|
|
||||||
|
# Collection table for iteration and bulk deletion
|
||||||
|
self.coll_table = "quads_coll" # partition (collection), cluster (g, s, p, o)
|
||||||
|
|
||||||
|
# Collection metadata tracking
|
||||||
|
self.collection_metadata_table = "collection_metadata"
|
||||||
|
|
||||||
if username and password:
|
if username and password:
|
||||||
ssl_context = SSLContext(PROTOCOL_TLSv1_2)
|
ssl_context = SSLContext(PROTOCOL_TLSv1_2)
|
||||||
|
|
@ -46,237 +73,376 @@ class KnowledgeGraph:
|
||||||
self.prepare_statements()
|
self.prepare_statements()
|
||||||
|
|
||||||
def clear(self):
|
def clear(self):
|
||||||
|
|
||||||
self.session.execute(f"""
|
self.session.execute(f"""
|
||||||
drop keyspace if exists {self.keyspace};
|
drop keyspace if exists {self.keyspace};
|
||||||
""");
|
""")
|
||||||
|
|
||||||
self.init()
|
self.init()
|
||||||
|
|
||||||
def init(self):
|
def init(self):
|
||||||
|
|
||||||
self.session.execute(f"""
|
self.session.execute(f"""
|
||||||
create keyspace if not exists {self.keyspace}
|
create keyspace if not exists {self.keyspace}
|
||||||
with replication = {{
|
with replication = {{
|
||||||
'class' : 'SimpleStrategy',
|
'class' : 'SimpleStrategy',
|
||||||
'replication_factor' : 1
|
'replication_factor' : 1
|
||||||
}};
|
}};
|
||||||
""");
|
""")
|
||||||
|
|
||||||
self.session.set_keyspace(self.keyspace)
|
self.session.set_keyspace(self.keyspace)
|
||||||
self.init_optimized_schema()
|
self.init_quad_schema()
|
||||||
|
|
||||||
|
def init_quad_schema(self):
|
||||||
|
"""Initialize 7-table schema for quads with full query pattern support"""
|
||||||
|
|
||||||
def init_optimized_schema(self):
|
# Family A: g-wildcard queries (g in clustering columns)
|
||||||
"""Initialize optimized multi-table schema for performance"""
|
|
||||||
# Table 1: Subject-centric queries (get_s, get_sp, get_os)
|
# SPOG: partition (collection, s), cluster (p, o, g)
|
||||||
# Compound partition key for optimal data distribution
|
# Supports: (?, s, ?, ?), (?, s, p, ?), (?, s, p, o)
|
||||||
self.session.execute(f"""
|
self.session.execute(f"""
|
||||||
CREATE TABLE IF NOT EXISTS {self.subject_table} (
|
CREATE TABLE IF NOT EXISTS {self.spog_table} (
|
||||||
collection text,
|
collection text,
|
||||||
s text,
|
s text,
|
||||||
p text,
|
p text,
|
||||||
o text,
|
o text,
|
||||||
PRIMARY KEY ((collection, s), p, o)
|
g text,
|
||||||
|
PRIMARY KEY ((collection, s), p, o, g)
|
||||||
);
|
);
|
||||||
""");
|
""")
|
||||||
|
|
||||||
# Table 2: Predicate-Object queries (get_p, get_po) - eliminates ALLOW FILTERING!
|
# POSG: partition (collection, p), cluster (o, s, g)
|
||||||
# Compound partition key for optimal data distribution
|
# Supports: (?, ?, p, ?), (?, ?, p, o)
|
||||||
self.session.execute(f"""
|
self.session.execute(f"""
|
||||||
CREATE TABLE IF NOT EXISTS {self.po_table} (
|
CREATE TABLE IF NOT EXISTS {self.posg_table} (
|
||||||
collection text,
|
collection text,
|
||||||
p text,
|
p text,
|
||||||
o text,
|
o text,
|
||||||
s text,
|
s text,
|
||||||
PRIMARY KEY ((collection, p), o, s)
|
g text,
|
||||||
|
PRIMARY KEY ((collection, p), o, s, g)
|
||||||
);
|
);
|
||||||
""");
|
""")
|
||||||
|
|
||||||
# Table 3: Object-centric queries (get_o)
|
# OSPG: partition (collection, o), cluster (s, p, g)
|
||||||
# Compound partition key for optimal data distribution
|
# Supports: (?, ?, ?, o), (?, s, ?, o)
|
||||||
self.session.execute(f"""
|
self.session.execute(f"""
|
||||||
CREATE TABLE IF NOT EXISTS {self.object_table} (
|
CREATE TABLE IF NOT EXISTS {self.ospg_table} (
|
||||||
collection text,
|
collection text,
|
||||||
o text,
|
o text,
|
||||||
s text,
|
s text,
|
||||||
p text,
|
p text,
|
||||||
PRIMARY KEY ((collection, o), s, p)
|
g text,
|
||||||
|
PRIMARY KEY ((collection, o), s, p, g)
|
||||||
);
|
);
|
||||||
""");
|
""")
|
||||||
|
|
||||||
# Table 4: Collection management and SPO queries (get_spo)
|
# Family B: g-specified queries (g in partition key)
|
||||||
# Simple partition key enables efficient collection deletion
|
|
||||||
|
# GSPO: partition (collection, g, s), cluster (p, o)
|
||||||
|
# Supports: (g, s, ?, ?), (g, s, p, ?), (g, s, p, o)
|
||||||
self.session.execute(f"""
|
self.session.execute(f"""
|
||||||
CREATE TABLE IF NOT EXISTS {self.collection_table} (
|
CREATE TABLE IF NOT EXISTS {self.gspo_table} (
|
||||||
collection text,
|
collection text,
|
||||||
|
g text,
|
||||||
s text,
|
s text,
|
||||||
p text,
|
p text,
|
||||||
o text,
|
o text,
|
||||||
PRIMARY KEY (collection, s, p, o)
|
PRIMARY KEY ((collection, g, s), p, o)
|
||||||
);
|
);
|
||||||
""");
|
""")
|
||||||
|
|
||||||
# Table 5: Collection metadata tracking
|
# GPOS: partition (collection, g, p), cluster (o, s)
|
||||||
# Tracks which collections exist without polluting triple data
|
# Supports: (g, ?, p, ?), (g, ?, p, o)
|
||||||
|
self.session.execute(f"""
|
||||||
|
CREATE TABLE IF NOT EXISTS {self.gpos_table} (
|
||||||
|
collection text,
|
||||||
|
g text,
|
||||||
|
p text,
|
||||||
|
o text,
|
||||||
|
s text,
|
||||||
|
PRIMARY KEY ((collection, g, p), o, s)
|
||||||
|
);
|
||||||
|
""")
|
||||||
|
|
||||||
|
# GOSP: partition (collection, g, o), cluster (s, p)
|
||||||
|
# Supports: (g, ?, ?, o), (g, s, ?, o)
|
||||||
|
self.session.execute(f"""
|
||||||
|
CREATE TABLE IF NOT EXISTS {self.gosp_table} (
|
||||||
|
collection text,
|
||||||
|
g text,
|
||||||
|
o text,
|
||||||
|
s text,
|
||||||
|
p text,
|
||||||
|
PRIMARY KEY ((collection, g, o), s, p)
|
||||||
|
);
|
||||||
|
""")
|
||||||
|
|
||||||
|
# Collection table for iteration and bulk deletion
|
||||||
|
# COLL: partition (collection), cluster (g, s, p, o)
|
||||||
|
self.session.execute(f"""
|
||||||
|
CREATE TABLE IF NOT EXISTS {self.coll_table} (
|
||||||
|
collection text,
|
||||||
|
g text,
|
||||||
|
s text,
|
||||||
|
p text,
|
||||||
|
o text,
|
||||||
|
PRIMARY KEY (collection, g, s, p, o)
|
||||||
|
);
|
||||||
|
""")
|
||||||
|
|
||||||
|
# Collection metadata tracking
|
||||||
self.session.execute(f"""
|
self.session.execute(f"""
|
||||||
CREATE TABLE IF NOT EXISTS {self.collection_metadata_table} (
|
CREATE TABLE IF NOT EXISTS {self.collection_metadata_table} (
|
||||||
collection text,
|
collection text,
|
||||||
created_at timestamp,
|
created_at timestamp,
|
||||||
PRIMARY KEY (collection)
|
PRIMARY KEY (collection)
|
||||||
);
|
);
|
||||||
""");
|
""")
|
||||||
|
|
||||||
logger.info("Optimized multi-table schema initialized (5 tables)")
|
logger.info("Quad schema initialized (7 tables + metadata)")
|
||||||
|
|
||||||
def prepare_statements(self):
|
def prepare_statements(self):
|
||||||
"""Prepare statements for optimal performance"""
|
"""Prepare statements for all 7 tables"""
|
||||||
# Insert statements for batch operations
|
|
||||||
self.insert_subject_stmt = self.session.prepare(
|
# Insert statements
|
||||||
f"INSERT INTO {self.subject_table} (collection, s, p, o) VALUES (?, ?, ?, ?)"
|
self.insert_spog_stmt = self.session.prepare(
|
||||||
|
f"INSERT INTO {self.spog_table} (collection, s, p, o, g) VALUES (?, ?, ?, ?, ?)"
|
||||||
|
)
|
||||||
|
self.insert_posg_stmt = self.session.prepare(
|
||||||
|
f"INSERT INTO {self.posg_table} (collection, p, o, s, g) VALUES (?, ?, ?, ?, ?)"
|
||||||
|
)
|
||||||
|
self.insert_ospg_stmt = self.session.prepare(
|
||||||
|
f"INSERT INTO {self.ospg_table} (collection, o, s, p, g) VALUES (?, ?, ?, ?, ?)"
|
||||||
|
)
|
||||||
|
self.insert_gspo_stmt = self.session.prepare(
|
||||||
|
f"INSERT INTO {self.gspo_table} (collection, g, s, p, o) VALUES (?, ?, ?, ?, ?)"
|
||||||
|
)
|
||||||
|
self.insert_gpos_stmt = self.session.prepare(
|
||||||
|
f"INSERT INTO {self.gpos_table} (collection, g, p, o, s) VALUES (?, ?, ?, ?, ?)"
|
||||||
|
)
|
||||||
|
self.insert_gosp_stmt = self.session.prepare(
|
||||||
|
f"INSERT INTO {self.gosp_table} (collection, g, o, s, p) VALUES (?, ?, ?, ?, ?)"
|
||||||
|
)
|
||||||
|
self.insert_coll_stmt = self.session.prepare(
|
||||||
|
f"INSERT INTO {self.coll_table} (collection, g, s, p, o) VALUES (?, ?, ?, ?, ?)"
|
||||||
)
|
)
|
||||||
|
|
||||||
self.insert_po_stmt = self.session.prepare(
|
# Delete statements (for single quad deletion)
|
||||||
f"INSERT INTO {self.po_table} (collection, p, o, s) VALUES (?, ?, ?, ?)"
|
self.delete_spog_stmt = self.session.prepare(
|
||||||
|
f"DELETE FROM {self.spog_table} WHERE collection = ? AND s = ? AND p = ? AND o = ? AND g = ?"
|
||||||
|
)
|
||||||
|
self.delete_posg_stmt = self.session.prepare(
|
||||||
|
f"DELETE FROM {self.posg_table} WHERE collection = ? AND p = ? AND o = ? AND s = ? AND g = ?"
|
||||||
|
)
|
||||||
|
self.delete_ospg_stmt = self.session.prepare(
|
||||||
|
f"DELETE FROM {self.ospg_table} WHERE collection = ? AND o = ? AND s = ? AND p = ? AND g = ?"
|
||||||
|
)
|
||||||
|
self.delete_gspo_stmt = self.session.prepare(
|
||||||
|
f"DELETE FROM {self.gspo_table} WHERE collection = ? AND g = ? AND s = ? AND p = ? AND o = ?"
|
||||||
|
)
|
||||||
|
self.delete_gpos_stmt = self.session.prepare(
|
||||||
|
f"DELETE FROM {self.gpos_table} WHERE collection = ? AND g = ? AND p = ? AND o = ? AND s = ?"
|
||||||
|
)
|
||||||
|
self.delete_gosp_stmt = self.session.prepare(
|
||||||
|
f"DELETE FROM {self.gosp_table} WHERE collection = ? AND g = ? AND o = ? AND s = ? AND p = ?"
|
||||||
|
)
|
||||||
|
self.delete_coll_stmt = self.session.prepare(
|
||||||
|
f"DELETE FROM {self.coll_table} WHERE collection = ? AND g = ? AND s = ? AND p = ? AND o = ?"
|
||||||
)
|
)
|
||||||
|
|
||||||
self.insert_object_stmt = self.session.prepare(
|
# Query statements - Family A (g-wildcard, g in clustering)
|
||||||
f"INSERT INTO {self.object_table} (collection, o, s, p) VALUES (?, ?, ?, ?)"
|
|
||||||
|
# SPOG table queries
|
||||||
|
self.get_s_wildcard_stmt = self.session.prepare(
|
||||||
|
f"SELECT p, o, g FROM {self.spog_table} WHERE collection = ? AND s = ? LIMIT ?"
|
||||||
|
)
|
||||||
|
self.get_sp_wildcard_stmt = self.session.prepare(
|
||||||
|
f"SELECT o, g FROM {self.spog_table} WHERE collection = ? AND s = ? AND p = ? LIMIT ?"
|
||||||
|
)
|
||||||
|
self.get_spo_wildcard_stmt = self.session.prepare(
|
||||||
|
f"SELECT g FROM {self.spog_table} WHERE collection = ? AND s = ? AND p = ? AND o = ? LIMIT ?"
|
||||||
)
|
)
|
||||||
|
|
||||||
self.insert_collection_stmt = self.session.prepare(
|
# POSG table queries
|
||||||
f"INSERT INTO {self.collection_table} (collection, s, p, o) VALUES (?, ?, ?, ?)"
|
self.get_p_wildcard_stmt = self.session.prepare(
|
||||||
|
f"SELECT o, s, g FROM {self.posg_table} WHERE collection = ? AND p = ? LIMIT ?"
|
||||||
|
)
|
||||||
|
self.get_po_wildcard_stmt = self.session.prepare(
|
||||||
|
f"SELECT s, g FROM {self.posg_table} WHERE collection = ? AND p = ? AND o = ? LIMIT ?"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Query statements for optimized access
|
# OSPG table queries
|
||||||
|
self.get_o_wildcard_stmt = self.session.prepare(
|
||||||
|
f"SELECT s, p, g FROM {self.ospg_table} WHERE collection = ? AND o = ? LIMIT ?"
|
||||||
|
)
|
||||||
|
self.get_os_wildcard_stmt = self.session.prepare(
|
||||||
|
f"SELECT p, g FROM {self.ospg_table} WHERE collection = ? AND o = ? AND s = ? LIMIT ?"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Query statements - Family B (g-specified, g in partition)
|
||||||
|
|
||||||
|
# GSPO table queries
|
||||||
|
self.get_gs_stmt = self.session.prepare(
|
||||||
|
f"SELECT p, o FROM {self.gspo_table} WHERE collection = ? AND g = ? AND s = ? LIMIT ?"
|
||||||
|
)
|
||||||
|
self.get_gsp_stmt = self.session.prepare(
|
||||||
|
f"SELECT o FROM {self.gspo_table} WHERE collection = ? AND g = ? AND s = ? AND p = ? LIMIT ?"
|
||||||
|
)
|
||||||
|
self.get_gspo_stmt = self.session.prepare(
|
||||||
|
f"SELECT s FROM {self.gspo_table} WHERE collection = ? AND g = ? AND s = ? AND p = ? AND o = ? LIMIT ?"
|
||||||
|
)
|
||||||
|
|
||||||
|
# GPOS table queries
|
||||||
|
self.get_gp_stmt = self.session.prepare(
|
||||||
|
f"SELECT o, s FROM {self.gpos_table} WHERE collection = ? AND g = ? AND p = ? LIMIT ?"
|
||||||
|
)
|
||||||
|
self.get_gpo_stmt = self.session.prepare(
|
||||||
|
f"SELECT s FROM {self.gpos_table} WHERE collection = ? AND g = ? AND p = ? AND o = ? LIMIT ?"
|
||||||
|
)
|
||||||
|
|
||||||
|
# GOSP table queries
|
||||||
|
self.get_go_stmt = self.session.prepare(
|
||||||
|
f"SELECT s, p FROM {self.gosp_table} WHERE collection = ? AND g = ? AND o = ? LIMIT ?"
|
||||||
|
)
|
||||||
|
self.get_gos_stmt = self.session.prepare(
|
||||||
|
f"SELECT p FROM {self.gosp_table} WHERE collection = ? AND g = ? AND o = ? AND s = ? LIMIT ?"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Collection table query (for get_all and iteration)
|
||||||
self.get_all_stmt = self.session.prepare(
|
self.get_all_stmt = self.session.prepare(
|
||||||
f"SELECT s, p, o FROM {self.subject_table} WHERE collection = ? LIMIT ? ALLOW FILTERING"
|
f"SELECT g, s, p, o FROM {self.coll_table} WHERE collection = ? LIMIT ?"
|
||||||
|
)
|
||||||
|
self.get_g_stmt = self.session.prepare(
|
||||||
|
f"SELECT s, p, o FROM {self.coll_table} WHERE collection = ? AND g = ? LIMIT ?"
|
||||||
)
|
)
|
||||||
|
|
||||||
self.get_s_stmt = self.session.prepare(
|
logger.info("Prepared statements initialized for quad schema (7 tables)")
|
||||||
f"SELECT p, o FROM {self.subject_table} WHERE collection = ? AND s = ? LIMIT ?"
|
|
||||||
)
|
|
||||||
|
|
||||||
self.get_p_stmt = self.session.prepare(
|
def insert(self, collection, s, p, o, g=None):
|
||||||
f"SELECT s, o FROM {self.po_table} WHERE collection = ? AND p = ? LIMIT ?"
|
"""Insert a quad into all 7 tables"""
|
||||||
)
|
# Default graph stored as empty string
|
||||||
|
if g is None:
|
||||||
|
g = DEFAULT_GRAPH
|
||||||
|
|
||||||
self.get_o_stmt = self.session.prepare(
|
|
||||||
f"SELECT s, p FROM {self.object_table} WHERE collection = ? AND o = ? LIMIT ?"
|
|
||||||
)
|
|
||||||
|
|
||||||
self.get_sp_stmt = self.session.prepare(
|
|
||||||
f"SELECT o FROM {self.subject_table} WHERE collection = ? AND s = ? AND p = ? LIMIT ?"
|
|
||||||
)
|
|
||||||
|
|
||||||
# The critical optimization: get_po without ALLOW FILTERING!
|
|
||||||
self.get_po_stmt = self.session.prepare(
|
|
||||||
f"SELECT s FROM {self.po_table} WHERE collection = ? AND p = ? AND o = ? LIMIT ?"
|
|
||||||
)
|
|
||||||
|
|
||||||
self.get_os_stmt = self.session.prepare(
|
|
||||||
f"SELECT p FROM {self.object_table} WHERE collection = ? AND o = ? AND s = ? LIMIT ?"
|
|
||||||
)
|
|
||||||
|
|
||||||
self.get_spo_stmt = self.session.prepare(
|
|
||||||
f"SELECT s as x FROM {self.collection_table} WHERE collection = ? AND s = ? AND p = ? AND o = ? LIMIT ?"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Delete statements for collection deletion
|
|
||||||
self.delete_subject_stmt = self.session.prepare(
|
|
||||||
f"DELETE FROM {self.subject_table} WHERE collection = ? AND s = ? AND p = ? AND o = ?"
|
|
||||||
)
|
|
||||||
|
|
||||||
self.delete_po_stmt = self.session.prepare(
|
|
||||||
f"DELETE FROM {self.po_table} WHERE collection = ? AND p = ? AND o = ? AND s = ?"
|
|
||||||
)
|
|
||||||
|
|
||||||
self.delete_object_stmt = self.session.prepare(
|
|
||||||
f"DELETE FROM {self.object_table} WHERE collection = ? AND o = ? AND s = ? AND p = ?"
|
|
||||||
)
|
|
||||||
|
|
||||||
self.delete_collection_stmt = self.session.prepare(
|
|
||||||
f"DELETE FROM {self.collection_table} WHERE collection = ? AND s = ? AND p = ? AND o = ?"
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info("Prepared statements initialized for optimal performance (4 tables)")
|
|
||||||
|
|
||||||
def insert(self, collection, s, p, o):
|
|
||||||
# Batch write to all four tables for consistency
|
|
||||||
batch = BatchStatement()
|
batch = BatchStatement()
|
||||||
|
|
||||||
# Insert into subject table
|
# Family A tables
|
||||||
batch.add(self.insert_subject_stmt, (collection, s, p, o))
|
batch.add(self.insert_spog_stmt, (collection, s, p, o, g))
|
||||||
|
batch.add(self.insert_posg_stmt, (collection, p, o, s, g))
|
||||||
|
batch.add(self.insert_ospg_stmt, (collection, o, s, p, g))
|
||||||
|
|
||||||
# Insert into predicate-object table (column order: collection, p, o, s)
|
# Family B tables
|
||||||
batch.add(self.insert_po_stmt, (collection, p, o, s))
|
batch.add(self.insert_gspo_stmt, (collection, g, s, p, o))
|
||||||
|
batch.add(self.insert_gpos_stmt, (collection, g, p, o, s))
|
||||||
|
batch.add(self.insert_gosp_stmt, (collection, g, o, s, p))
|
||||||
|
|
||||||
# Insert into object table (column order: collection, o, s, p)
|
# Collection table
|
||||||
batch.add(self.insert_object_stmt, (collection, o, s, p))
|
batch.add(self.insert_coll_stmt, (collection, g, s, p, o))
|
||||||
|
|
||||||
# Insert into collection table for SPO queries and deletion tracking
|
|
||||||
batch.add(self.insert_collection_stmt, (collection, s, p, o))
|
|
||||||
|
|
||||||
self.session.execute(batch)
|
self.session.execute(batch)
|
||||||
|
|
||||||
|
def delete_quad(self, collection, s, p, o, g=None):
|
||||||
|
"""Delete a single quad from all 7 tables"""
|
||||||
|
if g is None:
|
||||||
|
g = DEFAULT_GRAPH
|
||||||
|
|
||||||
|
batch = BatchStatement()
|
||||||
|
|
||||||
|
batch.add(self.delete_spog_stmt, (collection, s, p, o, g))
|
||||||
|
batch.add(self.delete_posg_stmt, (collection, p, o, s, g))
|
||||||
|
batch.add(self.delete_ospg_stmt, (collection, o, s, p, g))
|
||||||
|
batch.add(self.delete_gspo_stmt, (collection, g, s, p, o))
|
||||||
|
batch.add(self.delete_gpos_stmt, (collection, g, p, o, s))
|
||||||
|
batch.add(self.delete_gosp_stmt, (collection, g, o, s, p))
|
||||||
|
batch.add(self.delete_coll_stmt, (collection, g, s, p, o))
|
||||||
|
|
||||||
|
self.session.execute(batch)
|
||||||
|
|
||||||
|
# ========================================================================
|
||||||
|
# Query methods
|
||||||
|
# g=None means default graph, g="*" means all graphs
|
||||||
|
# ========================================================================
|
||||||
|
|
||||||
def get_all(self, collection, limit=50):
|
def get_all(self, collection, limit=50):
|
||||||
# Use subject table for get_all queries
|
"""Get all quads in collection"""
|
||||||
return self.session.execute(
|
return self.session.execute(self.get_all_stmt, (collection, limit))
|
||||||
self.get_all_stmt,
|
|
||||||
(collection, limit)
|
|
||||||
)
|
|
||||||
|
|
||||||
def get_s(self, collection, s, limit=10):
|
def get_s(self, collection, s, g=None, limit=10):
|
||||||
# Optimized: Direct partition access with (collection, s)
|
"""Query by subject. g=None: default graph, g='*': all graphs"""
|
||||||
return self.session.execute(
|
if g is None or g == DEFAULT_GRAPH:
|
||||||
self.get_s_stmt,
|
# Default graph - use GSPO table
|
||||||
(collection, s, limit)
|
return self.session.execute(self.get_gs_stmt, (collection, DEFAULT_GRAPH, s, limit))
|
||||||
)
|
elif g == GRAPH_WILDCARD:
|
||||||
|
# All graphs - use SPOG table
|
||||||
|
return self.session.execute(self.get_s_wildcard_stmt, (collection, s, limit))
|
||||||
|
else:
|
||||||
|
# Specific graph - use GSPO table
|
||||||
|
return self.session.execute(self.get_gs_stmt, (collection, g, s, limit))
|
||||||
|
|
||||||
def get_p(self, collection, p, limit=10):
|
def get_p(self, collection, p, g=None, limit=10):
|
||||||
# Optimized: Use po_table for direct partition access
|
"""Query by predicate"""
|
||||||
return self.session.execute(
|
if g is None or g == DEFAULT_GRAPH:
|
||||||
self.get_p_stmt,
|
return self.session.execute(self.get_gp_stmt, (collection, DEFAULT_GRAPH, p, limit))
|
||||||
(collection, p, limit)
|
elif g == GRAPH_WILDCARD:
|
||||||
)
|
return self.session.execute(self.get_p_wildcard_stmt, (collection, p, limit))
|
||||||
|
else:
|
||||||
|
return self.session.execute(self.get_gp_stmt, (collection, g, p, limit))
|
||||||
|
|
||||||
def get_o(self, collection, o, limit=10):
|
def get_o(self, collection, o, g=None, limit=10):
|
||||||
# Optimized: Use object_table for direct partition access
|
"""Query by object"""
|
||||||
return self.session.execute(
|
if g is None or g == DEFAULT_GRAPH:
|
||||||
self.get_o_stmt,
|
return self.session.execute(self.get_go_stmt, (collection, DEFAULT_GRAPH, o, limit))
|
||||||
(collection, o, limit)
|
elif g == GRAPH_WILDCARD:
|
||||||
)
|
return self.session.execute(self.get_o_wildcard_stmt, (collection, o, limit))
|
||||||
|
else:
|
||||||
|
return self.session.execute(self.get_go_stmt, (collection, g, o, limit))
|
||||||
|
|
||||||
def get_sp(self, collection, s, p, limit=10):
|
def get_sp(self, collection, s, p, g=None, limit=10):
|
||||||
# Optimized: Use subject_table with clustering key access
|
"""Query by subject and predicate"""
|
||||||
return self.session.execute(
|
if g is None or g == DEFAULT_GRAPH:
|
||||||
self.get_sp_stmt,
|
return self.session.execute(self.get_gsp_stmt, (collection, DEFAULT_GRAPH, s, p, limit))
|
||||||
(collection, s, p, limit)
|
elif g == GRAPH_WILDCARD:
|
||||||
)
|
return self.session.execute(self.get_sp_wildcard_stmt, (collection, s, p, limit))
|
||||||
|
else:
|
||||||
|
return self.session.execute(self.get_gsp_stmt, (collection, g, s, p, limit))
|
||||||
|
|
||||||
def get_po(self, collection, p, o, limit=10):
|
def get_po(self, collection, p, o, g=None, limit=10):
|
||||||
# CRITICAL OPTIMIZATION: Use po_table - NO MORE ALLOW FILTERING!
|
"""Query by predicate and object"""
|
||||||
return self.session.execute(
|
if g is None or g == DEFAULT_GRAPH:
|
||||||
self.get_po_stmt,
|
return self.session.execute(self.get_gpo_stmt, (collection, DEFAULT_GRAPH, p, o, limit))
|
||||||
(collection, p, o, limit)
|
elif g == GRAPH_WILDCARD:
|
||||||
)
|
return self.session.execute(self.get_po_wildcard_stmt, (collection, p, o, limit))
|
||||||
|
else:
|
||||||
|
return self.session.execute(self.get_gpo_stmt, (collection, g, p, o, limit))
|
||||||
|
|
||||||
def get_os(self, collection, o, s, limit=10):
|
def get_os(self, collection, o, s, g=None, limit=10):
|
||||||
# Optimized: Use subject_table with clustering access (no more ALLOW FILTERING)
|
"""Query by object and subject"""
|
||||||
return self.session.execute(
|
if g is None or g == DEFAULT_GRAPH:
|
||||||
self.get_os_stmt,
|
return self.session.execute(self.get_gos_stmt, (collection, DEFAULT_GRAPH, o, s, limit))
|
||||||
(collection, s, o, limit)
|
elif g == GRAPH_WILDCARD:
|
||||||
)
|
return self.session.execute(self.get_os_wildcard_stmt, (collection, o, s, limit))
|
||||||
|
else:
|
||||||
|
return self.session.execute(self.get_gos_stmt, (collection, g, o, s, limit))
|
||||||
|
|
||||||
def get_spo(self, collection, s, p, o, limit=10):
|
def get_spo(self, collection, s, p, o, g=None, limit=10):
|
||||||
# Optimized: Use collection_table for exact key lookup
|
"""Query by subject, predicate, object (find which graphs)"""
|
||||||
return self.session.execute(
|
if g is None or g == DEFAULT_GRAPH:
|
||||||
self.get_spo_stmt,
|
return self.session.execute(self.get_gspo_stmt, (collection, DEFAULT_GRAPH, s, p, o, limit))
|
||||||
(collection, s, p, o, limit)
|
elif g == GRAPH_WILDCARD:
|
||||||
)
|
return self.session.execute(self.get_spo_wildcard_stmt, (collection, s, p, o, limit))
|
||||||
|
else:
|
||||||
|
return self.session.execute(self.get_gspo_stmt, (collection, g, s, p, o, limit))
|
||||||
|
|
||||||
|
def get_g(self, collection, g, limit=50):
|
||||||
|
"""Get all quads in a specific graph"""
|
||||||
|
if g is None:
|
||||||
|
g = DEFAULT_GRAPH
|
||||||
|
return self.session.execute(self.get_g_stmt, (collection, g, limit))
|
||||||
|
|
||||||
|
# ========================================================================
|
||||||
|
# Collection management
|
||||||
|
# ========================================================================
|
||||||
|
|
||||||
def collection_exists(self, collection):
|
def collection_exists(self, collection):
|
||||||
"""Check if collection exists by querying collection_metadata table"""
|
"""Check if collection exists"""
|
||||||
try:
|
try:
|
||||||
result = self.session.execute(
|
result = self.session.execute(
|
||||||
f"SELECT collection FROM {self.collection_metadata_table} WHERE collection = %s LIMIT 1",
|
f"SELECT collection FROM {self.collection_metadata_table} WHERE collection = %s LIMIT 1",
|
||||||
|
|
@ -301,63 +467,52 @@ class KnowledgeGraph:
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
def delete_collection(self, collection):
|
def delete_collection(self, collection):
|
||||||
"""Delete all triples for a specific collection
|
"""Delete all quads for a collection from all 7 tables"""
|
||||||
|
# Read all quads from collection table
|
||||||
Uses collection_table to enumerate all triples, then deletes from all 4 tables
|
|
||||||
using full partition keys for optimal performance with compound keys.
|
|
||||||
"""
|
|
||||||
# Step 1: Read all triples from collection_table (single partition read)
|
|
||||||
rows = self.session.execute(
|
rows = self.session.execute(
|
||||||
f"SELECT s, p, o FROM {self.collection_table} WHERE collection = %s",
|
f"SELECT g, s, p, o FROM {self.coll_table} WHERE collection = %s",
|
||||||
(collection,)
|
(collection,)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Step 2: Delete each triple from all 4 tables using full partition keys
|
|
||||||
# Batch deletions for efficiency
|
|
||||||
batch = BatchStatement()
|
batch = BatchStatement()
|
||||||
count = 0
|
count = 0
|
||||||
|
|
||||||
for row in rows:
|
for row in rows:
|
||||||
s, p, o = row.s, row.p, row.o
|
g, s, p, o = row.g, row.s, row.p, row.o
|
||||||
|
|
||||||
# Delete from subject table (partition key: collection, s)
|
# Delete from all 7 tables
|
||||||
batch.add(self.delete_subject_stmt, (collection, s, p, o))
|
batch.add(self.delete_spog_stmt, (collection, s, p, o, g))
|
||||||
|
batch.add(self.delete_posg_stmt, (collection, p, o, s, g))
|
||||||
# Delete from predicate-object table (partition key: collection, p)
|
batch.add(self.delete_ospg_stmt, (collection, o, s, p, g))
|
||||||
batch.add(self.delete_po_stmt, (collection, p, o, s))
|
batch.add(self.delete_gspo_stmt, (collection, g, s, p, o))
|
||||||
|
batch.add(self.delete_gpos_stmt, (collection, g, p, o, s))
|
||||||
# Delete from object table (partition key: collection, o)
|
batch.add(self.delete_gosp_stmt, (collection, g, o, s, p))
|
||||||
batch.add(self.delete_object_stmt, (collection, o, s, p))
|
batch.add(self.delete_coll_stmt, (collection, g, s, p, o))
|
||||||
|
|
||||||
# Delete from collection table (partition key: collection only)
|
|
||||||
batch.add(self.delete_collection_stmt, (collection, s, p, o))
|
|
||||||
|
|
||||||
count += 1
|
count += 1
|
||||||
|
|
||||||
# Execute batch every 25 triples to avoid oversized batches
|
# Execute batch every 15 quads (7 deletes each = 105 statements)
|
||||||
# (Each triple adds ~4 statements, so 25 triples = ~100 statements)
|
if count % 15 == 0:
|
||||||
if count % 25 == 0:
|
|
||||||
self.session.execute(batch)
|
self.session.execute(batch)
|
||||||
batch = BatchStatement()
|
batch = BatchStatement()
|
||||||
|
|
||||||
# Execute remaining deletions
|
# Execute remaining
|
||||||
if count % 25 != 0:
|
if count % 15 != 0:
|
||||||
self.session.execute(batch)
|
self.session.execute(batch)
|
||||||
|
|
||||||
# Step 3: Delete collection metadata
|
# Delete collection metadata
|
||||||
self.session.execute(
|
self.session.execute(
|
||||||
f"DELETE FROM {self.collection_metadata_table} WHERE collection = %s",
|
f"DELETE FROM {self.collection_metadata_table} WHERE collection = %s",
|
||||||
(collection,)
|
(collection,)
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(f"Deleted {count} triples from collection {collection}")
|
logger.info(f"Deleted {count} quads from collection {collection}")
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
"""Close the Cassandra session and cluster connections properly"""
|
"""Close connections"""
|
||||||
if hasattr(self, 'session') and self.session:
|
if hasattr(self, 'session') and self.session:
|
||||||
self.session.shutdown()
|
self.session.shutdown()
|
||||||
if hasattr(self, 'cluster') and self.cluster:
|
if hasattr(self, 'cluster') and self.cluster:
|
||||||
self.cluster.shutdown()
|
self.cluster.shutdown()
|
||||||
# Remove from global tracking
|
|
||||||
if self.cluster in _active_clusters:
|
if self.cluster in _active_clusters:
|
||||||
_active_clusters.remove(self.cluster)
|
_active_clusters.remove(self.cluster)
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ import json
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from ....schema import Chunk, Triple, Triples, Metadata, Value
|
from ....schema import Chunk, Triple, Triples, Metadata, Term, IRI, LITERAL
|
||||||
from ....schema import EntityContext, EntityContexts
|
from ....schema import EntityContext, EntityContexts
|
||||||
|
|
||||||
from ....rdf import TRUSTGRAPH_ENTITIES, RDF_LABEL, SUBJECT_OF, DEFINITION
|
from ....rdf import TRUSTGRAPH_ENTITIES, RDF_LABEL, SUBJECT_OF, DEFINITION
|
||||||
|
|
@ -256,29 +256,29 @@ class Processor(FlowProcessor):
|
||||||
|
|
||||||
# Add entity label
|
# Add entity label
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s = Value(value=entity_uri, is_uri=True),
|
s = Term(type=IRI, iri=entity_uri),
|
||||||
p = Value(value=RDF_LABEL, is_uri=True),
|
p = Term(type=IRI, iri=RDF_LABEL),
|
||||||
o = Value(value=defn["entity"], is_uri=False),
|
o = Term(type=LITERAL, value=defn["entity"]),
|
||||||
))
|
))
|
||||||
|
|
||||||
# Add definition
|
# Add definition
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s = Value(value=entity_uri, is_uri=True),
|
s = Term(type=IRI, iri=entity_uri),
|
||||||
p = Value(value=DEFINITION, is_uri=True),
|
p = Term(type=IRI, iri=DEFINITION),
|
||||||
o = Value(value=defn["definition"], is_uri=False),
|
o = Term(type=LITERAL, value=defn["definition"]),
|
||||||
))
|
))
|
||||||
|
|
||||||
# Add subject-of relationship to document
|
# Add subject-of relationship to document
|
||||||
if metadata.id:
|
if metadata.id:
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s = Value(value=entity_uri, is_uri=True),
|
s = Term(type=IRI, iri=entity_uri),
|
||||||
p = Value(value=SUBJECT_OF, is_uri=True),
|
p = Term(type=IRI, iri=SUBJECT_OF),
|
||||||
o = Value(value=metadata.id, is_uri=True),
|
o = Term(type=IRI, iri=metadata.id),
|
||||||
))
|
))
|
||||||
|
|
||||||
# Create entity context for embeddings
|
# Create entity context for embeddings
|
||||||
entity_contexts.append(EntityContext(
|
entity_contexts.append(EntityContext(
|
||||||
entity=Value(value=entity_uri, is_uri=True),
|
entity=Term(type=IRI, iri=entity_uri),
|
||||||
context=defn["definition"]
|
context=defn["definition"]
|
||||||
))
|
))
|
||||||
|
|
||||||
|
|
@ -288,33 +288,33 @@ class Processor(FlowProcessor):
|
||||||
subject_uri = self.to_uri(rel["subject"])
|
subject_uri = self.to_uri(rel["subject"])
|
||||||
predicate_uri = self.to_uri(rel["predicate"])
|
predicate_uri = self.to_uri(rel["predicate"])
|
||||||
|
|
||||||
subject_value = Value(value=subject_uri, is_uri=True)
|
subject_value = Term(type=IRI, iri=subject_uri)
|
||||||
predicate_value = Value(value=predicate_uri, is_uri=True)
|
predicate_value = Term(type=IRI, iri=predicate_uri)
|
||||||
if rel.get("object-entity", True):
|
if rel.get("object-entity", True):
|
||||||
object_uri = self.to_uri(rel["object"])
|
object_uri = self.to_uri(rel["object"])
|
||||||
object_value = Value(value=object_uri, is_uri=True)
|
object_value = Term(type=IRI, iri=object_uri)
|
||||||
else:
|
else:
|
||||||
object_value = Value(value=rel["object"], is_uri=False)
|
object_value = Term(type=LITERAL, value=rel["object"])
|
||||||
|
|
||||||
# Add subject and predicate labels
|
# Add subject and predicate labels
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s = subject_value,
|
s = subject_value,
|
||||||
p = Value(value=RDF_LABEL, is_uri=True),
|
p = Term(type=IRI, iri=RDF_LABEL),
|
||||||
o = Value(value=rel["subject"], is_uri=False),
|
o = Term(type=LITERAL, value=rel["subject"]),
|
||||||
))
|
))
|
||||||
|
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s = predicate_value,
|
s = predicate_value,
|
||||||
p = Value(value=RDF_LABEL, is_uri=True),
|
p = Term(type=IRI, iri=RDF_LABEL),
|
||||||
o = Value(value=rel["predicate"], is_uri=False),
|
o = Term(type=LITERAL, value=rel["predicate"]),
|
||||||
))
|
))
|
||||||
|
|
||||||
# Handle object (entity vs literal)
|
# Handle object (entity vs literal)
|
||||||
if rel.get("object-entity", True):
|
if rel.get("object-entity", True):
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s = object_value,
|
s = object_value,
|
||||||
p = Value(value=RDF_LABEL, is_uri=True),
|
p = Term(type=IRI, iri=RDF_LABEL),
|
||||||
o = Value(value=rel["object"], is_uri=True),
|
o = Term(type=LITERAL, value=rel["object"]),
|
||||||
))
|
))
|
||||||
|
|
||||||
# Add the main relationship triple
|
# Add the main relationship triple
|
||||||
|
|
@ -328,21 +328,21 @@ class Processor(FlowProcessor):
|
||||||
if metadata.id:
|
if metadata.id:
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s = subject_value,
|
s = subject_value,
|
||||||
p = Value(value=SUBJECT_OF, is_uri=True),
|
p = Term(type=IRI, iri=SUBJECT_OF),
|
||||||
o = Value(value=metadata.id, is_uri=True),
|
o = Term(type=IRI, iri=metadata.id),
|
||||||
))
|
))
|
||||||
|
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s = predicate_value,
|
s = predicate_value,
|
||||||
p = Value(value=SUBJECT_OF, is_uri=True),
|
p = Term(type=IRI, iri=SUBJECT_OF),
|
||||||
o = Value(value=metadata.id, is_uri=True),
|
o = Term(type=IRI, iri=metadata.id),
|
||||||
))
|
))
|
||||||
|
|
||||||
if rel.get("object-entity", True):
|
if rel.get("object-entity", True):
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s = object_value,
|
s = object_value,
|
||||||
p = Value(value=SUBJECT_OF, is_uri=True),
|
p = Term(type=IRI, iri=SUBJECT_OF),
|
||||||
o = Value(value=metadata.id, is_uri=True),
|
o = Term(type=IRI, iri=metadata.id),
|
||||||
))
|
))
|
||||||
|
|
||||||
return triples, entity_contexts
|
return triples, entity_contexts
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ import json
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from .... schema import Chunk, Triple, Triples, Metadata, Value
|
from .... schema import Chunk, Triple, Triples, Metadata, Term, IRI, LITERAL
|
||||||
|
|
||||||
# Module logger
|
# Module logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -20,9 +20,9 @@ from .... rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF
|
||||||
from .... base import FlowProcessor, ConsumerSpec, ProducerSpec
|
from .... base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||||
from .... base import PromptClientSpec
|
from .... base import PromptClientSpec
|
||||||
|
|
||||||
DEFINITION_VALUE = Value(value=DEFINITION, is_uri=True)
|
DEFINITION_VALUE = Term(type=IRI, iri=DEFINITION)
|
||||||
RDF_LABEL_VALUE = Value(value=RDF_LABEL, is_uri=True)
|
RDF_LABEL_VALUE = Term(type=IRI, iri=RDF_LABEL)
|
||||||
SUBJECT_OF_VALUE = Value(value=SUBJECT_OF, is_uri=True)
|
SUBJECT_OF_VALUE = Term(type=IRI, iri=SUBJECT_OF)
|
||||||
|
|
||||||
default_ident = "kg-extract-definitions"
|
default_ident = "kg-extract-definitions"
|
||||||
default_concurrency = 1
|
default_concurrency = 1
|
||||||
|
|
@ -142,13 +142,13 @@ class Processor(FlowProcessor):
|
||||||
|
|
||||||
s_uri = self.to_uri(s)
|
s_uri = self.to_uri(s)
|
||||||
|
|
||||||
s_value = Value(value=str(s_uri), is_uri=True)
|
s_value = Term(type=IRI, iri=str(s_uri))
|
||||||
o_value = Value(value=str(o), is_uri=False)
|
o_value = Term(type=LITERAL, value=str(o))
|
||||||
|
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s=s_value,
|
s=s_value,
|
||||||
p=RDF_LABEL_VALUE,
|
p=RDF_LABEL_VALUE,
|
||||||
o=Value(value=s, is_uri=False),
|
o=Term(type=LITERAL, value=s),
|
||||||
))
|
))
|
||||||
|
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
|
|
@ -158,7 +158,7 @@ class Processor(FlowProcessor):
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s=s_value,
|
s=s_value,
|
||||||
p=SUBJECT_OF_VALUE,
|
p=SUBJECT_OF_VALUE,
|
||||||
o=Value(value=v.metadata.id, is_uri=True)
|
o=Term(type=IRI, iri=v.metadata.id)
|
||||||
))
|
))
|
||||||
|
|
||||||
ec = EntityContext(
|
ec = EntityContext(
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ import logging
|
||||||
import asyncio
|
import asyncio
|
||||||
from typing import List, Dict, Any, Optional
|
from typing import List, Dict, Any, Optional
|
||||||
|
|
||||||
from .... schema import Chunk, Triple, Triples, Metadata, Value
|
from .... schema import Chunk, Triple, Triples, Metadata, Term, IRI, LITERAL
|
||||||
from .... schema import EntityContext, EntityContexts
|
from .... schema import EntityContext, EntityContexts
|
||||||
from .... schema import PromptRequest, PromptResponse
|
from .... schema import PromptRequest, PromptResponse
|
||||||
from .... rdf import TRUSTGRAPH_ENTITIES, RDF_TYPE, RDF_LABEL, DEFINITION
|
from .... rdf import TRUSTGRAPH_ENTITIES, RDF_TYPE, RDF_LABEL, DEFINITION
|
||||||
|
|
@ -39,6 +39,14 @@ URI_PREFIXES = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def make_term(v, is_uri):
|
||||||
|
"""Helper to create Term from value and is_uri flag."""
|
||||||
|
if is_uri:
|
||||||
|
return Term(type=IRI, iri=v)
|
||||||
|
else:
|
||||||
|
return Term(type=LITERAL, value=v)
|
||||||
|
|
||||||
|
|
||||||
class Processor(FlowProcessor):
|
class Processor(FlowProcessor):
|
||||||
"""Main OntoRAG extraction processor."""
|
"""Main OntoRAG extraction processor."""
|
||||||
|
|
||||||
|
|
@ -446,9 +454,9 @@ class Processor(FlowProcessor):
|
||||||
is_object_uri = False
|
is_object_uri = False
|
||||||
|
|
||||||
# Create Triple object with expanded URIs
|
# Create Triple object with expanded URIs
|
||||||
s_value = Value(value=subject_uri, is_uri=True)
|
s_value = make_term(subject_uri, is_uri=True)
|
||||||
p_value = Value(value=predicate_uri, is_uri=True)
|
p_value = make_term(predicate_uri, is_uri=True)
|
||||||
o_value = Value(value=object_uri, is_uri=is_object_uri)
|
o_value = make_term(object_uri, is_uri=is_object_uri)
|
||||||
|
|
||||||
validated_triples.append(Triple(
|
validated_triples.append(Triple(
|
||||||
s=s_value,
|
s=s_value,
|
||||||
|
|
@ -609,9 +617,9 @@ class Processor(FlowProcessor):
|
||||||
|
|
||||||
# rdf:type owl:Class
|
# rdf:type owl:Class
|
||||||
ontology_triples.append(Triple(
|
ontology_triples.append(Triple(
|
||||||
s=Value(value=class_uri, is_uri=True),
|
s=make_term(class_uri, is_uri=True),
|
||||||
p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
|
p=make_term("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
|
||||||
o=Value(value="http://www.w3.org/2002/07/owl#Class", is_uri=True)
|
o=make_term("http://www.w3.org/2002/07/owl#Class", is_uri=True)
|
||||||
))
|
))
|
||||||
|
|
||||||
# rdfs:label (stored as 'labels' in OntologyClass.__dict__)
|
# rdfs:label (stored as 'labels' in OntologyClass.__dict__)
|
||||||
|
|
@ -620,18 +628,18 @@ class Processor(FlowProcessor):
|
||||||
if isinstance(labels, list) and labels:
|
if isinstance(labels, list) and labels:
|
||||||
label_val = labels[0].get('value', class_id) if isinstance(labels[0], dict) else str(labels[0])
|
label_val = labels[0].get('value', class_id) if isinstance(labels[0], dict) else str(labels[0])
|
||||||
ontology_triples.append(Triple(
|
ontology_triples.append(Triple(
|
||||||
s=Value(value=class_uri, is_uri=True),
|
s=make_term(class_uri, is_uri=True),
|
||||||
p=Value(value=RDF_LABEL, is_uri=True),
|
p=make_term(RDF_LABEL, is_uri=True),
|
||||||
o=Value(value=label_val, is_uri=False)
|
o=make_term(label_val, is_uri=False)
|
||||||
))
|
))
|
||||||
|
|
||||||
# rdfs:comment (stored as 'comment' in OntologyClass.__dict__)
|
# rdfs:comment (stored as 'comment' in OntologyClass.__dict__)
|
||||||
if isinstance(class_def, dict) and 'comment' in class_def and class_def['comment']:
|
if isinstance(class_def, dict) and 'comment' in class_def and class_def['comment']:
|
||||||
comment = class_def['comment']
|
comment = class_def['comment']
|
||||||
ontology_triples.append(Triple(
|
ontology_triples.append(Triple(
|
||||||
s=Value(value=class_uri, is_uri=True),
|
s=make_term(class_uri, is_uri=True),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True),
|
p=make_term("http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True),
|
||||||
o=Value(value=comment, is_uri=False)
|
o=make_term(comment, is_uri=False)
|
||||||
))
|
))
|
||||||
|
|
||||||
# rdfs:subClassOf (stored as 'subclass_of' in OntologyClass.__dict__)
|
# rdfs:subClassOf (stored as 'subclass_of' in OntologyClass.__dict__)
|
||||||
|
|
@ -648,9 +656,9 @@ class Processor(FlowProcessor):
|
||||||
parent_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{parent}"
|
parent_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{parent}"
|
||||||
|
|
||||||
ontology_triples.append(Triple(
|
ontology_triples.append(Triple(
|
||||||
s=Value(value=class_uri, is_uri=True),
|
s=make_term(class_uri, is_uri=True),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#subClassOf", is_uri=True),
|
p=make_term("http://www.w3.org/2000/01/rdf-schema#subClassOf", is_uri=True),
|
||||||
o=Value(value=parent_uri, is_uri=True)
|
o=make_term(parent_uri, is_uri=True)
|
||||||
))
|
))
|
||||||
|
|
||||||
# Generate triples for object properties
|
# Generate triples for object properties
|
||||||
|
|
@ -663,9 +671,9 @@ class Processor(FlowProcessor):
|
||||||
|
|
||||||
# rdf:type owl:ObjectProperty
|
# rdf:type owl:ObjectProperty
|
||||||
ontology_triples.append(Triple(
|
ontology_triples.append(Triple(
|
||||||
s=Value(value=prop_uri, is_uri=True),
|
s=make_term(prop_uri, is_uri=True),
|
||||||
p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
|
p=make_term("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
|
||||||
o=Value(value="http://www.w3.org/2002/07/owl#ObjectProperty", is_uri=True)
|
o=make_term("http://www.w3.org/2002/07/owl#ObjectProperty", is_uri=True)
|
||||||
))
|
))
|
||||||
|
|
||||||
# rdfs:label (stored as 'labels' in OntologyProperty.__dict__)
|
# rdfs:label (stored as 'labels' in OntologyProperty.__dict__)
|
||||||
|
|
@ -674,18 +682,18 @@ class Processor(FlowProcessor):
|
||||||
if isinstance(labels, list) and labels:
|
if isinstance(labels, list) and labels:
|
||||||
label_val = labels[0].get('value', prop_id) if isinstance(labels[0], dict) else str(labels[0])
|
label_val = labels[0].get('value', prop_id) if isinstance(labels[0], dict) else str(labels[0])
|
||||||
ontology_triples.append(Triple(
|
ontology_triples.append(Triple(
|
||||||
s=Value(value=prop_uri, is_uri=True),
|
s=make_term(prop_uri, is_uri=True),
|
||||||
p=Value(value=RDF_LABEL, is_uri=True),
|
p=make_term(RDF_LABEL, is_uri=True),
|
||||||
o=Value(value=label_val, is_uri=False)
|
o=make_term(label_val, is_uri=False)
|
||||||
))
|
))
|
||||||
|
|
||||||
# rdfs:comment (stored as 'comment' in OntologyProperty.__dict__)
|
# rdfs:comment (stored as 'comment' in OntologyProperty.__dict__)
|
||||||
if isinstance(prop_def, dict) and 'comment' in prop_def and prop_def['comment']:
|
if isinstance(prop_def, dict) and 'comment' in prop_def and prop_def['comment']:
|
||||||
comment = prop_def['comment']
|
comment = prop_def['comment']
|
||||||
ontology_triples.append(Triple(
|
ontology_triples.append(Triple(
|
||||||
s=Value(value=prop_uri, is_uri=True),
|
s=make_term(prop_uri, is_uri=True),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True),
|
p=make_term("http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True),
|
||||||
o=Value(value=comment, is_uri=False)
|
o=make_term(comment, is_uri=False)
|
||||||
))
|
))
|
||||||
|
|
||||||
# rdfs:domain (stored as 'domain' in OntologyProperty.__dict__)
|
# rdfs:domain (stored as 'domain' in OntologyProperty.__dict__)
|
||||||
|
|
@ -702,9 +710,9 @@ class Processor(FlowProcessor):
|
||||||
domain_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{domain}"
|
domain_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{domain}"
|
||||||
|
|
||||||
ontology_triples.append(Triple(
|
ontology_triples.append(Triple(
|
||||||
s=Value(value=prop_uri, is_uri=True),
|
s=make_term(prop_uri, is_uri=True),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#domain", is_uri=True),
|
p=make_term("http://www.w3.org/2000/01/rdf-schema#domain", is_uri=True),
|
||||||
o=Value(value=domain_uri, is_uri=True)
|
o=make_term(domain_uri, is_uri=True)
|
||||||
))
|
))
|
||||||
|
|
||||||
# rdfs:range (stored as 'range' in OntologyProperty.__dict__)
|
# rdfs:range (stored as 'range' in OntologyProperty.__dict__)
|
||||||
|
|
@ -721,9 +729,9 @@ class Processor(FlowProcessor):
|
||||||
range_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{range_val}"
|
range_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{range_val}"
|
||||||
|
|
||||||
ontology_triples.append(Triple(
|
ontology_triples.append(Triple(
|
||||||
s=Value(value=prop_uri, is_uri=True),
|
s=make_term(prop_uri, is_uri=True),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#range", is_uri=True),
|
p=make_term("http://www.w3.org/2000/01/rdf-schema#range", is_uri=True),
|
||||||
o=Value(value=range_uri, is_uri=True)
|
o=make_term(range_uri, is_uri=True)
|
||||||
))
|
))
|
||||||
|
|
||||||
# Generate triples for datatype properties
|
# Generate triples for datatype properties
|
||||||
|
|
@ -736,9 +744,9 @@ class Processor(FlowProcessor):
|
||||||
|
|
||||||
# rdf:type owl:DatatypeProperty
|
# rdf:type owl:DatatypeProperty
|
||||||
ontology_triples.append(Triple(
|
ontology_triples.append(Triple(
|
||||||
s=Value(value=prop_uri, is_uri=True),
|
s=make_term(prop_uri, is_uri=True),
|
||||||
p=Value(value="http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
|
p=make_term("http://www.w3.org/1999/02/22-rdf-syntax-ns#type", is_uri=True),
|
||||||
o=Value(value="http://www.w3.org/2002/07/owl#DatatypeProperty", is_uri=True)
|
o=make_term("http://www.w3.org/2002/07/owl#DatatypeProperty", is_uri=True)
|
||||||
))
|
))
|
||||||
|
|
||||||
# rdfs:label (stored as 'labels' in OntologyProperty.__dict__)
|
# rdfs:label (stored as 'labels' in OntologyProperty.__dict__)
|
||||||
|
|
@ -747,18 +755,18 @@ class Processor(FlowProcessor):
|
||||||
if isinstance(labels, list) and labels:
|
if isinstance(labels, list) and labels:
|
||||||
label_val = labels[0].get('value', prop_id) if isinstance(labels[0], dict) else str(labels[0])
|
label_val = labels[0].get('value', prop_id) if isinstance(labels[0], dict) else str(labels[0])
|
||||||
ontology_triples.append(Triple(
|
ontology_triples.append(Triple(
|
||||||
s=Value(value=prop_uri, is_uri=True),
|
s=make_term(prop_uri, is_uri=True),
|
||||||
p=Value(value=RDF_LABEL, is_uri=True),
|
p=make_term(RDF_LABEL, is_uri=True),
|
||||||
o=Value(value=label_val, is_uri=False)
|
o=make_term(label_val, is_uri=False)
|
||||||
))
|
))
|
||||||
|
|
||||||
# rdfs:comment (stored as 'comment' in OntologyProperty.__dict__)
|
# rdfs:comment (stored as 'comment' in OntologyProperty.__dict__)
|
||||||
if isinstance(prop_def, dict) and 'comment' in prop_def and prop_def['comment']:
|
if isinstance(prop_def, dict) and 'comment' in prop_def and prop_def['comment']:
|
||||||
comment = prop_def['comment']
|
comment = prop_def['comment']
|
||||||
ontology_triples.append(Triple(
|
ontology_triples.append(Triple(
|
||||||
s=Value(value=prop_uri, is_uri=True),
|
s=make_term(prop_uri, is_uri=True),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True),
|
p=make_term("http://www.w3.org/2000/01/rdf-schema#comment", is_uri=True),
|
||||||
o=Value(value=comment, is_uri=False)
|
o=make_term(comment, is_uri=False)
|
||||||
))
|
))
|
||||||
|
|
||||||
# rdfs:domain (stored as 'domain' in OntologyProperty.__dict__)
|
# rdfs:domain (stored as 'domain' in OntologyProperty.__dict__)
|
||||||
|
|
@ -775,9 +783,9 @@ class Processor(FlowProcessor):
|
||||||
domain_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{domain}"
|
domain_uri = f"https://trustgraph.ai/ontology/{ontology_subset.ontology_id}#{domain}"
|
||||||
|
|
||||||
ontology_triples.append(Triple(
|
ontology_triples.append(Triple(
|
||||||
s=Value(value=prop_uri, is_uri=True),
|
s=make_term(prop_uri, is_uri=True),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#domain", is_uri=True),
|
p=make_term("http://www.w3.org/2000/01/rdf-schema#domain", is_uri=True),
|
||||||
o=Value(value=domain_uri, is_uri=True)
|
o=make_term(domain_uri, is_uri=True)
|
||||||
))
|
))
|
||||||
|
|
||||||
# rdfs:range (datatype)
|
# rdfs:range (datatype)
|
||||||
|
|
@ -790,9 +798,9 @@ class Processor(FlowProcessor):
|
||||||
range_uri = range_val
|
range_uri = range_val
|
||||||
|
|
||||||
ontology_triples.append(Triple(
|
ontology_triples.append(Triple(
|
||||||
s=Value(value=prop_uri, is_uri=True),
|
s=make_term(prop_uri, is_uri=True),
|
||||||
p=Value(value="http://www.w3.org/2000/01/rdf-schema#range", is_uri=True),
|
p=make_term("http://www.w3.org/2000/01/rdf-schema#range", is_uri=True),
|
||||||
o=Value(value=range_uri, is_uri=True)
|
o=make_term(range_uri, is_uri=True)
|
||||||
))
|
))
|
||||||
|
|
||||||
logger.info(f"Generated {len(ontology_triples)} triples describing ontology elements")
|
logger.info(f"Generated {len(ontology_triples)} triples describing ontology elements")
|
||||||
|
|
@ -814,9 +822,9 @@ class Processor(FlowProcessor):
|
||||||
entity_data = {} # subject_uri -> {labels: [], definitions: []}
|
entity_data = {} # subject_uri -> {labels: [], definitions: []}
|
||||||
|
|
||||||
for triple in triples:
|
for triple in triples:
|
||||||
subject_uri = triple.s.value
|
subject_uri = triple.s.iri if triple.s.type == IRI else triple.s.value
|
||||||
predicate_uri = triple.p.value
|
predicate_uri = triple.p.iri if triple.p.type == IRI else triple.p.value
|
||||||
object_val = triple.o.value
|
object_val = triple.o.value if triple.o.type == LITERAL else triple.o.iri
|
||||||
|
|
||||||
# Initialize entity data if not exists
|
# Initialize entity data if not exists
|
||||||
if subject_uri not in entity_data:
|
if subject_uri not in entity_data:
|
||||||
|
|
@ -824,12 +832,12 @@ class Processor(FlowProcessor):
|
||||||
|
|
||||||
# Collect labels (rdfs:label)
|
# Collect labels (rdfs:label)
|
||||||
if predicate_uri == RDF_LABEL:
|
if predicate_uri == RDF_LABEL:
|
||||||
if not triple.o.is_uri: # Labels are literals
|
if triple.o.type == LITERAL: # Labels are literals
|
||||||
entity_data[subject_uri]['labels'].append(object_val)
|
entity_data[subject_uri]['labels'].append(object_val)
|
||||||
|
|
||||||
# Collect definitions (skos:definition, schema:description)
|
# Collect definitions (skos:definition, schema:description)
|
||||||
elif predicate_uri == DEFINITION or predicate_uri == "https://schema.org/description":
|
elif predicate_uri == DEFINITION or predicate_uri == "https://schema.org/description":
|
||||||
if not triple.o.is_uri:
|
if triple.o.type == LITERAL:
|
||||||
entity_data[subject_uri]['definitions'].append(object_val)
|
entity_data[subject_uri]['definitions'].append(object_val)
|
||||||
|
|
||||||
# Build EntityContext objects
|
# Build EntityContext objects
|
||||||
|
|
@ -848,7 +856,7 @@ class Processor(FlowProcessor):
|
||||||
if context_parts:
|
if context_parts:
|
||||||
context_text = ". ".join(context_parts)
|
context_text = ". ".join(context_parts)
|
||||||
entity_contexts.append(EntityContext(
|
entity_contexts.append(EntityContext(
|
||||||
entity=Value(value=subject_uri, is_uri=True),
|
entity=make_term(subject_uri, is_uri=True),
|
||||||
context=context_text
|
context=context_text
|
||||||
))
|
))
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ with full URIs and correct is_uri flags.
|
||||||
import logging
|
import logging
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
from .... schema import Triple, Value
|
from .... schema import Triple, Term, IRI, LITERAL
|
||||||
from .... rdf import RDF_TYPE, RDF_LABEL
|
from .... rdf import RDF_TYPE, RDF_LABEL
|
||||||
|
|
||||||
from .simplified_parser import Entity, Relationship, Attribute, ExtractionResult
|
from .simplified_parser import Entity, Relationship, Attribute, ExtractionResult
|
||||||
|
|
@ -87,17 +87,17 @@ class TripleConverter:
|
||||||
|
|
||||||
# Generate type triple: entity rdf:type ClassURI
|
# Generate type triple: entity rdf:type ClassURI
|
||||||
type_triple = Triple(
|
type_triple = Triple(
|
||||||
s=Value(value=entity_uri, is_uri=True),
|
s=Term(type=IRI, iri=entity_uri),
|
||||||
p=Value(value=RDF_TYPE, is_uri=True),
|
p=Term(type=IRI, iri=RDF_TYPE),
|
||||||
o=Value(value=class_uri, is_uri=True)
|
o=Term(type=IRI, iri=class_uri)
|
||||||
)
|
)
|
||||||
triples.append(type_triple)
|
triples.append(type_triple)
|
||||||
|
|
||||||
# Generate label triple: entity rdfs:label "entity name"
|
# Generate label triple: entity rdfs:label "entity name"
|
||||||
label_triple = Triple(
|
label_triple = Triple(
|
||||||
s=Value(value=entity_uri, is_uri=True),
|
s=Term(type=IRI, iri=entity_uri),
|
||||||
p=Value(value=RDF_LABEL, is_uri=True),
|
p=Term(type=IRI, iri=RDF_LABEL),
|
||||||
o=Value(value=entity.entity, is_uri=False) # Literal!
|
o=Term(type=LITERAL, value=entity.entity) # Literal!
|
||||||
)
|
)
|
||||||
triples.append(label_triple)
|
triples.append(label_triple)
|
||||||
|
|
||||||
|
|
@ -131,9 +131,9 @@ class TripleConverter:
|
||||||
|
|
||||||
# Generate triple: subject property object
|
# Generate triple: subject property object
|
||||||
return Triple(
|
return Triple(
|
||||||
s=Value(value=subject_uri, is_uri=True),
|
s=Term(type=IRI, iri=subject_uri),
|
||||||
p=Value(value=property_uri, is_uri=True),
|
p=Term(type=IRI, iri=property_uri),
|
||||||
o=Value(value=object_uri, is_uri=True)
|
o=Term(type=IRI, iri=object_uri)
|
||||||
)
|
)
|
||||||
|
|
||||||
def convert_attribute(self, attribute: Attribute) -> Optional[Triple]:
|
def convert_attribute(self, attribute: Attribute) -> Optional[Triple]:
|
||||||
|
|
@ -159,9 +159,9 @@ class TripleConverter:
|
||||||
|
|
||||||
# Generate triple: entity property "literal value"
|
# Generate triple: entity property "literal value"
|
||||||
return Triple(
|
return Triple(
|
||||||
s=Value(value=entity_uri, is_uri=True),
|
s=Term(type=IRI, iri=entity_uri),
|
||||||
p=Value(value=property_uri, is_uri=True),
|
p=Term(type=IRI, iri=property_uri),
|
||||||
o=Value(value=attribute.value, is_uri=False) # Literal!
|
o=Term(type=LITERAL, value=attribute.value) # Literal!
|
||||||
)
|
)
|
||||||
|
|
||||||
def _get_class_uri(self, class_id: str) -> Optional[str]:
|
def _get_class_uri(self, class_id: str) -> Optional[str]:
|
||||||
|
|
|
||||||
|
|
@ -13,15 +13,15 @@ import urllib.parse
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
from .... schema import Chunk, Triple, Triples
|
from .... schema import Chunk, Triple, Triples
|
||||||
from .... schema import Metadata, Value
|
from .... schema import Metadata, Term, IRI, LITERAL
|
||||||
from .... schema import PromptRequest, PromptResponse
|
from .... schema import PromptRequest, PromptResponse
|
||||||
from .... rdf import RDF_LABEL, TRUSTGRAPH_ENTITIES, SUBJECT_OF
|
from .... rdf import RDF_LABEL, TRUSTGRAPH_ENTITIES, SUBJECT_OF
|
||||||
|
|
||||||
from .... base import FlowProcessor, ConsumerSpec, ProducerSpec
|
from .... base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||||
from .... base import PromptClientSpec
|
from .... base import PromptClientSpec
|
||||||
|
|
||||||
RDF_LABEL_VALUE = Value(value=RDF_LABEL, is_uri=True)
|
RDF_LABEL_VALUE = Term(type=IRI, iri=RDF_LABEL)
|
||||||
SUBJECT_OF_VALUE = Value(value=SUBJECT_OF, is_uri=True)
|
SUBJECT_OF_VALUE = Term(type=IRI, iri=SUBJECT_OF)
|
||||||
|
|
||||||
default_ident = "kg-extract-relationships"
|
default_ident = "kg-extract-relationships"
|
||||||
default_concurrency = 1
|
default_concurrency = 1
|
||||||
|
|
@ -127,16 +127,16 @@ class Processor(FlowProcessor):
|
||||||
if o is None: continue
|
if o is None: continue
|
||||||
|
|
||||||
s_uri = self.to_uri(s)
|
s_uri = self.to_uri(s)
|
||||||
s_value = Value(value=str(s_uri), is_uri=True)
|
s_value = Term(type=IRI, iri=str(s_uri))
|
||||||
|
|
||||||
p_uri = self.to_uri(p)
|
p_uri = self.to_uri(p)
|
||||||
p_value = Value(value=str(p_uri), is_uri=True)
|
p_value = Term(type=IRI, iri=str(p_uri))
|
||||||
|
|
||||||
if rel["object-entity"]:
|
if rel["object-entity"]:
|
||||||
o_uri = self.to_uri(o)
|
o_uri = self.to_uri(o)
|
||||||
o_value = Value(value=str(o_uri), is_uri=True)
|
o_value = Term(type=IRI, iri=str(o_uri))
|
||||||
else:
|
else:
|
||||||
o_value = Value(value=str(o), is_uri=False)
|
o_value = Term(type=LITERAL, value=str(o))
|
||||||
|
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s=s_value,
|
s=s_value,
|
||||||
|
|
@ -148,14 +148,14 @@ class Processor(FlowProcessor):
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s=s_value,
|
s=s_value,
|
||||||
p=RDF_LABEL_VALUE,
|
p=RDF_LABEL_VALUE,
|
||||||
o=Value(value=str(s), is_uri=False)
|
o=Term(type=LITERAL, value=str(s))
|
||||||
))
|
))
|
||||||
|
|
||||||
# Label for p
|
# Label for p
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s=p_value,
|
s=p_value,
|
||||||
p=RDF_LABEL_VALUE,
|
p=RDF_LABEL_VALUE,
|
||||||
o=Value(value=str(p), is_uri=False)
|
o=Term(type=LITERAL, value=str(p))
|
||||||
))
|
))
|
||||||
|
|
||||||
if rel["object-entity"]:
|
if rel["object-entity"]:
|
||||||
|
|
@ -163,14 +163,14 @@ class Processor(FlowProcessor):
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s=o_value,
|
s=o_value,
|
||||||
p=RDF_LABEL_VALUE,
|
p=RDF_LABEL_VALUE,
|
||||||
o=Value(value=str(o), is_uri=False)
|
o=Term(type=LITERAL, value=str(o))
|
||||||
))
|
))
|
||||||
|
|
||||||
# 'Subject of' for s
|
# 'Subject of' for s
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s=s_value,
|
s=s_value,
|
||||||
p=SUBJECT_OF_VALUE,
|
p=SUBJECT_OF_VALUE,
|
||||||
o=Value(value=v.metadata.id, is_uri=True)
|
o=Term(type=IRI, iri=v.metadata.id)
|
||||||
))
|
))
|
||||||
|
|
||||||
if rel["object-entity"]:
|
if rel["object-entity"]:
|
||||||
|
|
@ -178,7 +178,7 @@ class Processor(FlowProcessor):
|
||||||
triples.append(Triple(
|
triples.append(Triple(
|
||||||
s=o_value,
|
s=o_value,
|
||||||
p=SUBJECT_OF_VALUE,
|
p=SUBJECT_OF_VALUE,
|
||||||
o=Value(value=v.metadata.id, is_uri=True)
|
o=Term(type=IRI, iri=v.metadata.id)
|
||||||
))
|
))
|
||||||
|
|
||||||
await self.emit_triples(
|
await self.emit_triples(
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ import logging
|
||||||
# Module logger
|
# Module logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
from .... schema import Chunk, Triple, Triples, Metadata, Value
|
from .... schema import Chunk, Triple, Triples, Metadata, Term, IRI, LITERAL
|
||||||
from .... schema import chunk_ingest_queue, triples_store_queue
|
from .... schema import chunk_ingest_queue, triples_store_queue
|
||||||
from .... schema import prompt_request_queue
|
from .... schema import prompt_request_queue
|
||||||
from .... schema import prompt_response_queue
|
from .... schema import prompt_response_queue
|
||||||
|
|
@ -20,7 +20,7 @@ from .... clients.prompt_client import PromptClient
|
||||||
from .... rdf import TRUSTGRAPH_ENTITIES, DEFINITION
|
from .... rdf import TRUSTGRAPH_ENTITIES, DEFINITION
|
||||||
from .... base import ConsumerProducer
|
from .... base import ConsumerProducer
|
||||||
|
|
||||||
DEFINITION_VALUE = Value(value=DEFINITION, is_uri=True)
|
DEFINITION_VALUE = Term(type=IRI, iri=DEFINITION)
|
||||||
|
|
||||||
module = "kg-extract-topics"
|
module = "kg-extract-topics"
|
||||||
|
|
||||||
|
|
@ -106,8 +106,8 @@ class Processor(ConsumerProducer):
|
||||||
|
|
||||||
s_uri = self.to_uri(s)
|
s_uri = self.to_uri(s)
|
||||||
|
|
||||||
s_value = Value(value=str(s_uri), is_uri=True)
|
s_value = Term(type=IRI, iri=str(s_uri))
|
||||||
o_value = Value(value=str(o), is_uri=False)
|
o_value = Term(type=LITERAL, value=str(o))
|
||||||
|
|
||||||
await self.emit_edge(
|
await self.emit_edge(
|
||||||
v.metadata, s_value, DEFINITION_VALUE, o_value
|
v.metadata, s_value, DEFINITION_VALUE, o_value
|
||||||
|
|
|
||||||
|
|
@ -1,46 +1,37 @@
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
|
||||||
from ... schema import Value, Triple, DocumentMetadata, ProcessingMetadata
|
from ... schema import Term, Triple, DocumentMetadata, ProcessingMetadata
|
||||||
|
from ... messaging.translators.primitives import TermTranslator, TripleTranslator
|
||||||
|
|
||||||
|
# Singleton translator instances
|
||||||
|
_term_translator = TermTranslator()
|
||||||
|
_triple_translator = TripleTranslator()
|
||||||
|
|
||||||
# DEPRECATED: These functions have been moved to trustgraph.... messaging.translators
|
|
||||||
# Use the new messaging translation system instead for consistency and reusability.
|
|
||||||
# Examples:
|
|
||||||
# from trustgraph.... messaging.translators.primitives import ValueTranslator
|
|
||||||
# value_translator = ValueTranslator()
|
|
||||||
# pulsar_value = value_translator.to_pulsar({"v": "example", "e": True})
|
|
||||||
|
|
||||||
def to_value(x):
|
def to_value(x):
|
||||||
return Value(value=x["v"], is_uri=x["e"])
|
"""Convert dict to Term. Delegates to TermTranslator."""
|
||||||
|
return _term_translator.to_pulsar(x)
|
||||||
|
|
||||||
|
|
||||||
def to_subgraph(x):
|
def to_subgraph(x):
|
||||||
return [
|
"""Convert list of dicts to list of Triples. Delegates to TripleTranslator."""
|
||||||
Triple(
|
return [_triple_translator.to_pulsar(t) for t in x]
|
||||||
s=to_value(t["s"]),
|
|
||||||
p=to_value(t["p"]),
|
|
||||||
o=to_value(t["o"])
|
|
||||||
)
|
|
||||||
for t in x
|
|
||||||
]
|
|
||||||
|
|
||||||
def serialize_value(v):
|
def serialize_value(v):
|
||||||
return {
|
"""Convert Term to dict. Delegates to TermTranslator."""
|
||||||
"v": v.value,
|
return _term_translator.from_pulsar(v)
|
||||||
"e": v.is_uri,
|
|
||||||
}
|
|
||||||
|
|
||||||
def serialize_triple(t):
|
def serialize_triple(t):
|
||||||
return {
|
"""Convert Triple to dict. Delegates to TripleTranslator."""
|
||||||
"s": serialize_value(t.s),
|
return _triple_translator.from_pulsar(t)
|
||||||
"p": serialize_value(t.p),
|
|
||||||
"o": serialize_value(t.o)
|
|
||||||
}
|
|
||||||
|
|
||||||
def serialize_subgraph(sg):
|
def serialize_subgraph(sg):
|
||||||
return [
|
"""Convert list of Triples to list of dicts."""
|
||||||
serialize_triple(t)
|
return [serialize_triple(t) for t in sg]
|
||||||
for t in sg
|
|
||||||
]
|
|
||||||
|
|
||||||
def serialize_triples(message):
|
def serialize_triples(message):
|
||||||
return {
|
return {
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ import logging
|
||||||
|
|
||||||
from .... direct.milvus_doc_embeddings import DocVectors
|
from .... direct.milvus_doc_embeddings import DocVectors
|
||||||
from .... schema import DocumentEmbeddingsResponse
|
from .... schema import DocumentEmbeddingsResponse
|
||||||
from .... schema import Error, Value
|
from .... schema import Error
|
||||||
from .... base import DocumentEmbeddingsQueryService
|
from .... base import DocumentEmbeddingsQueryService
|
||||||
|
|
||||||
# Module logger
|
# Module logger
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ from qdrant_client.models import PointStruct
|
||||||
from qdrant_client.models import Distance, VectorParams
|
from qdrant_client.models import Distance, VectorParams
|
||||||
|
|
||||||
from .... schema import DocumentEmbeddingsResponse
|
from .... schema import DocumentEmbeddingsResponse
|
||||||
from .... schema import Error, Value
|
from .... schema import Error
|
||||||
from .... base import DocumentEmbeddingsQueryService
|
from .... base import DocumentEmbeddingsQueryService
|
||||||
|
|
||||||
# Module logger
|
# Module logger
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ import logging
|
||||||
|
|
||||||
from .... direct.milvus_graph_embeddings import EntityVectors
|
from .... direct.milvus_graph_embeddings import EntityVectors
|
||||||
from .... schema import GraphEmbeddingsResponse
|
from .... schema import GraphEmbeddingsResponse
|
||||||
from .... schema import Error, Value
|
from .... schema import Error, Term, IRI, LITERAL
|
||||||
from .... base import GraphEmbeddingsQueryService
|
from .... base import GraphEmbeddingsQueryService
|
||||||
|
|
||||||
# Module logger
|
# Module logger
|
||||||
|
|
@ -33,9 +33,9 @@ class Processor(GraphEmbeddingsQueryService):
|
||||||
|
|
||||||
def create_value(self, ent):
|
def create_value(self, ent):
|
||||||
if ent.startswith("http://") or ent.startswith("https://"):
|
if ent.startswith("http://") or ent.startswith("https://"):
|
||||||
return Value(value=ent, is_uri=True)
|
return Term(type=IRI, iri=ent)
|
||||||
else:
|
else:
|
||||||
return Value(value=ent, is_uri=False)
|
return Term(type=LITERAL, value=ent)
|
||||||
|
|
||||||
async def query_graph_embeddings(self, msg):
|
async def query_graph_embeddings(self, msg):
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ from pinecone import Pinecone, ServerlessSpec
|
||||||
from pinecone.grpc import PineconeGRPC, GRPCClientConfig
|
from pinecone.grpc import PineconeGRPC, GRPCClientConfig
|
||||||
|
|
||||||
from .... schema import GraphEmbeddingsResponse
|
from .... schema import GraphEmbeddingsResponse
|
||||||
from .... schema import Error, Value
|
from .... schema import Error, Term, IRI, LITERAL
|
||||||
from .... base import GraphEmbeddingsQueryService
|
from .... base import GraphEmbeddingsQueryService
|
||||||
|
|
||||||
# Module logger
|
# Module logger
|
||||||
|
|
@ -51,9 +51,9 @@ class Processor(GraphEmbeddingsQueryService):
|
||||||
|
|
||||||
def create_value(self, ent):
|
def create_value(self, ent):
|
||||||
if ent.startswith("http://") or ent.startswith("https://"):
|
if ent.startswith("http://") or ent.startswith("https://"):
|
||||||
return Value(value=ent, is_uri=True)
|
return Term(type=IRI, iri=ent)
|
||||||
else:
|
else:
|
||||||
return Value(value=ent, is_uri=False)
|
return Term(type=LITERAL, value=ent)
|
||||||
|
|
||||||
async def query_graph_embeddings(self, msg):
|
async def query_graph_embeddings(self, msg):
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ from qdrant_client.models import PointStruct
|
||||||
from qdrant_client.models import Distance, VectorParams
|
from qdrant_client.models import Distance, VectorParams
|
||||||
|
|
||||||
from .... schema import GraphEmbeddingsResponse
|
from .... schema import GraphEmbeddingsResponse
|
||||||
from .... schema import Error, Value
|
from .... schema import Error, Term, IRI, LITERAL
|
||||||
from .... base import GraphEmbeddingsQueryService
|
from .... base import GraphEmbeddingsQueryService
|
||||||
|
|
||||||
# Module logger
|
# Module logger
|
||||||
|
|
@ -67,9 +67,9 @@ class Processor(GraphEmbeddingsQueryService):
|
||||||
|
|
||||||
def create_value(self, ent):
|
def create_value(self, ent):
|
||||||
if ent.startswith("http://") or ent.startswith("https://"):
|
if ent.startswith("http://") or ent.startswith("https://"):
|
||||||
return Value(value=ent, is_uri=True)
|
return Term(type=IRI, iri=ent)
|
||||||
else:
|
else:
|
||||||
return Value(value=ent, is_uri=False)
|
return Term(type=LITERAL, value=ent)
|
||||||
|
|
||||||
async def query_graph_embeddings(self, msg):
|
async def query_graph_embeddings(self, msg):
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,14 @@
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Triples query service. Input is a (s, p, o) triple, some values may be
|
Triples query service. Input is a (s, p, o, g) quad pattern, some values may be
|
||||||
null. Output is a list of triples.
|
null. Output is a list of quads.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from .... direct.cassandra_kg import KnowledgeGraph
|
from .... direct.cassandra_kg import KnowledgeGraph, GRAPH_WILDCARD, DEFAULT_GRAPH
|
||||||
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
||||||
from .... schema import Value, Triple
|
from .... schema import Term, Triple, IRI, LITERAL
|
||||||
from .... base import TriplesQueryService
|
from .... base import TriplesQueryService
|
||||||
from .... base.cassandra_config import add_cassandra_args, resolve_cassandra_config
|
from .... base.cassandra_config import add_cassandra_args, resolve_cassandra_config
|
||||||
|
|
||||||
|
|
@ -18,6 +18,27 @@ logger = logging.getLogger(__name__)
|
||||||
default_ident = "triples-query"
|
default_ident = "triples-query"
|
||||||
|
|
||||||
|
|
||||||
|
def get_term_value(term):
|
||||||
|
"""Extract the string value from a Term"""
|
||||||
|
if term is None:
|
||||||
|
return None
|
||||||
|
if term.type == IRI:
|
||||||
|
return term.iri
|
||||||
|
elif term.type == LITERAL:
|
||||||
|
return term.value
|
||||||
|
else:
|
||||||
|
# For blank nodes or other types, use id or value
|
||||||
|
return term.id or term.value
|
||||||
|
|
||||||
|
|
||||||
|
def create_term(value):
|
||||||
|
"""Create a Term from a string value"""
|
||||||
|
if value.startswith("http://") or value.startswith("https://"):
|
||||||
|
return Term(type=IRI, iri=value)
|
||||||
|
else:
|
||||||
|
return Term(type=LITERAL, value=value)
|
||||||
|
|
||||||
|
|
||||||
class Processor(TriplesQueryService):
|
class Processor(TriplesQueryService):
|
||||||
|
|
||||||
def __init__(self, **params):
|
def __init__(self, **params):
|
||||||
|
|
@ -46,12 +67,6 @@ class Processor(TriplesQueryService):
|
||||||
self.cassandra_password = password
|
self.cassandra_password = password
|
||||||
self.table = None
|
self.table = None
|
||||||
|
|
||||||
def create_value(self, ent):
|
|
||||||
if ent.startswith("http://") or ent.startswith("https://"):
|
|
||||||
return Value(value=ent, is_uri=True)
|
|
||||||
else:
|
|
||||||
return Value(value=ent, is_uri=False)
|
|
||||||
|
|
||||||
async def query_triples(self, query):
|
async def query_triples(self, query):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -72,77 +87,103 @@ class Processor(TriplesQueryService):
|
||||||
)
|
)
|
||||||
self.table = user
|
self.table = user
|
||||||
|
|
||||||
triples = []
|
# Extract values from query
|
||||||
|
s_val = get_term_value(query.s)
|
||||||
|
p_val = get_term_value(query.p)
|
||||||
|
o_val = get_term_value(query.o)
|
||||||
|
g_val = query.g # Already a string or None
|
||||||
|
|
||||||
if query.s is not None:
|
quads = []
|
||||||
if query.p is not None:
|
|
||||||
if query.o is not None:
|
# Route to appropriate query method based on which fields are specified
|
||||||
|
if s_val is not None:
|
||||||
|
if p_val is not None:
|
||||||
|
if o_val is not None:
|
||||||
|
# SPO specified - find matching graphs
|
||||||
resp = self.tg.get_spo(
|
resp = self.tg.get_spo(
|
||||||
query.collection, query.s.value, query.p.value, query.o.value,
|
query.collection, s_val, p_val, o_val, g=g_val,
|
||||||
limit=query.limit
|
limit=query.limit
|
||||||
)
|
)
|
||||||
triples.append((query.s.value, query.p.value, query.o.value))
|
for t in resp:
|
||||||
|
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
|
||||||
|
quads.append((s_val, p_val, o_val, g))
|
||||||
else:
|
else:
|
||||||
|
# SP specified
|
||||||
resp = self.tg.get_sp(
|
resp = self.tg.get_sp(
|
||||||
query.collection, query.s.value, query.p.value,
|
query.collection, s_val, p_val, g=g_val,
|
||||||
limit=query.limit
|
limit=query.limit
|
||||||
)
|
)
|
||||||
for t in resp:
|
for t in resp:
|
||||||
triples.append((query.s.value, query.p.value, t.o))
|
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
|
||||||
|
quads.append((s_val, p_val, t.o, g))
|
||||||
else:
|
else:
|
||||||
if query.o is not None:
|
if o_val is not None:
|
||||||
|
# SO specified
|
||||||
resp = self.tg.get_os(
|
resp = self.tg.get_os(
|
||||||
query.collection, query.o.value, query.s.value,
|
query.collection, o_val, s_val, g=g_val,
|
||||||
limit=query.limit
|
limit=query.limit
|
||||||
)
|
)
|
||||||
for t in resp:
|
for t in resp:
|
||||||
triples.append((query.s.value, t.p, query.o.value))
|
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
|
||||||
|
quads.append((s_val, t.p, o_val, g))
|
||||||
else:
|
else:
|
||||||
|
# S only
|
||||||
resp = self.tg.get_s(
|
resp = self.tg.get_s(
|
||||||
query.collection, query.s.value,
|
query.collection, s_val, g=g_val,
|
||||||
limit=query.limit
|
limit=query.limit
|
||||||
)
|
)
|
||||||
for t in resp:
|
for t in resp:
|
||||||
triples.append((query.s.value, t.p, t.o))
|
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
|
||||||
|
quads.append((s_val, t.p, t.o, g))
|
||||||
else:
|
else:
|
||||||
if query.p is not None:
|
if p_val is not None:
|
||||||
if query.o is not None:
|
if o_val is not None:
|
||||||
|
# PO specified
|
||||||
resp = self.tg.get_po(
|
resp = self.tg.get_po(
|
||||||
query.collection, query.p.value, query.o.value,
|
query.collection, p_val, o_val, g=g_val,
|
||||||
limit=query.limit
|
limit=query.limit
|
||||||
)
|
)
|
||||||
for t in resp:
|
for t in resp:
|
||||||
triples.append((t.s, query.p.value, query.o.value))
|
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
|
||||||
|
quads.append((t.s, p_val, o_val, g))
|
||||||
else:
|
else:
|
||||||
|
# P only
|
||||||
resp = self.tg.get_p(
|
resp = self.tg.get_p(
|
||||||
query.collection, query.p.value,
|
query.collection, p_val, g=g_val,
|
||||||
limit=query.limit
|
limit=query.limit
|
||||||
)
|
)
|
||||||
for t in resp:
|
for t in resp:
|
||||||
triples.append((t.s, query.p.value, t.o))
|
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
|
||||||
|
quads.append((t.s, p_val, t.o, g))
|
||||||
else:
|
else:
|
||||||
if query.o is not None:
|
if o_val is not None:
|
||||||
|
# O only
|
||||||
resp = self.tg.get_o(
|
resp = self.tg.get_o(
|
||||||
query.collection, query.o.value,
|
query.collection, o_val, g=g_val,
|
||||||
limit=query.limit
|
limit=query.limit
|
||||||
)
|
)
|
||||||
for t in resp:
|
for t in resp:
|
||||||
triples.append((t.s, t.p, query.o.value))
|
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
|
||||||
|
quads.append((t.s, t.p, o_val, g))
|
||||||
else:
|
else:
|
||||||
|
# Nothing specified - get all
|
||||||
resp = self.tg.get_all(
|
resp = self.tg.get_all(
|
||||||
query.collection,
|
query.collection,
|
||||||
limit=query.limit
|
limit=query.limit
|
||||||
)
|
)
|
||||||
for t in resp:
|
for t in resp:
|
||||||
triples.append((t.s, t.p, t.o))
|
g = t.g if hasattr(t, 'g') else DEFAULT_GRAPH
|
||||||
|
quads.append((t.s, t.p, t.o, g))
|
||||||
|
|
||||||
|
# Convert to Triple objects (with g field)
|
||||||
triples = [
|
triples = [
|
||||||
Triple(
|
Triple(
|
||||||
s=self.create_value(t[0]),
|
s=create_term(q[0]),
|
||||||
p=self.create_value(t[1]),
|
p=create_term(q[1]),
|
||||||
o=self.create_value(t[2])
|
o=create_term(q[2]),
|
||||||
|
g=q[3] if q[3] != DEFAULT_GRAPH else None
|
||||||
)
|
)
|
||||||
for t in triples
|
for q in quads
|
||||||
]
|
]
|
||||||
|
|
||||||
return triples
|
return triples
|
||||||
|
|
@ -162,4 +203,3 @@ class Processor(TriplesQueryService):
|
||||||
def run():
|
def run():
|
||||||
|
|
||||||
Processor.launch(default_ident, __doc__)
|
Processor.launch(default_ident, __doc__)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,12 +10,24 @@ import logging
|
||||||
from falkordb import FalkorDB
|
from falkordb import FalkorDB
|
||||||
|
|
||||||
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
||||||
from .... schema import Value, Triple
|
from .... schema import Term, Triple, IRI, LITERAL
|
||||||
from .... base import TriplesQueryService
|
from .... base import TriplesQueryService
|
||||||
|
|
||||||
# Module logger
|
# Module logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def get_term_value(term):
|
||||||
|
"""Extract the string value from a Term"""
|
||||||
|
if term is None:
|
||||||
|
return None
|
||||||
|
if term.type == IRI:
|
||||||
|
return term.iri
|
||||||
|
elif term.type == LITERAL:
|
||||||
|
return term.value
|
||||||
|
else:
|
||||||
|
return term.id or term.value
|
||||||
|
|
||||||
default_ident = "triples-query"
|
default_ident = "triples-query"
|
||||||
|
|
||||||
default_graph_url = 'falkor://falkordb:6379'
|
default_graph_url = 'falkor://falkordb:6379'
|
||||||
|
|
@ -42,9 +54,9 @@ class Processor(TriplesQueryService):
|
||||||
def create_value(self, ent):
|
def create_value(self, ent):
|
||||||
|
|
||||||
if ent.startswith("http://") or ent.startswith("https://"):
|
if ent.startswith("http://") or ent.startswith("https://"):
|
||||||
return Value(value=ent, is_uri=True)
|
return Term(type=IRI, iri=ent)
|
||||||
else:
|
else:
|
||||||
return Value(value=ent, is_uri=False)
|
return Term(type=LITERAL, value=ent)
|
||||||
|
|
||||||
async def query_triples(self, query):
|
async def query_triples(self, query):
|
||||||
|
|
||||||
|
|
@ -63,28 +75,28 @@ class Processor(TriplesQueryService):
|
||||||
"RETURN $src as src "
|
"RETURN $src as src "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
params={
|
params={
|
||||||
"src": query.s.value,
|
"src": get_term_value(query.s),
|
||||||
"rel": query.p.value,
|
"rel": get_term_value(query.p),
|
||||||
"value": query.o.value,
|
"value": get_term_value(query.o),
|
||||||
},
|
},
|
||||||
).result_set
|
).result_set
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
triples.append((query.s.value, query.p.value, query.o.value))
|
triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o)))
|
||||||
|
|
||||||
records = self.io.query(
|
records = self.io.query(
|
||||||
"MATCH (src:Node {uri: $src})-[rel:Rel {uri: $rel}]->(dest:Node {uri: $uri}) "
|
"MATCH (src:Node {uri: $src})-[rel:Rel {uri: $rel}]->(dest:Node {uri: $uri}) "
|
||||||
"RETURN $src as src "
|
"RETURN $src as src "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
params={
|
params={
|
||||||
"src": query.s.value,
|
"src": get_term_value(query.s),
|
||||||
"rel": query.p.value,
|
"rel": get_term_value(query.p),
|
||||||
"uri": query.o.value,
|
"uri": get_term_value(query.o),
|
||||||
},
|
},
|
||||||
).result_set
|
).result_set
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
triples.append((query.s.value, query.p.value, query.o.value))
|
triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o)))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
@ -95,26 +107,26 @@ class Processor(TriplesQueryService):
|
||||||
"RETURN dest.value as dest "
|
"RETURN dest.value as dest "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
params={
|
params={
|
||||||
"src": query.s.value,
|
"src": get_term_value(query.s),
|
||||||
"rel": query.p.value,
|
"rel": get_term_value(query.p),
|
||||||
},
|
},
|
||||||
).result_set
|
).result_set
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
triples.append((query.s.value, query.p.value, rec[0]))
|
triples.append((get_term_value(query.s), get_term_value(query.p), rec[0]))
|
||||||
|
|
||||||
records = self.io.query(
|
records = self.io.query(
|
||||||
"MATCH (src:Node {uri: $src})-[rel:Rel {uri: $rel}]->(dest:Node) "
|
"MATCH (src:Node {uri: $src})-[rel:Rel {uri: $rel}]->(dest:Node) "
|
||||||
"RETURN dest.uri as dest "
|
"RETURN dest.uri as dest "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
params={
|
params={
|
||||||
"src": query.s.value,
|
"src": get_term_value(query.s),
|
||||||
"rel": query.p.value,
|
"rel": get_term_value(query.p),
|
||||||
},
|
},
|
||||||
).result_set
|
).result_set
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
triples.append((query.s.value, query.p.value, rec[0]))
|
triples.append((get_term_value(query.s), get_term_value(query.p), rec[0]))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
@ -127,26 +139,26 @@ class Processor(TriplesQueryService):
|
||||||
"RETURN rel.uri as rel "
|
"RETURN rel.uri as rel "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
params={
|
params={
|
||||||
"src": query.s.value,
|
"src": get_term_value(query.s),
|
||||||
"value": query.o.value,
|
"value": get_term_value(query.o),
|
||||||
},
|
},
|
||||||
).result_set
|
).result_set
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
triples.append((query.s.value, rec[0], query.o.value))
|
triples.append((get_term_value(query.s), rec[0], get_term_value(query.o)))
|
||||||
|
|
||||||
records = self.io.query(
|
records = self.io.query(
|
||||||
"MATCH (src:Node {uri: $src})-[rel:Rel]->(dest:Node {uri: $uri}) "
|
"MATCH (src:Node {uri: $src})-[rel:Rel]->(dest:Node {uri: $uri}) "
|
||||||
"RETURN rel.uri as rel "
|
"RETURN rel.uri as rel "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
params={
|
params={
|
||||||
"src": query.s.value,
|
"src": get_term_value(query.s),
|
||||||
"uri": query.o.value,
|
"uri": get_term_value(query.o),
|
||||||
},
|
},
|
||||||
).result_set
|
).result_set
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
triples.append((query.s.value, rec[0], query.o.value))
|
triples.append((get_term_value(query.s), rec[0], get_term_value(query.o)))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
@ -157,24 +169,24 @@ class Processor(TriplesQueryService):
|
||||||
"RETURN rel.uri as rel, dest.value as dest "
|
"RETURN rel.uri as rel, dest.value as dest "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
params={
|
params={
|
||||||
"src": query.s.value,
|
"src": get_term_value(query.s),
|
||||||
},
|
},
|
||||||
).result_set
|
).result_set
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
triples.append((query.s.value, rec[0], rec[1]))
|
triples.append((get_term_value(query.s), rec[0], rec[1]))
|
||||||
|
|
||||||
records = self.io.query(
|
records = self.io.query(
|
||||||
"MATCH (src:Node {uri: $src})-[rel:Rel]->(dest:Node) "
|
"MATCH (src:Node {uri: $src})-[rel:Rel]->(dest:Node) "
|
||||||
"RETURN rel.uri as rel, dest.uri as dest "
|
"RETURN rel.uri as rel, dest.uri as dest "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
params={
|
params={
|
||||||
"src": query.s.value,
|
"src": get_term_value(query.s),
|
||||||
},
|
},
|
||||||
).result_set
|
).result_set
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
triples.append((query.s.value, rec[0], rec[1]))
|
triples.append((get_term_value(query.s), rec[0], rec[1]))
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
@ -190,26 +202,26 @@ class Processor(TriplesQueryService):
|
||||||
"RETURN src.uri as src "
|
"RETURN src.uri as src "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
params={
|
params={
|
||||||
"uri": query.p.value,
|
"uri": get_term_value(query.p),
|
||||||
"value": query.o.value,
|
"value": get_term_value(query.o),
|
||||||
},
|
},
|
||||||
).result_set
|
).result_set
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
triples.append((rec[0], query.p.value, query.o.value))
|
triples.append((rec[0], get_term_value(query.p), get_term_value(query.o)))
|
||||||
|
|
||||||
records = self.io.query(
|
records = self.io.query(
|
||||||
"MATCH (src:Node)-[rel:Rel {uri: $uri}]->(dest:Node {uri: $dest}) "
|
"MATCH (src:Node)-[rel:Rel {uri: $uri}]->(dest:Node {uri: $dest}) "
|
||||||
"RETURN src.uri as src "
|
"RETURN src.uri as src "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
params={
|
params={
|
||||||
"uri": query.p.value,
|
"uri": get_term_value(query.p),
|
||||||
"dest": query.o.value,
|
"dest": get_term_value(query.o),
|
||||||
},
|
},
|
||||||
).result_set
|
).result_set
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
triples.append((rec[0], query.p.value, query.o.value))
|
triples.append((rec[0], get_term_value(query.p), get_term_value(query.o)))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
@ -220,24 +232,24 @@ class Processor(TriplesQueryService):
|
||||||
"RETURN src.uri as src, dest.value as dest "
|
"RETURN src.uri as src, dest.value as dest "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
params={
|
params={
|
||||||
"uri": query.p.value,
|
"uri": get_term_value(query.p),
|
||||||
},
|
},
|
||||||
).result_set
|
).result_set
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
triples.append((rec[0], query.p.value, rec[1]))
|
triples.append((rec[0], get_term_value(query.p), rec[1]))
|
||||||
|
|
||||||
records = self.io.query(
|
records = self.io.query(
|
||||||
"MATCH (src:Node)-[rel:Rel {uri: $uri}]->(dest:Node) "
|
"MATCH (src:Node)-[rel:Rel {uri: $uri}]->(dest:Node) "
|
||||||
"RETURN src.uri as src, dest.uri as dest "
|
"RETURN src.uri as src, dest.uri as dest "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
params={
|
params={
|
||||||
"uri": query.p.value,
|
"uri": get_term_value(query.p),
|
||||||
},
|
},
|
||||||
).result_set
|
).result_set
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
triples.append((rec[0], query.p.value, rec[1]))
|
triples.append((rec[0], get_term_value(query.p), rec[1]))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
@ -250,24 +262,24 @@ class Processor(TriplesQueryService):
|
||||||
"RETURN src.uri as src, rel.uri as rel "
|
"RETURN src.uri as src, rel.uri as rel "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
params={
|
params={
|
||||||
"value": query.o.value,
|
"value": get_term_value(query.o),
|
||||||
},
|
},
|
||||||
).result_set
|
).result_set
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
triples.append((rec[0], rec[1], query.o.value))
|
triples.append((rec[0], rec[1], get_term_value(query.o)))
|
||||||
|
|
||||||
records = self.io.query(
|
records = self.io.query(
|
||||||
"MATCH (src:Node)-[rel:Rel]->(dest:Node {uri: $uri}) "
|
"MATCH (src:Node)-[rel:Rel]->(dest:Node {uri: $uri}) "
|
||||||
"RETURN src.uri as src, rel.uri as rel "
|
"RETURN src.uri as src, rel.uri as rel "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
params={
|
params={
|
||||||
"uri": query.o.value,
|
"uri": get_term_value(query.o),
|
||||||
},
|
},
|
||||||
).result_set
|
).result_set
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
triples.append((rec[0], rec[1], query.o.value))
|
triples.append((rec[0], rec[1], get_term_value(query.o)))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,12 +10,24 @@ import logging
|
||||||
from neo4j import GraphDatabase
|
from neo4j import GraphDatabase
|
||||||
|
|
||||||
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
||||||
from .... schema import Value, Triple
|
from .... schema import Term, Triple, IRI, LITERAL
|
||||||
from .... base import TriplesQueryService
|
from .... base import TriplesQueryService
|
||||||
|
|
||||||
# Module logger
|
# Module logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def get_term_value(term):
|
||||||
|
"""Extract the string value from a Term"""
|
||||||
|
if term is None:
|
||||||
|
return None
|
||||||
|
if term.type == IRI:
|
||||||
|
return term.iri
|
||||||
|
elif term.type == LITERAL:
|
||||||
|
return term.value
|
||||||
|
else:
|
||||||
|
return term.id or term.value
|
||||||
|
|
||||||
default_ident = "triples-query"
|
default_ident = "triples-query"
|
||||||
|
|
||||||
default_graph_host = 'bolt://memgraph:7687'
|
default_graph_host = 'bolt://memgraph:7687'
|
||||||
|
|
@ -47,9 +59,9 @@ class Processor(TriplesQueryService):
|
||||||
def create_value(self, ent):
|
def create_value(self, ent):
|
||||||
|
|
||||||
if ent.startswith("http://") or ent.startswith("https://"):
|
if ent.startswith("http://") or ent.startswith("https://"):
|
||||||
return Value(value=ent, is_uri=True)
|
return Term(type=IRI, iri=ent)
|
||||||
else:
|
else:
|
||||||
return Value(value=ent, is_uri=False)
|
return Term(type=LITERAL, value=ent)
|
||||||
|
|
||||||
async def query_triples(self, query):
|
async def query_triples(self, query):
|
||||||
|
|
||||||
|
|
@ -73,13 +85,13 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
||||||
"RETURN $src as src "
|
"RETURN $src as src "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
src=query.s.value, rel=query.p.value, value=query.o.value,
|
src=get_term_value(query.s), rel=get_term_value(query.p), value=get_term_value(query.o),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
triples.append((query.s.value, query.p.value, query.o.value))
|
triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o)))
|
||||||
|
|
||||||
records, summary, keys = self.io.execute_query(
|
records, summary, keys = self.io.execute_query(
|
||||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
||||||
|
|
@ -87,13 +99,13 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
|
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
|
||||||
"RETURN $src as src "
|
"RETURN $src as src "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
src=query.s.value, rel=query.p.value, uri=query.o.value,
|
src=get_term_value(query.s), rel=get_term_value(query.p), uri=get_term_value(query.o),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
triples.append((query.s.value, query.p.value, query.o.value))
|
triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o)))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
@ -105,14 +117,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Literal {user: $user, collection: $collection}) "
|
"(dest:Literal {user: $user, collection: $collection}) "
|
||||||
"RETURN dest.value as dest "
|
"RETURN dest.value as dest "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
src=query.s.value, rel=query.p.value,
|
src=get_term_value(query.s), rel=get_term_value(query.p),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((query.s.value, query.p.value, data["dest"]))
|
triples.append((get_term_value(query.s), get_term_value(query.p), data["dest"]))
|
||||||
|
|
||||||
records, summary, keys = self.io.execute_query(
|
records, summary, keys = self.io.execute_query(
|
||||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
||||||
|
|
@ -120,14 +132,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Node {user: $user, collection: $collection}) "
|
"(dest:Node {user: $user, collection: $collection}) "
|
||||||
"RETURN dest.uri as dest "
|
"RETURN dest.uri as dest "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
src=query.s.value, rel=query.p.value,
|
src=get_term_value(query.s), rel=get_term_value(query.p),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((query.s.value, query.p.value, data["dest"]))
|
triples.append((get_term_value(query.s), get_term_value(query.p), data["dest"]))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
@ -141,14 +153,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
||||||
"RETURN rel.uri as rel "
|
"RETURN rel.uri as rel "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
src=query.s.value, value=query.o.value,
|
src=get_term_value(query.s), value=get_term_value(query.o),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((query.s.value, data["rel"], query.o.value))
|
triples.append((get_term_value(query.s), data["rel"], get_term_value(query.o)))
|
||||||
|
|
||||||
records, summary, keys = self.io.execute_query(
|
records, summary, keys = self.io.execute_query(
|
||||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
||||||
|
|
@ -156,14 +168,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
|
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
|
||||||
"RETURN rel.uri as rel "
|
"RETURN rel.uri as rel "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
src=query.s.value, uri=query.o.value,
|
src=get_term_value(query.s), uri=get_term_value(query.o),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((query.s.value, data["rel"], query.o.value))
|
triples.append((get_term_value(query.s), data["rel"], get_term_value(query.o)))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
@ -175,14 +187,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Literal {user: $user, collection: $collection}) "
|
"(dest:Literal {user: $user, collection: $collection}) "
|
||||||
"RETURN rel.uri as rel, dest.value as dest "
|
"RETURN rel.uri as rel, dest.value as dest "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
src=query.s.value,
|
src=get_term_value(query.s),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((query.s.value, data["rel"], data["dest"]))
|
triples.append((get_term_value(query.s), data["rel"], data["dest"]))
|
||||||
|
|
||||||
records, summary, keys = self.io.execute_query(
|
records, summary, keys = self.io.execute_query(
|
||||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
||||||
|
|
@ -190,14 +202,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Node {user: $user, collection: $collection}) "
|
"(dest:Node {user: $user, collection: $collection}) "
|
||||||
"RETURN rel.uri as rel, dest.uri as dest "
|
"RETURN rel.uri as rel, dest.uri as dest "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
src=query.s.value,
|
src=get_term_value(query.s),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((query.s.value, data["rel"], data["dest"]))
|
triples.append((get_term_value(query.s), data["rel"], data["dest"]))
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
@ -214,14 +226,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
||||||
"RETURN src.uri as src "
|
"RETURN src.uri as src "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
uri=query.p.value, value=query.o.value,
|
uri=get_term_value(query.p), value=get_term_value(query.o),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((data["src"], query.p.value, query.o.value))
|
triples.append((data["src"], get_term_value(query.p), get_term_value(query.o)))
|
||||||
|
|
||||||
records, summary, keys = self.io.execute_query(
|
records, summary, keys = self.io.execute_query(
|
||||||
"MATCH (src:Node {user: $user, collection: $collection})-"
|
"MATCH (src:Node {user: $user, collection: $collection})-"
|
||||||
|
|
@ -229,14 +241,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Node {uri: $dest, user: $user, collection: $collection}) "
|
"(dest:Node {uri: $dest, user: $user, collection: $collection}) "
|
||||||
"RETURN src.uri as src "
|
"RETURN src.uri as src "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
uri=query.p.value, dest=query.o.value,
|
uri=get_term_value(query.p), dest=get_term_value(query.o),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((data["src"], query.p.value, query.o.value))
|
triples.append((data["src"], get_term_value(query.p), get_term_value(query.o)))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
@ -248,14 +260,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Literal {user: $user, collection: $collection}) "
|
"(dest:Literal {user: $user, collection: $collection}) "
|
||||||
"RETURN src.uri as src, dest.value as dest "
|
"RETURN src.uri as src, dest.value as dest "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
uri=query.p.value,
|
uri=get_term_value(query.p),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((data["src"], query.p.value, data["dest"]))
|
triples.append((data["src"], get_term_value(query.p), data["dest"]))
|
||||||
|
|
||||||
records, summary, keys = self.io.execute_query(
|
records, summary, keys = self.io.execute_query(
|
||||||
"MATCH (src:Node {user: $user, collection: $collection})-"
|
"MATCH (src:Node {user: $user, collection: $collection})-"
|
||||||
|
|
@ -263,14 +275,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Node {user: $user, collection: $collection}) "
|
"(dest:Node {user: $user, collection: $collection}) "
|
||||||
"RETURN src.uri as src, dest.uri as dest "
|
"RETURN src.uri as src, dest.uri as dest "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
uri=query.p.value,
|
uri=get_term_value(query.p),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((data["src"], query.p.value, data["dest"]))
|
triples.append((data["src"], get_term_value(query.p), data["dest"]))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
@ -284,14 +296,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
||||||
"RETURN src.uri as src, rel.uri as rel "
|
"RETURN src.uri as src, rel.uri as rel "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
value=query.o.value,
|
value=get_term_value(query.o),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((data["src"], data["rel"], query.o.value))
|
triples.append((data["src"], data["rel"], get_term_value(query.o)))
|
||||||
|
|
||||||
records, summary, keys = self.io.execute_query(
|
records, summary, keys = self.io.execute_query(
|
||||||
"MATCH (src:Node {user: $user, collection: $collection})-"
|
"MATCH (src:Node {user: $user, collection: $collection})-"
|
||||||
|
|
@ -299,14 +311,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
|
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
|
||||||
"RETURN src.uri as src, rel.uri as rel "
|
"RETURN src.uri as src, rel.uri as rel "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
uri=query.o.value,
|
uri=get_term_value(query.o),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((data["src"], data["rel"], query.o.value))
|
triples.append((data["src"], data["rel"], get_term_value(query.o)))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,12 +10,24 @@ import logging
|
||||||
from neo4j import GraphDatabase
|
from neo4j import GraphDatabase
|
||||||
|
|
||||||
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
||||||
from .... schema import Value, Triple
|
from .... schema import Term, Triple, IRI, LITERAL
|
||||||
from .... base import TriplesQueryService
|
from .... base import TriplesQueryService
|
||||||
|
|
||||||
# Module logger
|
# Module logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def get_term_value(term):
|
||||||
|
"""Extract the string value from a Term"""
|
||||||
|
if term is None:
|
||||||
|
return None
|
||||||
|
if term.type == IRI:
|
||||||
|
return term.iri
|
||||||
|
elif term.type == LITERAL:
|
||||||
|
return term.value
|
||||||
|
else:
|
||||||
|
return term.id or term.value
|
||||||
|
|
||||||
default_ident = "triples-query"
|
default_ident = "triples-query"
|
||||||
|
|
||||||
default_graph_host = 'bolt://neo4j:7687'
|
default_graph_host = 'bolt://neo4j:7687'
|
||||||
|
|
@ -47,9 +59,9 @@ class Processor(TriplesQueryService):
|
||||||
def create_value(self, ent):
|
def create_value(self, ent):
|
||||||
|
|
||||||
if ent.startswith("http://") or ent.startswith("https://"):
|
if ent.startswith("http://") or ent.startswith("https://"):
|
||||||
return Value(value=ent, is_uri=True)
|
return Term(type=IRI, iri=ent)
|
||||||
else:
|
else:
|
||||||
return Value(value=ent, is_uri=False)
|
return Term(type=LITERAL, value=ent)
|
||||||
|
|
||||||
async def query_triples(self, query):
|
async def query_triples(self, query):
|
||||||
|
|
||||||
|
|
@ -73,13 +85,13 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
||||||
"RETURN $src as src "
|
"RETURN $src as src "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
src=query.s.value, rel=query.p.value, value=query.o.value,
|
src=get_term_value(query.s), rel=get_term_value(query.p), value=get_term_value(query.o),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
triples.append((query.s.value, query.p.value, query.o.value))
|
triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o)))
|
||||||
|
|
||||||
records, summary, keys = self.io.execute_query(
|
records, summary, keys = self.io.execute_query(
|
||||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
||||||
|
|
@ -87,13 +99,13 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
|
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
|
||||||
"RETURN $src as src "
|
"RETURN $src as src "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
src=query.s.value, rel=query.p.value, uri=query.o.value,
|
src=get_term_value(query.s), rel=get_term_value(query.p), uri=get_term_value(query.o),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
triples.append((query.s.value, query.p.value, query.o.value))
|
triples.append((get_term_value(query.s), get_term_value(query.p), get_term_value(query.o)))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
@ -105,14 +117,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Literal {user: $user, collection: $collection}) "
|
"(dest:Literal {user: $user, collection: $collection}) "
|
||||||
"RETURN dest.value as dest "
|
"RETURN dest.value as dest "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
src=query.s.value, rel=query.p.value,
|
src=get_term_value(query.s), rel=get_term_value(query.p),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((query.s.value, query.p.value, data["dest"]))
|
triples.append((get_term_value(query.s), get_term_value(query.p), data["dest"]))
|
||||||
|
|
||||||
records, summary, keys = self.io.execute_query(
|
records, summary, keys = self.io.execute_query(
|
||||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
||||||
|
|
@ -120,14 +132,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Node {user: $user, collection: $collection}) "
|
"(dest:Node {user: $user, collection: $collection}) "
|
||||||
"RETURN dest.uri as dest "
|
"RETURN dest.uri as dest "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
src=query.s.value, rel=query.p.value,
|
src=get_term_value(query.s), rel=get_term_value(query.p),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((query.s.value, query.p.value, data["dest"]))
|
triples.append((get_term_value(query.s), get_term_value(query.p), data["dest"]))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
@ -141,14 +153,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
||||||
"RETURN rel.uri as rel "
|
"RETURN rel.uri as rel "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
src=query.s.value, value=query.o.value,
|
src=get_term_value(query.s), value=get_term_value(query.o),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((query.s.value, data["rel"], query.o.value))
|
triples.append((get_term_value(query.s), data["rel"], get_term_value(query.o)))
|
||||||
|
|
||||||
records, summary, keys = self.io.execute_query(
|
records, summary, keys = self.io.execute_query(
|
||||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
||||||
|
|
@ -156,14 +168,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
|
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
|
||||||
"RETURN rel.uri as rel "
|
"RETURN rel.uri as rel "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
src=query.s.value, uri=query.o.value,
|
src=get_term_value(query.s), uri=get_term_value(query.o),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((query.s.value, data["rel"], query.o.value))
|
triples.append((get_term_value(query.s), data["rel"], get_term_value(query.o)))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
@ -175,14 +187,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Literal {user: $user, collection: $collection}) "
|
"(dest:Literal {user: $user, collection: $collection}) "
|
||||||
"RETURN rel.uri as rel, dest.value as dest "
|
"RETURN rel.uri as rel, dest.value as dest "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
src=query.s.value,
|
src=get_term_value(query.s),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((query.s.value, data["rel"], data["dest"]))
|
triples.append((get_term_value(query.s), data["rel"], data["dest"]))
|
||||||
|
|
||||||
records, summary, keys = self.io.execute_query(
|
records, summary, keys = self.io.execute_query(
|
||||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
"MATCH (src:Node {uri: $src, user: $user, collection: $collection})-"
|
||||||
|
|
@ -190,14 +202,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Node {user: $user, collection: $collection}) "
|
"(dest:Node {user: $user, collection: $collection}) "
|
||||||
"RETURN rel.uri as rel, dest.uri as dest "
|
"RETURN rel.uri as rel, dest.uri as dest "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
src=query.s.value,
|
src=get_term_value(query.s),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((query.s.value, data["rel"], data["dest"]))
|
triples.append((get_term_value(query.s), data["rel"], data["dest"]))
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
@ -214,14 +226,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
||||||
"RETURN src.uri as src "
|
"RETURN src.uri as src "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
uri=query.p.value, value=query.o.value,
|
uri=get_term_value(query.p), value=get_term_value(query.o),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((data["src"], query.p.value, query.o.value))
|
triples.append((data["src"], get_term_value(query.p), get_term_value(query.o)))
|
||||||
|
|
||||||
records, summary, keys = self.io.execute_query(
|
records, summary, keys = self.io.execute_query(
|
||||||
"MATCH (src:Node {user: $user, collection: $collection})-"
|
"MATCH (src:Node {user: $user, collection: $collection})-"
|
||||||
|
|
@ -229,14 +241,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Node {uri: $dest, user: $user, collection: $collection}) "
|
"(dest:Node {uri: $dest, user: $user, collection: $collection}) "
|
||||||
"RETURN src.uri as src "
|
"RETURN src.uri as src "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
uri=query.p.value, dest=query.o.value,
|
uri=get_term_value(query.p), dest=get_term_value(query.o),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((data["src"], query.p.value, query.o.value))
|
triples.append((data["src"], get_term_value(query.p), get_term_value(query.o)))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
@ -248,14 +260,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Literal {user: $user, collection: $collection}) "
|
"(dest:Literal {user: $user, collection: $collection}) "
|
||||||
"RETURN src.uri as src, dest.value as dest "
|
"RETURN src.uri as src, dest.value as dest "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
uri=query.p.value,
|
uri=get_term_value(query.p),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((data["src"], query.p.value, data["dest"]))
|
triples.append((data["src"], get_term_value(query.p), data["dest"]))
|
||||||
|
|
||||||
records, summary, keys = self.io.execute_query(
|
records, summary, keys = self.io.execute_query(
|
||||||
"MATCH (src:Node {user: $user, collection: $collection})-"
|
"MATCH (src:Node {user: $user, collection: $collection})-"
|
||||||
|
|
@ -263,14 +275,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Node {user: $user, collection: $collection}) "
|
"(dest:Node {user: $user, collection: $collection}) "
|
||||||
"RETURN src.uri as src, dest.uri as dest "
|
"RETURN src.uri as src, dest.uri as dest "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
uri=query.p.value,
|
uri=get_term_value(query.p),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((data["src"], query.p.value, data["dest"]))
|
triples.append((data["src"], get_term_value(query.p), data["dest"]))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
@ -284,14 +296,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
"(dest:Literal {value: $value, user: $user, collection: $collection}) "
|
||||||
"RETURN src.uri as src, rel.uri as rel "
|
"RETURN src.uri as src, rel.uri as rel "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
value=query.o.value,
|
value=get_term_value(query.o),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((data["src"], data["rel"], query.o.value))
|
triples.append((data["src"], data["rel"], get_term_value(query.o)))
|
||||||
|
|
||||||
records, summary, keys = self.io.execute_query(
|
records, summary, keys = self.io.execute_query(
|
||||||
"MATCH (src:Node {user: $user, collection: $collection})-"
|
"MATCH (src:Node {user: $user, collection: $collection})-"
|
||||||
|
|
@ -299,14 +311,14 @@ class Processor(TriplesQueryService):
|
||||||
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
|
"(dest:Node {uri: $uri, user: $user, collection: $collection}) "
|
||||||
"RETURN src.uri as src, rel.uri as rel "
|
"RETURN src.uri as src, rel.uri as rel "
|
||||||
"LIMIT " + str(query.limit),
|
"LIMIT " + str(query.limit),
|
||||||
uri=query.o.value,
|
uri=get_term_value(query.o),
|
||||||
user=user, collection=collection,
|
user=user, collection=collection,
|
||||||
database_=self.db,
|
database_=self.db,
|
||||||
)
|
)
|
||||||
|
|
||||||
for rec in records:
|
for rec in records:
|
||||||
data = rec.data()
|
data = rec.data()
|
||||||
triples.append((data["src"], data["rel"], query.o.value))
|
triples.append((data["src"], data["rel"], get_term_value(query.o)))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,10 +9,24 @@ from .... direct.milvus_graph_embeddings import EntityVectors
|
||||||
from .... base import GraphEmbeddingsStoreService, CollectionConfigHandler
|
from .... base import GraphEmbeddingsStoreService, CollectionConfigHandler
|
||||||
from .... base import AsyncProcessor, Consumer, Producer
|
from .... base import AsyncProcessor, Consumer, Producer
|
||||||
from .... base import ConsumerMetrics, ProducerMetrics
|
from .... base import ConsumerMetrics, ProducerMetrics
|
||||||
|
from .... schema import IRI, LITERAL
|
||||||
|
|
||||||
# Module logger
|
# Module logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def get_term_value(term):
|
||||||
|
"""Extract the string value from a Term"""
|
||||||
|
if term is None:
|
||||||
|
return None
|
||||||
|
if term.type == IRI:
|
||||||
|
return term.iri
|
||||||
|
elif term.type == LITERAL:
|
||||||
|
return term.value
|
||||||
|
else:
|
||||||
|
# For blank nodes or other types, use id or value
|
||||||
|
return term.id or term.value
|
||||||
|
|
||||||
default_ident = "ge-write"
|
default_ident = "ge-write"
|
||||||
default_store_uri = 'http://localhost:19530'
|
default_store_uri = 'http://localhost:19530'
|
||||||
|
|
||||||
|
|
@ -36,11 +50,12 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService):
|
||||||
async def store_graph_embeddings(self, message):
|
async def store_graph_embeddings(self, message):
|
||||||
|
|
||||||
for entity in message.entities:
|
for entity in message.entities:
|
||||||
|
entity_value = get_term_value(entity.entity)
|
||||||
|
|
||||||
if entity.entity.value != "" and entity.entity.value is not None:
|
if entity_value != "" and entity_value is not None:
|
||||||
for vec in entity.vectors:
|
for vec in entity.vectors:
|
||||||
self.vecstore.insert(
|
self.vecstore.insert(
|
||||||
vec, entity.entity.value,
|
vec, entity_value,
|
||||||
message.metadata.user,
|
message.metadata.user,
|
||||||
message.metadata.collection
|
message.metadata.collection
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -14,10 +14,24 @@ import logging
|
||||||
from .... base import GraphEmbeddingsStoreService, CollectionConfigHandler
|
from .... base import GraphEmbeddingsStoreService, CollectionConfigHandler
|
||||||
from .... base import AsyncProcessor, Consumer, Producer
|
from .... base import AsyncProcessor, Consumer, Producer
|
||||||
from .... base import ConsumerMetrics, ProducerMetrics
|
from .... base import ConsumerMetrics, ProducerMetrics
|
||||||
|
from .... schema import IRI, LITERAL
|
||||||
|
|
||||||
# Module logger
|
# Module logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def get_term_value(term):
|
||||||
|
"""Extract the string value from a Term"""
|
||||||
|
if term is None:
|
||||||
|
return None
|
||||||
|
if term.type == IRI:
|
||||||
|
return term.iri
|
||||||
|
elif term.type == LITERAL:
|
||||||
|
return term.value
|
||||||
|
else:
|
||||||
|
# For blank nodes or other types, use id or value
|
||||||
|
return term.id or term.value
|
||||||
|
|
||||||
default_ident = "ge-write"
|
default_ident = "ge-write"
|
||||||
default_api_key = os.getenv("PINECONE_API_KEY", "not-specified")
|
default_api_key = os.getenv("PINECONE_API_KEY", "not-specified")
|
||||||
default_cloud = "aws"
|
default_cloud = "aws"
|
||||||
|
|
@ -100,8 +114,9 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService):
|
||||||
return
|
return
|
||||||
|
|
||||||
for entity in message.entities:
|
for entity in message.entities:
|
||||||
|
entity_value = get_term_value(entity.entity)
|
||||||
|
|
||||||
if entity.entity.value == "" or entity.entity.value is None:
|
if entity_value == "" or entity_value is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
for vec in entity.vectors:
|
for vec in entity.vectors:
|
||||||
|
|
@ -126,7 +141,7 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService):
|
||||||
{
|
{
|
||||||
"id": vector_id,
|
"id": vector_id,
|
||||||
"values": vec,
|
"values": vec,
|
||||||
"metadata": { "entity": entity.entity.value },
|
"metadata": { "entity": entity_value },
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,10 +12,25 @@ import logging
|
||||||
from .... base import GraphEmbeddingsStoreService, CollectionConfigHandler
|
from .... base import GraphEmbeddingsStoreService, CollectionConfigHandler
|
||||||
from .... base import AsyncProcessor, Consumer, Producer
|
from .... base import AsyncProcessor, Consumer, Producer
|
||||||
from .... base import ConsumerMetrics, ProducerMetrics
|
from .... base import ConsumerMetrics, ProducerMetrics
|
||||||
|
from .... schema import IRI, LITERAL
|
||||||
|
|
||||||
# Module logger
|
# Module logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def get_term_value(term):
|
||||||
|
"""Extract the string value from a Term"""
|
||||||
|
if term is None:
|
||||||
|
return None
|
||||||
|
if term.type == IRI:
|
||||||
|
return term.iri
|
||||||
|
elif term.type == LITERAL:
|
||||||
|
return term.value
|
||||||
|
else:
|
||||||
|
# For blank nodes or other types, use id or value
|
||||||
|
return term.id or term.value
|
||||||
|
|
||||||
|
|
||||||
default_ident = "ge-write"
|
default_ident = "ge-write"
|
||||||
|
|
||||||
default_store_uri = 'http://localhost:6333'
|
default_store_uri = 'http://localhost:6333'
|
||||||
|
|
@ -51,8 +66,10 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService):
|
||||||
return
|
return
|
||||||
|
|
||||||
for entity in message.entities:
|
for entity in message.entities:
|
||||||
|
entity_value = get_term_value(entity.entity)
|
||||||
|
|
||||||
if entity.entity.value == "" or entity.entity.value is None: return
|
if entity_value == "" or entity_value is None:
|
||||||
|
continue
|
||||||
|
|
||||||
for vec in entity.vectors:
|
for vec in entity.vectors:
|
||||||
|
|
||||||
|
|
@ -80,7 +97,7 @@ class Processor(CollectionConfigHandler, GraphEmbeddingsStoreService):
|
||||||
id=str(uuid.uuid4()),
|
id=str(uuid.uuid4()),
|
||||||
vector=vec,
|
vector=vec,
|
||||||
payload={
|
payload={
|
||||||
"entity": entity.entity.value,
|
"entity": entity_value,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -10,11 +10,12 @@ import argparse
|
||||||
import time
|
import time
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from .... direct.cassandra_kg import KnowledgeGraph
|
from .... direct.cassandra_kg import KnowledgeGraph, DEFAULT_GRAPH
|
||||||
from .... base import TriplesStoreService, CollectionConfigHandler
|
from .... base import TriplesStoreService, CollectionConfigHandler
|
||||||
from .... base import AsyncProcessor, Consumer, Producer
|
from .... base import AsyncProcessor, Consumer, Producer
|
||||||
from .... base import ConsumerMetrics, ProducerMetrics
|
from .... base import ConsumerMetrics, ProducerMetrics
|
||||||
from .... base.cassandra_config import add_cassandra_args, resolve_cassandra_config
|
from .... base.cassandra_config import add_cassandra_args, resolve_cassandra_config
|
||||||
|
from .... schema import IRI, LITERAL
|
||||||
|
|
||||||
# Module logger
|
# Module logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -22,6 +23,19 @@ logger = logging.getLogger(__name__)
|
||||||
default_ident = "triples-write"
|
default_ident = "triples-write"
|
||||||
|
|
||||||
|
|
||||||
|
def get_term_value(term):
|
||||||
|
"""Extract the string value from a Term"""
|
||||||
|
if term is None:
|
||||||
|
return None
|
||||||
|
if term.type == IRI:
|
||||||
|
return term.iri
|
||||||
|
elif term.type == LITERAL:
|
||||||
|
return term.value
|
||||||
|
else:
|
||||||
|
# For blank nodes or other types, use id or value
|
||||||
|
return term.id or term.value
|
||||||
|
|
||||||
|
|
||||||
class Processor(CollectionConfigHandler, TriplesStoreService):
|
class Processor(CollectionConfigHandler, TriplesStoreService):
|
||||||
|
|
||||||
def __init__(self, **params):
|
def __init__(self, **params):
|
||||||
|
|
@ -84,11 +98,19 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
|
||||||
self.table = user
|
self.table = user
|
||||||
|
|
||||||
for t in message.triples:
|
for t in message.triples:
|
||||||
|
# Extract values from Term objects
|
||||||
|
s_val = get_term_value(t.s)
|
||||||
|
p_val = get_term_value(t.p)
|
||||||
|
o_val = get_term_value(t.o)
|
||||||
|
# t.g is None for default graph, or a graph IRI
|
||||||
|
g_val = t.g if t.g is not None else DEFAULT_GRAPH
|
||||||
|
|
||||||
self.tg.insert(
|
self.tg.insert(
|
||||||
message.metadata.collection,
|
message.metadata.collection,
|
||||||
t.s.value,
|
s_val,
|
||||||
t.p.value,
|
p_val,
|
||||||
t.o.value
|
o_val,
|
||||||
|
g=g_val
|
||||||
)
|
)
|
||||||
|
|
||||||
async def create_collection(self, user: str, collection: str, metadata: dict):
|
async def create_collection(self, user: str, collection: str, metadata: dict):
|
||||||
|
|
|
||||||
|
|
@ -15,12 +15,27 @@ from falkordb import FalkorDB
|
||||||
from .... base import TriplesStoreService, CollectionConfigHandler
|
from .... base import TriplesStoreService, CollectionConfigHandler
|
||||||
from .... base import AsyncProcessor, Consumer, Producer
|
from .... base import AsyncProcessor, Consumer, Producer
|
||||||
from .... base import ConsumerMetrics, ProducerMetrics
|
from .... base import ConsumerMetrics, ProducerMetrics
|
||||||
|
from .... schema import IRI, LITERAL
|
||||||
|
|
||||||
# Module logger
|
# Module logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
default_ident = "triples-write"
|
default_ident = "triples-write"
|
||||||
|
|
||||||
|
|
||||||
|
def get_term_value(term):
|
||||||
|
"""Extract the string value from a Term"""
|
||||||
|
if term is None:
|
||||||
|
return None
|
||||||
|
if term.type == IRI:
|
||||||
|
return term.iri
|
||||||
|
elif term.type == LITERAL:
|
||||||
|
return term.value
|
||||||
|
else:
|
||||||
|
# For blank nodes or other types, use id or value
|
||||||
|
return term.id or term.value
|
||||||
|
|
||||||
|
|
||||||
default_graph_url = 'falkor://falkordb:6379'
|
default_graph_url = 'falkor://falkordb:6379'
|
||||||
default_database = 'falkordb'
|
default_database = 'falkordb'
|
||||||
|
|
||||||
|
|
@ -164,14 +179,18 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
|
||||||
|
|
||||||
for t in message.triples:
|
for t in message.triples:
|
||||||
|
|
||||||
self.create_node(t.s.value, user, collection)
|
s_val = get_term_value(t.s)
|
||||||
|
p_val = get_term_value(t.p)
|
||||||
|
o_val = get_term_value(t.o)
|
||||||
|
|
||||||
if t.o.is_uri:
|
self.create_node(s_val, user, collection)
|
||||||
self.create_node(t.o.value, user, collection)
|
|
||||||
self.relate_node(t.s.value, t.p.value, t.o.value, user, collection)
|
if t.o.type == IRI:
|
||||||
|
self.create_node(o_val, user, collection)
|
||||||
|
self.relate_node(s_val, p_val, o_val, user, collection)
|
||||||
else:
|
else:
|
||||||
self.create_literal(t.o.value, user, collection)
|
self.create_literal(o_val, user, collection)
|
||||||
self.relate_literal(t.s.value, t.p.value, t.o.value, user, collection)
|
self.relate_literal(s_val, p_val, o_val, user, collection)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def add_args(parser):
|
def add_args(parser):
|
||||||
|
|
|
||||||
|
|
@ -15,12 +15,27 @@ from neo4j import GraphDatabase
|
||||||
from .... base import TriplesStoreService, CollectionConfigHandler
|
from .... base import TriplesStoreService, CollectionConfigHandler
|
||||||
from .... base import AsyncProcessor, Consumer, Producer
|
from .... base import AsyncProcessor, Consumer, Producer
|
||||||
from .... base import ConsumerMetrics, ProducerMetrics
|
from .... base import ConsumerMetrics, ProducerMetrics
|
||||||
|
from .... schema import IRI, LITERAL
|
||||||
|
|
||||||
# Module logger
|
# Module logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
default_ident = "triples-write"
|
default_ident = "triples-write"
|
||||||
|
|
||||||
|
|
||||||
|
def get_term_value(term):
|
||||||
|
"""Extract the string value from a Term"""
|
||||||
|
if term is None:
|
||||||
|
return None
|
||||||
|
if term.type == IRI:
|
||||||
|
return term.iri
|
||||||
|
elif term.type == LITERAL:
|
||||||
|
return term.value
|
||||||
|
else:
|
||||||
|
# For blank nodes or other types, use id or value
|
||||||
|
return term.id or term.value
|
||||||
|
|
||||||
|
|
||||||
default_graph_host = 'bolt://memgraph:7687'
|
default_graph_host = 'bolt://memgraph:7687'
|
||||||
default_username = 'memgraph'
|
default_username = 'memgraph'
|
||||||
default_password = 'password'
|
default_password = 'password'
|
||||||
|
|
@ -204,25 +219,29 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
|
||||||
|
|
||||||
def create_triple(self, tx, t, user, collection):
|
def create_triple(self, tx, t, user, collection):
|
||||||
|
|
||||||
|
s_val = get_term_value(t.s)
|
||||||
|
p_val = get_term_value(t.p)
|
||||||
|
o_val = get_term_value(t.o)
|
||||||
|
|
||||||
# Create new s node with given uri, if not exists
|
# Create new s node with given uri, if not exists
|
||||||
result = tx.run(
|
result = tx.run(
|
||||||
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||||
uri=t.s.value, user=user, collection=collection
|
uri=s_val, user=user, collection=collection
|
||||||
)
|
)
|
||||||
|
|
||||||
if t.o.is_uri:
|
if t.o.type == IRI:
|
||||||
|
|
||||||
# Create new o node with given uri, if not exists
|
# Create new o node with given uri, if not exists
|
||||||
result = tx.run(
|
result = tx.run(
|
||||||
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
"MERGE (n:Node {uri: $uri, user: $user, collection: $collection})",
|
||||||
uri=t.o.value, user=user, collection=collection
|
uri=o_val, user=user, collection=collection
|
||||||
)
|
)
|
||||||
|
|
||||||
result = tx.run(
|
result = tx.run(
|
||||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||||
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
|
"MATCH (dest:Node {uri: $dest, user: $user, collection: $collection}) "
|
||||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||||
src=t.s.value, dest=t.o.value, uri=t.p.value, user=user, collection=collection,
|
src=s_val, dest=o_val, uri=p_val, user=user, collection=collection,
|
||||||
)
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
@ -230,14 +249,14 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
|
||||||
# Create new o literal with given uri, if not exists
|
# Create new o literal with given uri, if not exists
|
||||||
result = tx.run(
|
result = tx.run(
|
||||||
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
|
"MERGE (n:Literal {value: $value, user: $user, collection: $collection})",
|
||||||
value=t.o.value, user=user, collection=collection
|
value=o_val, user=user, collection=collection
|
||||||
)
|
)
|
||||||
|
|
||||||
result = tx.run(
|
result = tx.run(
|
||||||
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
"MATCH (src:Node {uri: $src, user: $user, collection: $collection}) "
|
||||||
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
|
"MATCH (dest:Literal {value: $dest, user: $user, collection: $collection}) "
|
||||||
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
"MERGE (src)-[:Rel {uri: $uri, user: $user, collection: $collection}]->(dest)",
|
||||||
src=t.s.value, dest=t.o.value, uri=t.p.value, user=user, collection=collection,
|
src=s_val, dest=o_val, uri=p_val, user=user, collection=collection,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def store_triples(self, message):
|
async def store_triples(self, message):
|
||||||
|
|
@ -257,14 +276,18 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
|
||||||
|
|
||||||
for t in message.triples:
|
for t in message.triples:
|
||||||
|
|
||||||
self.create_node(t.s.value, user, collection)
|
s_val = get_term_value(t.s)
|
||||||
|
p_val = get_term_value(t.p)
|
||||||
|
o_val = get_term_value(t.o)
|
||||||
|
|
||||||
if t.o.is_uri:
|
self.create_node(s_val, user, collection)
|
||||||
self.create_node(t.o.value, user, collection)
|
|
||||||
self.relate_node(t.s.value, t.p.value, t.o.value, user, collection)
|
if t.o.type == IRI:
|
||||||
|
self.create_node(o_val, user, collection)
|
||||||
|
self.relate_node(s_val, p_val, o_val, user, collection)
|
||||||
else:
|
else:
|
||||||
self.create_literal(t.o.value, user, collection)
|
self.create_literal(o_val, user, collection)
|
||||||
self.relate_literal(t.s.value, t.p.value, t.o.value, user, collection)
|
self.relate_literal(s_val, p_val, o_val, user, collection)
|
||||||
|
|
||||||
# Alternative implementation using transactions
|
# Alternative implementation using transactions
|
||||||
# with self.io.session(database=self.db) as session:
|
# with self.io.session(database=self.db) as session:
|
||||||
|
|
|
||||||
|
|
@ -14,12 +14,27 @@ from neo4j import GraphDatabase
|
||||||
from .... base import TriplesStoreService, CollectionConfigHandler
|
from .... base import TriplesStoreService, CollectionConfigHandler
|
||||||
from .... base import AsyncProcessor, Consumer, Producer
|
from .... base import AsyncProcessor, Consumer, Producer
|
||||||
from .... base import ConsumerMetrics, ProducerMetrics
|
from .... base import ConsumerMetrics, ProducerMetrics
|
||||||
|
from .... schema import IRI, LITERAL
|
||||||
|
|
||||||
# Module logger
|
# Module logger
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
default_ident = "triples-write"
|
default_ident = "triples-write"
|
||||||
|
|
||||||
|
|
||||||
|
def get_term_value(term):
|
||||||
|
"""Extract the string value from a Term"""
|
||||||
|
if term is None:
|
||||||
|
return None
|
||||||
|
if term.type == IRI:
|
||||||
|
return term.iri
|
||||||
|
elif term.type == LITERAL:
|
||||||
|
return term.value
|
||||||
|
else:
|
||||||
|
# For blank nodes or other types, use id or value
|
||||||
|
return term.id or term.value
|
||||||
|
|
||||||
|
|
||||||
default_graph_host = 'bolt://neo4j:7687'
|
default_graph_host = 'bolt://neo4j:7687'
|
||||||
default_username = 'neo4j'
|
default_username = 'neo4j'
|
||||||
default_password = 'password'
|
default_password = 'password'
|
||||||
|
|
@ -212,14 +227,18 @@ class Processor(CollectionConfigHandler, TriplesStoreService):
|
||||||
|
|
||||||
for t in message.triples:
|
for t in message.triples:
|
||||||
|
|
||||||
self.create_node(t.s.value, user, collection)
|
s_val = get_term_value(t.s)
|
||||||
|
p_val = get_term_value(t.p)
|
||||||
|
o_val = get_term_value(t.o)
|
||||||
|
|
||||||
if t.o.is_uri:
|
self.create_node(s_val, user, collection)
|
||||||
self.create_node(t.o.value, user, collection)
|
|
||||||
self.relate_node(t.s.value, t.p.value, t.o.value, user, collection)
|
if t.o.type == IRI:
|
||||||
|
self.create_node(o_val, user, collection)
|
||||||
|
self.relate_node(s_val, p_val, o_val, user, collection)
|
||||||
else:
|
else:
|
||||||
self.create_literal(t.o.value, user, collection)
|
self.create_literal(o_val, user, collection)
|
||||||
self.relate_literal(t.s.value, t.p.value, t.o.value, user, collection)
|
self.relate_literal(s_val, p_val, o_val, user, collection)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def add_args(parser):
|
def add_args(parser):
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
|
|
||||||
from .. schema import KnowledgeResponse, Triple, Triples, EntityEmbeddings
|
from .. schema import KnowledgeResponse, Triple, Triples, EntityEmbeddings
|
||||||
from .. schema import Metadata, Value, GraphEmbeddings
|
from .. schema import Metadata, GraphEmbeddings
|
||||||
|
|
||||||
from cassandra.cluster import Cluster
|
from cassandra.cluster import Cluster
|
||||||
from cassandra.auth import PlainTextAuthProvider
|
from cassandra.auth import PlainTextAuthProvider
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,24 @@
|
||||||
|
|
||||||
from .. schema import KnowledgeResponse, Triple, Triples, EntityEmbeddings
|
from .. schema import KnowledgeResponse, Triple, Triples, EntityEmbeddings
|
||||||
from .. schema import Metadata, Value, GraphEmbeddings
|
from .. schema import Metadata, Term, IRI, LITERAL, GraphEmbeddings
|
||||||
|
|
||||||
from cassandra.cluster import Cluster
|
from cassandra.cluster import Cluster
|
||||||
|
|
||||||
|
|
||||||
|
def term_to_tuple(term):
|
||||||
|
"""Convert Term to (value, is_uri) tuple for database storage."""
|
||||||
|
if term.type == IRI:
|
||||||
|
return (term.iri, True)
|
||||||
|
else: # LITERAL
|
||||||
|
return (term.value, False)
|
||||||
|
|
||||||
|
|
||||||
|
def tuple_to_term(value, is_uri):
|
||||||
|
"""Convert (value, is_uri) tuple from database to Term."""
|
||||||
|
if is_uri:
|
||||||
|
return Term(type=IRI, iri=value)
|
||||||
|
else:
|
||||||
|
return Term(type=LITERAL, value=value)
|
||||||
from cassandra.auth import PlainTextAuthProvider
|
from cassandra.auth import PlainTextAuthProvider
|
||||||
from ssl import SSLContext, PROTOCOL_TLSv1_2
|
from ssl import SSLContext, PROTOCOL_TLSv1_2
|
||||||
|
|
||||||
|
|
@ -205,8 +221,7 @@ class KnowledgeTableStore:
|
||||||
if m.metadata.metadata:
|
if m.metadata.metadata:
|
||||||
metadata = [
|
metadata = [
|
||||||
(
|
(
|
||||||
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
|
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
|
||||||
v.o.value, v.o.is_uri
|
|
||||||
)
|
)
|
||||||
for v in m.metadata.metadata
|
for v in m.metadata.metadata
|
||||||
]
|
]
|
||||||
|
|
@ -215,8 +230,7 @@ class KnowledgeTableStore:
|
||||||
|
|
||||||
triples = [
|
triples = [
|
||||||
(
|
(
|
||||||
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
|
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
|
||||||
v.o.value, v.o.is_uri
|
|
||||||
)
|
)
|
||||||
for v in m.triples
|
for v in m.triples
|
||||||
]
|
]
|
||||||
|
|
@ -248,8 +262,7 @@ class KnowledgeTableStore:
|
||||||
if m.metadata.metadata:
|
if m.metadata.metadata:
|
||||||
metadata = [
|
metadata = [
|
||||||
(
|
(
|
||||||
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
|
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
|
||||||
v.o.value, v.o.is_uri
|
|
||||||
)
|
)
|
||||||
for v in m.metadata.metadata
|
for v in m.metadata.metadata
|
||||||
]
|
]
|
||||||
|
|
@ -258,7 +271,7 @@ class KnowledgeTableStore:
|
||||||
|
|
||||||
entities = [
|
entities = [
|
||||||
(
|
(
|
||||||
(v.entity.value, v.entity.is_uri),
|
term_to_tuple(v.entity),
|
||||||
v.vectors
|
v.vectors
|
||||||
)
|
)
|
||||||
for v in m.entities
|
for v in m.entities
|
||||||
|
|
@ -291,8 +304,7 @@ class KnowledgeTableStore:
|
||||||
if m.metadata.metadata:
|
if m.metadata.metadata:
|
||||||
metadata = [
|
metadata = [
|
||||||
(
|
(
|
||||||
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
|
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
|
||||||
v.o.value, v.o.is_uri
|
|
||||||
)
|
)
|
||||||
for v in m.metadata.metadata
|
for v in m.metadata.metadata
|
||||||
]
|
]
|
||||||
|
|
@ -414,9 +426,9 @@ class KnowledgeTableStore:
|
||||||
if row[2]:
|
if row[2]:
|
||||||
metadata = [
|
metadata = [
|
||||||
Triple(
|
Triple(
|
||||||
s = Value(value = elt[0], is_uri = elt[1]),
|
s = tuple_to_term(elt[0], elt[1]),
|
||||||
p = Value(value = elt[2], is_uri = elt[3]),
|
p = tuple_to_term(elt[2], elt[3]),
|
||||||
o = Value(value = elt[4], is_uri = elt[5]),
|
o = tuple_to_term(elt[4], elt[5]),
|
||||||
)
|
)
|
||||||
for elt in row[2]
|
for elt in row[2]
|
||||||
]
|
]
|
||||||
|
|
@ -425,9 +437,9 @@ class KnowledgeTableStore:
|
||||||
|
|
||||||
triples = [
|
triples = [
|
||||||
Triple(
|
Triple(
|
||||||
s = Value(value = elt[0], is_uri = elt[1]),
|
s = tuple_to_term(elt[0], elt[1]),
|
||||||
p = Value(value = elt[2], is_uri = elt[3]),
|
p = tuple_to_term(elt[2], elt[3]),
|
||||||
o = Value(value = elt[4], is_uri = elt[5]),
|
o = tuple_to_term(elt[4], elt[5]),
|
||||||
)
|
)
|
||||||
for elt in row[3]
|
for elt in row[3]
|
||||||
]
|
]
|
||||||
|
|
@ -470,9 +482,9 @@ class KnowledgeTableStore:
|
||||||
if row[2]:
|
if row[2]:
|
||||||
metadata = [
|
metadata = [
|
||||||
Triple(
|
Triple(
|
||||||
s = Value(value = elt[0], is_uri = elt[1]),
|
s = tuple_to_term(elt[0], elt[1]),
|
||||||
p = Value(value = elt[2], is_uri = elt[3]),
|
p = tuple_to_term(elt[2], elt[3]),
|
||||||
o = Value(value = elt[4], is_uri = elt[5]),
|
o = tuple_to_term(elt[4], elt[5]),
|
||||||
)
|
)
|
||||||
for elt in row[2]
|
for elt in row[2]
|
||||||
]
|
]
|
||||||
|
|
@ -481,7 +493,7 @@ class KnowledgeTableStore:
|
||||||
|
|
||||||
entities = [
|
entities = [
|
||||||
EntityEmbeddings(
|
EntityEmbeddings(
|
||||||
entity = Value(value = ent[0][0], is_uri = ent[0][1]),
|
entity = tuple_to_term(ent[0][0], ent[0][1]),
|
||||||
vectors = ent[1]
|
vectors = ent[1]
|
||||||
)
|
)
|
||||||
for ent in row[3]
|
for ent in row[3]
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,24 @@
|
||||||
|
|
||||||
from .. schema import LibrarianRequest, LibrarianResponse
|
from .. schema import LibrarianRequest, LibrarianResponse
|
||||||
from .. schema import DocumentMetadata, ProcessingMetadata
|
from .. schema import DocumentMetadata, ProcessingMetadata
|
||||||
from .. schema import Error, Triple, Value
|
from .. schema import Error, Triple, Term, IRI, LITERAL
|
||||||
from .. knowledge import hash
|
from .. knowledge import hash
|
||||||
|
|
||||||
|
|
||||||
|
def term_to_tuple(term):
|
||||||
|
"""Convert Term to (value, is_uri) tuple for database storage."""
|
||||||
|
if term.type == IRI:
|
||||||
|
return (term.iri, True)
|
||||||
|
else: # LITERAL
|
||||||
|
return (term.value, False)
|
||||||
|
|
||||||
|
|
||||||
|
def tuple_to_term(value, is_uri):
|
||||||
|
"""Convert (value, is_uri) tuple from database to Term."""
|
||||||
|
if is_uri:
|
||||||
|
return Term(type=IRI, iri=value)
|
||||||
|
else:
|
||||||
|
return Term(type=LITERAL, value=value)
|
||||||
from .. exceptions import RequestError
|
from .. exceptions import RequestError
|
||||||
|
|
||||||
from cassandra.cluster import Cluster
|
from cassandra.cluster import Cluster
|
||||||
|
|
@ -215,8 +231,7 @@ class LibraryTableStore:
|
||||||
|
|
||||||
metadata = [
|
metadata = [
|
||||||
(
|
(
|
||||||
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
|
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
|
||||||
v.o.value, v.o.is_uri
|
|
||||||
)
|
)
|
||||||
for v in document.metadata
|
for v in document.metadata
|
||||||
]
|
]
|
||||||
|
|
@ -249,8 +264,7 @@ class LibraryTableStore:
|
||||||
|
|
||||||
metadata = [
|
metadata = [
|
||||||
(
|
(
|
||||||
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
|
*term_to_tuple(v.s), *term_to_tuple(v.p), *term_to_tuple(v.o)
|
||||||
v.o.value, v.o.is_uri
|
|
||||||
)
|
)
|
||||||
for v in document.metadata
|
for v in document.metadata
|
||||||
]
|
]
|
||||||
|
|
@ -331,9 +345,9 @@ class LibraryTableStore:
|
||||||
comments = row[4],
|
comments = row[4],
|
||||||
metadata = [
|
metadata = [
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value=m[0], is_uri=m[1]),
|
s=tuple_to_term(m[0], m[1]),
|
||||||
p=Value(value=m[2], is_uri=m[3]),
|
p=tuple_to_term(m[2], m[3]),
|
||||||
o=Value(value=m[4], is_uri=m[5])
|
o=tuple_to_term(m[4], m[5])
|
||||||
)
|
)
|
||||||
for m in row[5]
|
for m in row[5]
|
||||||
],
|
],
|
||||||
|
|
@ -376,9 +390,9 @@ class LibraryTableStore:
|
||||||
comments = row[3],
|
comments = row[3],
|
||||||
metadata = [
|
metadata = [
|
||||||
Triple(
|
Triple(
|
||||||
s=Value(value=m[0], is_uri=m[1]),
|
s=tuple_to_term(m[0], m[1]),
|
||||||
p=Value(value=m[2], is_uri=m[3]),
|
p=tuple_to_term(m[2], m[3]),
|
||||||
o=Value(value=m[4], is_uri=m[5])
|
o=tuple_to_term(m[4], m[5])
|
||||||
)
|
)
|
||||||
for m in row[4]
|
for m in row[4]
|
||||||
],
|
],
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ description = "TrustGraph provides a means to run a pipeline of flexible AI proc
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.8"
|
requires-python = ">=3.8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"trustgraph-base>=1.9,<1.10",
|
"trustgraph-base>=2.0,<2.1",
|
||||||
"pulsar-client",
|
"pulsar-client",
|
||||||
"prometheus-client",
|
"prometheus-client",
|
||||||
"boto3",
|
"boto3",
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ description = "TrustGraph provides a means to run a pipeline of flexible AI proc
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.8"
|
requires-python = ">=3.8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"trustgraph-base>=1.9,<1.10",
|
"trustgraph-base>=2.0,<2.1",
|
||||||
"pulsar-client",
|
"pulsar-client",
|
||||||
"google-cloud-aiplatform",
|
"google-cloud-aiplatform",
|
||||||
"prometheus-client",
|
"prometheus-client",
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue