2025-07-14 14:57:44 +01:00
"""
Tests for Cassandra triples query service
"""
import pytest
from unittest . mock import MagicMock , patch
2026-01-27 13:48:08 +00:00
from trustgraph . query . triples . cassandra . service import Processor , create_term
from trustgraph . schema import Term , IRI , LITERAL
2025-07-14 14:57:44 +01:00
class TestCassandraQueryProcessor :
""" Test cases for Cassandra query processor """
@pytest.fixture
def processor(self):
    """Build a ``Processor`` backed by a mocked task group.

    NOTE(review): the query tests in this class construct ``Processor``
    with ``cassandra_host``; confirm ``graph_host`` is still an accepted
    keyword here.
    """
    return Processor(
        taskgroup=MagicMock(),
        id='test-cassandra-query',
        graph_host='localhost',
    )
2026-01-27 13:48:08 +00:00
def test_create_term_with_http_uri(self, processor):
    """An ``http://`` string is turned into an IRI term."""
    uri = "http://example.com/resource"

    result = create_term(uri)

    assert isinstance(result, Term)
    assert result.iri == uri
    assert result.type == IRI
2025-07-14 14:57:44 +01:00
2026-01-27 13:48:08 +00:00
def test_create_term_with_https_uri(self, processor):
    """An ``https://`` string is turned into an IRI term."""
    uri = "https://example.com/resource"

    result = create_term(uri)

    assert isinstance(result, Term)
    assert result.iri == uri
    assert result.type == IRI
def test_create_term_with_literal(self, processor):
    """Plain text is turned into a literal term carrying the same value."""
    text = "just a literal string"

    result = create_term(text)

    assert isinstance(result, Term)
    assert result.value == text
    assert result.type == LITERAL
2025-07-14 14:57:44 +01:00
2026-01-27 13:48:08 +00:00
def test_create_term_with_empty_string(self, processor):
    """The empty string is turned into a literal term with an empty value."""
    result = create_term("")

    assert isinstance(result, Term)
    assert result.value == ""
    assert result.type == LITERAL
2025-07-14 14:57:44 +01:00
2026-01-27 13:48:08 +00:00
def test_create_term_with_partial_uri(self, processor):
    """A bare scheme name with no ``://`` part stays a literal term."""
    text = "http"

    result = create_term(text)

    assert isinstance(result, Term)
    assert result.value == text
    assert result.type == LITERAL
2025-07-14 14:57:44 +01:00
2026-01-27 13:48:08 +00:00
def test_create_term_with_ftp_uri(self, processor):
    """An ``ftp://`` string is not treated as an IRI; it stays a literal."""
    text = "ftp://example.com/file"

    result = create_term(text)

    assert isinstance(result, Term)
    assert result.value == text
    assert result.type == LITERAL
2025-07-14 14:57:44 +01:00
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_spo_query(self, mock_kg_class):
    """Query with subject, predicate and object all bound.

    Checks that the graph class is instantiated with the configured host
    and with the first ``query_triples`` argument as keyspace, that
    ``get_spo`` receives the plain term values, and that the single row
    comes back as one triple.
    """
    from trustgraph.schema import TriplesQueryRequest, Term, LITERAL

    # Instantiating the patched graph class hands back this mock.
    mock_tg_instance = MagicMock()
    mock_kg_class.return_value = mock_tg_instance

    # The SPO lookup yields a single row with these attributes.
    mock_result = MagicMock(
        g='',
        otype=None,
        dtype=None,
        lang=None,
        o='test_object',
    )
    mock_tg_instance.get_spo.return_value = [mock_result]

    processor = Processor(
        taskgroup=MagicMock(),
        id='test-cassandra-query',
        cassandra_host='localhost',
    )

    # Request with every position of the triple filled in.
    query = TriplesQueryRequest(
        collection='test_collection',
        s=Term(type=LITERAL, value='test_subject'),
        p=Term(type=LITERAL, value='test_predicate'),
        o=Term(type=LITERAL, value='test_object'),
        limit=100,
    )

    result = await processor.query_triples('test_user', query)

    # The graph was opened once, against the expected host and keyspace.
    mock_kg_class.assert_called_once_with(
        hosts=['localhost'],
        keyspace='test_user',
    )

    # The lookup received the collection plus the three term values.
    mock_tg_instance.get_spo.assert_called_once_with(
        'test_collection', 'test_subject', 'test_predicate', 'test_object',
        g=None, limit=100,
    )

    # Exactly the queried triple is returned.
    assert len(result) == 1
    assert result[0].s.iri == 'test_subject'
    assert result[0].p.iri == 'test_predicate'
    assert result[0].o.value == 'test_object'
def test_processor_initialization_with_defaults(self):
    """Without connection arguments the processor falls back to defaults."""
    processor = Processor(taskgroup=MagicMock())

    assert processor.cassandra_host == ['cassandra']  # Updated default
    assert processor.cassandra_username is None
    assert processor.cassandra_password is None
    assert processor.table is None
def test_processor_initialization_with_custom_params(self):
    """Explicit host and credentials are stored on the processor.

    The host is given as a single string and is expected back as a
    one-element list.
    """
    processor = Processor(
        taskgroup=MagicMock(),
        cassandra_host='cassandra.example.com',
        cassandra_username='queryuser',
        cassandra_password='querypass',
    )

    assert processor.cassandra_host == ['cassandra.example.com']
    assert processor.cassandra_username == 'queryuser'
    assert processor.cassandra_password == 'querypass'
    assert processor.table is None
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_sp_pattern(self, mock_kg_class):
    """Query with subject and predicate bound and the object left open.

    Checks that ``get_sp`` receives the collection and the two term
    values, and that the returned triple combines the queried subject and
    predicate with the object taken from the row.
    """
    from trustgraph.schema import TriplesQueryRequest, Term, LITERAL

    # Instantiating the patched graph class hands back this mock.
    mock_tg_instance = MagicMock()
    mock_kg_class.return_value = mock_tg_instance

    # The SP lookup yields a single row with these attributes.
    mock_result = MagicMock(
        o='result_object',
        g='',
        otype=None,
        dtype=None,
        lang=None,
    )
    mock_tg_instance.get_sp.return_value = [mock_result]

    processor = Processor(taskgroup=MagicMock())

    query = TriplesQueryRequest(
        collection='test_collection',
        s=Term(type=LITERAL, value='test_subject'),
        p=Term(type=LITERAL, value='test_predicate'),
        o=None,
        limit=50,
    )

    result = await processor.query_triples('test_user', query)

    mock_tg_instance.get_sp.assert_called_once_with(
        'test_collection', 'test_subject', 'test_predicate',
        g=None, limit=50,
    )
    assert len(result) == 1
    assert result[0].s.iri == 'test_subject'
    assert result[0].p.iri == 'test_predicate'
    assert result[0].o.value == 'result_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_s_pattern(self, mock_kg_class):
    """Query with only the subject bound.

    Checks that ``get_s`` receives the collection and the subject value,
    and that the returned triple combines the queried subject with the
    predicate and object taken from the row.
    """
    from trustgraph.schema import TriplesQueryRequest, Term, LITERAL

    # Instantiating the patched graph class hands back this mock.
    mock_tg_instance = MagicMock()
    mock_kg_class.return_value = mock_tg_instance

    # The S lookup yields a single row with these attributes.
    mock_result = MagicMock(
        p='result_predicate',
        o='result_object',
        g='',
        otype=None,
        dtype=None,
        lang=None,
    )
    mock_tg_instance.get_s.return_value = [mock_result]

    processor = Processor(taskgroup=MagicMock())

    query = TriplesQueryRequest(
        collection='test_collection',
        s=Term(type=LITERAL, value='test_subject'),
        p=None,
        o=None,
        limit=25,
    )

    result = await processor.query_triples('test_user', query)

    mock_tg_instance.get_s.assert_called_once_with(
        'test_collection', 'test_subject', g=None, limit=25,
    )
    assert len(result) == 1
    assert result[0].s.iri == 'test_subject'
    assert result[0].p.iri == 'result_predicate'
    assert result[0].o.value == 'result_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_p_pattern(self, mock_kg_class):
    """Query with only the predicate bound.

    Checks that ``get_p`` receives the collection and the predicate value,
    and that the returned triple combines the queried predicate with the
    subject and object taken from the row.
    """
    from trustgraph.schema import TriplesQueryRequest, Term, LITERAL

    # Instantiating the patched graph class hands back this mock.
    mock_tg_instance = MagicMock()
    mock_kg_class.return_value = mock_tg_instance

    # The P lookup yields a single row with these attributes.
    mock_result = MagicMock(
        s='result_subject',
        o='result_object',
        g='',
        otype=None,
        dtype=None,
        lang=None,
    )
    mock_tg_instance.get_p.return_value = [mock_result]

    processor = Processor(taskgroup=MagicMock())

    query = TriplesQueryRequest(
        collection='test_collection',
        s=None,
        p=Term(type=LITERAL, value='test_predicate'),
        o=None,
        limit=10,
    )

    result = await processor.query_triples('test_user', query)

    mock_tg_instance.get_p.assert_called_once_with(
        'test_collection', 'test_predicate', g=None, limit=10,
    )
    assert len(result) == 1
    assert result[0].s.iri == 'result_subject'
    assert result[0].p.iri == 'test_predicate'
    assert result[0].o.value == 'result_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_o_pattern(self, mock_kg_class):
    """Test the O query pattern (only the object is constrained).

    The knowledge-graph class is patched so that ``get_o`` returns a single
    mocked row.  The test checks the arguments ``get_o`` receives and the
    subject, predicate and object of the one triple that comes back.
    """
    from trustgraph.schema import TriplesQueryRequest, Term, LITERAL

    mock_tg_instance = MagicMock()
    mock_kg_class.return_value = mock_tg_instance

    # Row handed back by the patched get_o().  No `o` attribute is set on
    # it; the final assertion expects the object to equal the queried value.
    mock_result = MagicMock()
    mock_result.s = 'result_subject'
    mock_result.p = 'result_predicate'
    mock_result.g = ''
    mock_result.otype = None
    mock_result.dtype = None
    mock_result.lang = None
    mock_tg_instance.get_o.return_value = [mock_result]

    processor = Processor(taskgroup=MagicMock())

    query = TriplesQueryRequest(
        collection='test_collection',
        s=None,
        p=None,
        o=Term(type=LITERAL, value='test_object'),
        limit=75
    )

    result = await processor.query_triples('test_user', query)

    mock_tg_instance.get_o.assert_called_once_with(
        'test_collection', 'test_object', g=None, limit=75
    )

    assert len(result) == 1
    assert result[0].s.iri == 'result_subject'
    assert result[0].p.iri == 'result_predicate'
    assert result[0].o.value == 'test_object'
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_get_all_pattern(self, mock_kg_class):
    """Test the query pattern with no constraints (get all).

    With s, p and o all ``None`` the patched knowledge graph's ``get_all``
    is expected to be called with the collection and limit only, and the
    single mocked row is expected back as one triple.
    """
    from trustgraph.schema import TriplesQueryRequest

    mock_tg_instance = MagicMock()
    mock_kg_class.return_value = mock_tg_instance

    # Row handed back by the patched get_all()
    mock_result = MagicMock()
    mock_result.s = 'all_subject'
    mock_result.p = 'all_predicate'
    mock_result.o = 'all_object'
    mock_result.g = ''
    mock_result.otype = None
    mock_result.dtype = None
    mock_result.lang = None
    mock_tg_instance.get_all.return_value = [mock_result]

    processor = Processor(taskgroup=MagicMock())

    query = TriplesQueryRequest(
        collection='test_collection',
        s=None,
        p=None,
        o=None,
        limit=1000
    )

    result = await processor.query_triples('test_user', query)

    mock_tg_instance.get_all.assert_called_once_with('test_collection', limit=1000)

    assert len(result) == 1
    assert result[0].s.iri == 'all_subject'
    assert result[0].p.iri == 'all_predicate'
    assert result[0].o.value == 'all_object'
def test_add_args_method(self):
    """Test that add_args properly configures the argument parser.

    The parent ``TriplesQueryService.add_args`` is patched out; the test
    checks that it is still invoked with the parser, and that parsing an
    empty command line yields the three ``cassandra_*`` attributes with
    their defaults.
    """
    from argparse import ArgumentParser

    parser = ArgumentParser()

    # Mock the parent class add_args method
    with patch('trustgraph.query.triples.cassandra.service.TriplesQueryService.add_args') as mock_parent_add_args:
        Processor.add_args(parser)

        # Verify parent add_args was called
        mock_parent_add_args.assert_called_once_with(parser)

    # Verify our specific arguments were added
    args = parser.parse_args([])

    assert hasattr(args, 'cassandra_host')
    assert args.cassandra_host == 'cassandra'  # Updated to new parameter name and default
    assert hasattr(args, 'cassandra_username')
    assert args.cassandra_username is None
    assert hasattr(args, 'cassandra_password')
    assert args.cassandra_password is None
2025-07-14 14:57:44 +01:00
def test_add_args_with_custom_values(self):
    """Test add_args with custom command line values.

    Parses explicit ``--cassandra-host``, ``--cassandra-username`` and
    ``--cassandra-password`` options and checks each lands on the matching
    ``cassandra_*`` attribute.
    """
    from argparse import ArgumentParser

    parser = ArgumentParser()

    # The parent's add_args is patched out for the duration of the call
    with patch('trustgraph.query.triples.cassandra.service.TriplesQueryService.add_args'):
        Processor.add_args(parser)

    # Test parsing with custom values (new cassandra_* arguments)
    args = parser.parse_args([
        '--cassandra-host', 'query.cassandra.com',
        '--cassandra-username', 'queryuser',
        '--cassandra-password', 'querypass'
    ])

    assert args.cassandra_host == 'query.cassandra.com'
    assert args.cassandra_username == 'queryuser'
    assert args.cassandra_password == 'querypass'
2025-07-14 14:57:44 +01:00
def test_add_args_short_form(self):
    """Test parsing ``--cassandra-host`` on its own.

    The test name is historical: the cassandra arguments have no short
    form, so the long option is used here.
    """
    from argparse import ArgumentParser

    parser = ArgumentParser()

    # The parent's add_args is patched out for the duration of the call
    with patch('trustgraph.query.triples.cassandra.service.TriplesQueryService.add_args'):
        Processor.add_args(parser)

    # Test parsing with cassandra arguments (no short form)
    args = parser.parse_args(['--cassandra-host', 'short.query.com'])

    assert args.cassandra_host == 'short.query.com'
2025-07-14 14:57:44 +01:00
@patch('trustgraph.query.triples.cassandra.service.Processor.launch')
def test_run_function(self, mock_launch):
    """Test that run() calls Processor.launch with the correct parameters.

    ``launch`` must be called exactly once with two positional arguments:
    ``default_ident`` and the service description.  The description is
    compared after collapsing runs of whitespace, so the check does not
    depend on how the text is wrapped or spaced.
    """
    from trustgraph.query.triples.cassandra.service import run, default_ident

    run()

    mock_launch.assert_called_once()

    positional, keyword = mock_launch.call_args
    assert not keyword
    assert len(positional) == 2

    ident, description = positional
    assert ident == default_ident

    # str.split() with no argument splits on any whitespace run, which
    # also drops the leading and trailing newlines of the description.
    assert ' '.join(description.split()) == (
        'Triples query service. Input is a (s, p, o, g) quad pattern, '
        'some values may be null. Output is a list of quads.'
    )
2025-07-14 14:57:44 +01:00
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_with_authentication(self, mock_kg_class):
    """Test querying with username and password authentication.

    A processor built with ``cassandra_username`` / ``cassandra_password``
    runs one fully-specified (s, p, o) query.  The test then checks the
    keyword arguments used to construct the patched knowledge-graph class:
    hosts, keyspace, username and password.
    """
    from trustgraph.schema import TriplesQueryRequest, Term, LITERAL

    mock_tg_instance = MagicMock()
    mock_kg_class.return_value = mock_tg_instance

    # SPO query returns a list of results
    mock_result = MagicMock()
    mock_result.g = ''
    mock_result.otype = None
    mock_result.dtype = None
    mock_result.lang = None
    mock_result.o = 'test_object'
    mock_tg_instance.get_spo.return_value = [mock_result]

    processor = Processor(
        taskgroup=MagicMock(),
        cassandra_username='authuser',
        cassandra_password='authpass'
    )

    query = TriplesQueryRequest(
        collection='test_collection',
        s=Term(type=LITERAL, value='test_subject'),
        p=Term(type=LITERAL, value='test_predicate'),
        o=Term(type=LITERAL, value='test_object'),
        limit=100
    )

    await processor.query_triples('test_user', query)

    # Verify KnowledgeGraph was created with authentication; the first
    # argument to query_triples is expected to show up as the keyspace.
    mock_kg_class.assert_called_once_with(
        hosts=['cassandra'],  # Updated default
        keyspace='test_user',
        username='authuser',
        password='authpass'
    )
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_table_reuse(self, mock_kg_class):
    """Test that the knowledge graph is reused for the same table.

    The same query is issued twice with the same first argument; the
    patched knowledge-graph class must have been constructed exactly once
    after each call.
    """
    from trustgraph.schema import TriplesQueryRequest, Term, LITERAL

    mock_tg_instance = MagicMock()
    mock_kg_class.return_value = mock_tg_instance

    # SPO query returns a list of results
    mock_result = MagicMock()
    mock_result.g = ''
    mock_result.otype = None
    mock_result.dtype = None
    mock_result.lang = None
    mock_result.o = 'test_object'
    mock_tg_instance.get_spo.return_value = [mock_result]

    processor = Processor(taskgroup=MagicMock())

    query = TriplesQueryRequest(
        collection='test_collection',
        s=Term(type=LITERAL, value='test_subject'),
        p=Term(type=LITERAL, value='test_predicate'),
        o=Term(type=LITERAL, value='test_object'),
        limit=100
    )

    # First query should create the knowledge graph
    await processor.query_triples('test_user', query)
    assert mock_kg_class.call_count == 1

    # Second query with same table should reuse it
    await processor.query_triples('test_user', query)
    assert mock_kg_class.call_count == 1  # Should not increase
2025-07-14 14:57:44 +01:00
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_table_switching(self, mock_kg_class):
    """Check that changing the first query_triples argument builds a new graph client.

    Two subject-only queries are issued with different first arguments.
    After each call ``processor.table`` must equal that argument, and the
    patched ``EntityCentricKnowledgeGraph`` class must have been
    instantiated once per distinct value (twice in total).
    """
    # The patch target must be an exact dotted path: surrounding whitespace
    # in the string prevents mock.patch from resolving module and attribute.
    from trustgraph.schema import TriplesQueryRequest, Term, LITERAL

    # Each construction of the patched class hands back a distinct mock.
    mock_tg_instance1 = MagicMock()
    mock_tg_instance2 = MagicMock()
    mock_kg_class.side_effect = [mock_tg_instance1, mock_tg_instance2]

    # A single row shared by both instances; only the subject lookup is used.
    mock_result = MagicMock()
    mock_result.g = ' '
    mock_result.otype = None
    mock_result.dtype = None
    mock_result.lang = None
    mock_result.p = ' p '
    mock_result.o = ' o '
    mock_tg_instance1.get_s.return_value = [mock_result]
    mock_tg_instance2.get_s.return_value = [mock_result]

    processor = Processor(taskgroup=MagicMock())

    # First query
    query1 = TriplesQueryRequest(
        collection=' collection1 ',
        s=Term(type=LITERAL, value=' test_subject '),
        p=None,
        o=None,
        limit=100,
    )

    await processor.query_triples(' user1 ', query1)
    assert processor.table == ' user1 '

    # Second query with a different first argument (and collection)
    query2 = TriplesQueryRequest(
        collection=' collection2 ',
        s=Term(type=LITERAL, value=' test_subject '),
        p=None,
        o=None,
        limit=100,
    )

    await processor.query_triples(' user2 ', query2)
    assert processor.table == ' user2 '

    # Verify the patched graph class was instantiated twice
    assert mock_kg_class.call_count == 2
2025-07-14 14:57:44 +01:00
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_exception_handling(self, mock_kg_class):
    """Check that an error raised by the graph lookup escapes query_triples.

    The mocked graph instance raises from ``get_spo``; a query with
    subject, predicate and object all set must surface that exception
    to the caller with its message intact.
    """
    # The patch target must be an exact dotted path: surrounding whitespace
    # in the string prevents mock.patch from resolving module and attribute.
    from trustgraph.schema import TriplesQueryRequest, Term, LITERAL

    mock_tg_instance = MagicMock()
    mock_kg_class.return_value = mock_tg_instance
    mock_tg_instance.get_spo.side_effect = Exception(" Query failed ")

    processor = Processor(taskgroup=MagicMock())

    query = TriplesQueryRequest(
        collection=' test_collection ',
        s=Term(type=LITERAL, value=' test_subject '),
        p=Term(type=LITERAL, value=' test_predicate '),
        o=Term(type=LITERAL, value=' test_object '),
        limit=100,
    )

    # pytest.raises applies `match` as a regex search on the message.
    with pytest.raises(Exception, match=" Query failed "):
        await processor.query_triples(' test_user ', query)
2025-07-14 14:57:44 +01:00
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_query_triples_multiple_results(self, mock_kg_class):
    """Check that every row returned by the graph lookup appears in the result.

    ``get_sp`` on the mocked graph instance yields two rows; a
    subject + predicate query must return two triples whose object
    values match those rows, in the same order.
    """
    # The patch target must be an exact dotted path: surrounding whitespace
    # in the string prevents mock.patch from resolving module and attribute.
    from trustgraph.schema import TriplesQueryRequest, Term, LITERAL

    mock_tg_instance = MagicMock()
    mock_kg_class.return_value = mock_tg_instance

    # Two rows differing only in their object value.
    mock_result1 = MagicMock()
    mock_result1.o = ' object1 '
    mock_result1.g = ' '
    mock_result1.otype = None
    mock_result1.dtype = None
    mock_result1.lang = None

    mock_result2 = MagicMock()
    mock_result2.o = ' object2 '
    mock_result2.g = ' '
    mock_result2.otype = None
    mock_result2.dtype = None
    mock_result2.lang = None

    mock_tg_instance.get_sp.return_value = [mock_result1, mock_result2]

    processor = Processor(taskgroup=MagicMock())

    query = TriplesQueryRequest(
        collection=' test_collection ',
        s=Term(type=LITERAL, value=' test_subject '),
        p=Term(type=LITERAL, value=' test_predicate '),
        o=None,
        limit=100,
    )

    result = await processor.query_triples(' test_user ', query)

    assert len(result) == 2
    assert result[0].o.value == ' object1 '
    assert result[1].o.value == ' object2 '
class TestCassandraQueryPerformanceOptimizations :
""" Test cases for multi-table performance optimizations in query service """
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_get_po_query_optimization(self, mock_kg_class):
    """Check that a predicate + object query is served by ``get_po``.

    With the subject left unset, the processor must call ``get_po`` on
    the graph instance exactly once with the collection, predicate,
    object, ``g=None`` and the request limit, and build one triple from
    the returned row.
    """
    # The patch target must be an exact dotted path: surrounding whitespace
    # in the string prevents mock.patch from resolving module and attribute.
    from trustgraph.schema import TriplesQueryRequest, Term, LITERAL

    mock_tg_instance = MagicMock()
    mock_kg_class.return_value = mock_tg_instance

    # The row supplies the subject; predicate and object are asserted
    # below to equal the values given in the request.
    mock_result = MagicMock()
    mock_result.s = ' result_subject '
    mock_result.g = ' '
    mock_result.otype = None
    mock_result.dtype = None
    mock_result.lang = None
    mock_tg_instance.get_po.return_value = [mock_result]

    processor = Processor(taskgroup=MagicMock())

    # PO query pattern (predicate + object, find subjects)
    query = TriplesQueryRequest(
        collection=' test_collection ',
        s=None,
        p=Term(type=LITERAL, value=' test_predicate '),
        o=Term(type=LITERAL, value=' test_object '),
        limit=50,
    )

    result = await processor.query_triples(' test_user ', query)

    # Verify get_po was called (should use optimized po_table)
    mock_tg_instance.get_po.assert_called_once_with(
        ' test_collection ', ' test_predicate ', ' test_object ', g=None, limit=50
    )

    assert len(result) == 1
    assert result[0].s.iri == ' result_subject '
    assert result[0].p.iri == ' test_predicate '
    assert result[0].o.value == ' test_object '
@pytest.mark.asyncio
@patch('trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph')
async def test_get_os_query_optimization(self, mock_kg_class):
    """Check that a subject + object query is served by ``get_os``.

    With the predicate left unset, the processor must call ``get_os`` on
    the graph instance exactly once with the collection, then the object,
    then the subject, plus ``g=None`` and the request limit, and build one
    triple from the returned row.
    """
    # The patch target must be an exact dotted path: surrounding whitespace
    # in the string prevents mock.patch from resolving module and attribute.
    from trustgraph.schema import TriplesQueryRequest, Term, LITERAL

    mock_tg_instance = MagicMock()
    mock_kg_class.return_value = mock_tg_instance

    # The row supplies the predicate; subject and object are asserted
    # below to equal the values given in the request.
    mock_result = MagicMock()
    mock_result.p = ' result_predicate '
    mock_result.g = ' '
    mock_result.otype = None
    mock_result.dtype = None
    mock_result.lang = None
    mock_tg_instance.get_os.return_value = [mock_result]

    processor = Processor(taskgroup=MagicMock())

    # OS query pattern (object + subject, find predicates)
    query = TriplesQueryRequest(
        collection=' test_collection ',
        s=Term(type=LITERAL, value=' test_subject '),
        p=None,
        o=Term(type=LITERAL, value=' test_object '),
        limit=25,
    )

    result = await processor.query_triples(' test_user ', query)

    # Verify get_os was called (should use optimized subject_table with clustering)
    # Note the argument order: object comes before subject.
    mock_tg_instance.get_os.assert_called_once_with(
        ' test_collection ', ' test_object ', ' test_subject ', g=None, limit=25
    )

    assert len(result) == 1
    assert result[0].s.iri == ' test_subject '
    assert result[0].p.iri == ' result_predicate '
    assert result[0].o.value == ' test_object '
@pytest.mark.asyncio
2026-02-16 13:26:43 +00:00
@patch ( ' trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph ' )
async def test_all_query_patterns_use_correct_tables ( self , mock_kg_class ) :
2025-09-18 19:52:05 +01:00
""" Test that all query patterns route to their optimal tables """
2026-01-27 13:48:08 +00:00
from trustgraph . schema import TriplesQueryRequest , Term , IRI , LITERAL
2025-09-18 19:52:05 +01:00
# Calling the patched EntityCentricKnowledgeGraph class yields this mock,
# so the get_* calls made during the test are recorded on it.
mock_tg_instance = MagicMock ( )
2026-02-16 13:26:43 +00:00
mock_kg_class . return_value = mock_tg_instance
2025-09-18 19:52:05 +01:00
# Mock empty results for all queries
mock_tg_instance . get_all . return_value = [ ]
mock_tg_instance . get_s . return_value = [ ]
mock_tg_instance . get_p . return_value = [ ]
mock_tg_instance . get_o . return_value = [ ]
mock_tg_instance . get_sp . return_value = [ ]
mock_tg_instance . get_po . return_value = [ ]
mock_tg_instance . get_os . return_value = [ ]
mock_tg_instance . get_spo . return_value = [ ]
processor = Processor ( taskgroup = MagicMock ( ) )
# Test each query pattern
# One row per combination of bound/unbound s, p, o (8 in total), paired
# with the name of the mock accessor expected to serve that combination.
test_patterns = [
# (s, p, o, expected_method)
( None , None , None , ' get_all ' ) , # All triples
( ' s1 ' , None , None , ' get_s ' ) , # Subject only
( None , ' p1 ' , None , ' get_p ' ) , # Predicate only
( None , None , ' o1 ' , ' get_o ' ) , # Object only
( ' s1 ' , ' p1 ' , None , ' get_sp ' ) , # Subject + Predicate
( None , ' p1 ' , ' o1 ' , ' get_po ' ) , # Predicate + Object (CRITICAL OPTIMIZATION)
( ' s1 ' , None , ' o1 ' , ' get_os ' ) , # Object + Subject
( ' s1 ' , ' p1 ' , ' o1 ' , ' get_spo ' ) , # All three
]
for s , p , o , expected_method in test_patterns :
# Reset mock call counts
# reset_mock() clears recorded calls; by default it leaves configured
# return_value settings in place, so the empty-list results set up
# above still apply on every iteration.
mock_tg_instance . reset_mock ( )
# Unbound (falsy) positions are passed as None; every bound position is
# wrapped as a LITERAL-typed Term.
query = TriplesQueryRequest (
collection = ' test_collection ' ,
2026-01-27 13:48:08 +00:00
s = Term ( type = LITERAL , value = s ) if s else None ,
p = Term ( type = LITERAL , value = p ) if p else None ,
o = Term ( type = LITERAL , value = o ) if o else None ,
2025-09-18 19:52:05 +01:00
limit = 10
)
feat: workspace-based multi-tenancy, replacing user as tenancy axis (#840)
Introduces `workspace` as the isolation boundary for config, flows,
library, and knowledge data. Removes `user` as a schema-level field
throughout the code, API specs, and tests; workspace provides the
same separation more cleanly at the trusted flow.workspace layer
rather than through client-supplied message fields.
Design
------
- IAM tech spec (docs/tech-specs/iam.md) documents current state,
proposed auth/access model, and migration direction.
- Data ownership model (docs/tech-specs/data-ownership-model.md)
captures the workspace/collection/flow hierarchy.
Schema + messaging
------------------
- Drop `user` field from AgentRequest/Step, GraphRagQuery,
DocumentRagQuery, Triples/Graph/Document/Row EmbeddingsRequest,
Sparql/Rows/Structured QueryRequest, ToolServiceRequest.
- Keep collection/workspace routing via flow.workspace at the
service layer.
- Translators updated to not serialise/deserialise user.
API specs
---------
- OpenAPI schemas and path examples cleaned of user fields.
- Websocket async-api messages updated.
- Removed the unused parameters/User.yaml.
Services + base
---------------
- Librarian, collection manager, knowledge, config: all operations
scoped by workspace. Config client API takes workspace as first
positional arg.
- `flow.workspace` set at flow start time by the infrastructure;
no longer pass-through from clients.
- Tool service drops user-personalisation passthrough.
CLI + SDK
---------
- tg-init-workspace and workspace-aware import/export.
- All tg-* commands drop user args; accept --workspace.
- Python API/SDK (flow, socket_client, async_*, explainability,
library) drop user kwargs from every method signature.
MCP server
----------
- All tool endpoints drop user parameters; socket_manager no longer
keyed per user.
Flow service
------------
- Closure-based topic cleanup on flow stop: only delete topics
whose blueprint template was parameterised AND no remaining
live flow (across all workspaces) still resolves to that topic.
Three scopes fall out naturally from template analysis:
* {id} -> per-flow, deleted on stop
* {blueprint} -> per-blueprint, kept while any flow of the
same blueprint exists
* {workspace} -> per-workspace, kept while any flow in the
workspace exists
* literal -> global, never deleted (e.g. tg.request.librarian)
Fixes a bug where stopping a flow silently destroyed the global
librarian exchange, wedging all library operations until manual
restart.
RabbitMQ backend
----------------
- heartbeat=60, blocked_connection_timeout=300. Catches silently
dead connections (broker restart, orphaned channels, network
partitions) within ~2 heartbeat windows, so the consumer
reconnects and re-binds its queue rather than sitting forever
on a zombie connection.
Tests
-----
- Full test refresh: unit, integration, contract, provenance.
- Dropped user-field assertions and constructor kwargs across
~100 test files.
- Renamed user-collection isolation tests to workspace-collection.
2026-04-21 23:23:01 +01:00
# NOTE(review): the meaning of the first positional argument to
# query_triples is not visible from this test - confirm against the
# service signature.
await processor . query_triples ( ' test_user ' , query )
2025-09-18 19:52:05 +01:00
# Verify the correct method was called
# This only checks that the expected accessor was invoked at least once;
# it does not assert that the other get_* accessors stayed uncalled.
method = getattr ( mock_tg_instance , expected_method )
assert method . called , f " Expected { expected_method } to be called for pattern s= { s } , p= { p } , o= { o } "
def test_legacy_vs_optimized_mode_configuration(self):
    """Smoke-test Processor construction under each CASSANDRA_USE_LEGACY setting.

    A Processor is built three times: with the environment cleared (the
    variable unset), with the variable set to 'true', and with it set to
    'false'. The selected mode itself is not inspected here; the original
    notes state that it is determined in KnowledgeGraph initialization, so
    this test only confirms that construction succeeds in every case.
    """
    taskgroup_mock = MagicMock()

    # (environment overrides, whether to wipe the rest of the environment)
    scenarios = [
        ({}, True),                                  # optimized mode (default)
        ({'CASSANDRA_USE_LEGACY': 'true'}, False),   # legacy mode
        ({'CASSANDRA_USE_LEGACY': 'false'}, False),  # explicit optimized mode
    ]

    for env, clear in scenarios:
        # patch.dict restores os.environ on exit, so scenarios cannot leak
        # into each other or into other tests.
        with patch.dict('os.environ', env, clear=clear):
            processor = Processor(taskgroup=taskgroup_mock)

        # Mode is determined in KnowledgeGraph initialization; all that can
        # be checked at this level is that a Processor was produced.
        assert isinstance(processor, Processor)
@pytest.mark.asyncio
2026-02-16 13:26:43 +00:00
@patch ( ' trustgraph.query.triples.cassandra.service.EntityCentricKnowledgeGraph ' )
async def test_performance_critical_po_query_no_filtering ( self , mock_kg_class ) :
2025-09-18 19:52:05 +01:00
""" Test the performance-critical PO query that eliminates ALLOW FILTERING """
2026-01-27 13:48:08 +00:00
from trustgraph . schema import TriplesQueryRequest , Term , IRI , LITERAL
2025-09-18 19:52:05 +01:00
mock_tg_instance = MagicMock ( )
2026-02-16 13:26:43 +00:00
mock_kg_class . return_value = mock_tg_instance
2025-09-18 19:52:05 +01:00
# Mock multiple subjects for the same predicate-object pair
mock_results = [ ]
for i in range ( 5 ) :
mock_result = MagicMock ( )
mock_result . s = f ' subject_ { i } '
2026-02-16 13:26:43 +00:00
mock_result . g = ' '
mock_result . otype = None
mock_result . dtype = None
mock_result . lang = None
2025-09-18 19:52:05 +01:00
mock_results . append ( mock_result )
mock_tg_instance . get_po . return_value = mock_results
processor = Processor ( taskgroup = MagicMock ( ) )
# This is the query pattern that was slow with ALLOW FILTERING
query = TriplesQueryRequest (
collection = ' massive_collection ' ,
s = None ,
2026-01-27 13:48:08 +00:00
p = Term ( type = IRI , iri = ' http://www.w3.org/1999/02/22-rdf-syntax-ns#type ' ) ,
o = Term ( type = IRI , iri = ' http://example.com/Person ' ) ,
2025-09-18 19:52:05 +01:00
limit = 1000
)
feat: workspace-based multi-tenancy, replacing user as tenancy axis (#840)
Introduces `workspace` as the isolation boundary for config, flows,
library, and knowledge data. Removes `user` as a schema-level field
throughout the code, API specs, and tests; workspace provides the
same separation more cleanly at the trusted flow.workspace layer
rather than through client-supplied message fields.
Design
------
- IAM tech spec (docs/tech-specs/iam.md) documents current state,
proposed auth/access model, and migration direction.
- Data ownership model (docs/tech-specs/data-ownership-model.md)
captures the workspace/collection/flow hierarchy.
Schema + messaging
------------------
- Drop `user` field from AgentRequest/Step, GraphRagQuery,
DocumentRagQuery, Triples/Graph/Document/Row EmbeddingsRequest,
Sparql/Rows/Structured QueryRequest, ToolServiceRequest.
- Keep collection/workspace routing via flow.workspace at the
service layer.
- Translators updated to not serialise/deserialise user.
API specs
---------
- OpenAPI schemas and path examples cleaned of user fields.
- Websocket async-api messages updated.
- Removed the unused parameters/User.yaml.
Services + base
---------------
- Librarian, collection manager, knowledge, config: all operations
scoped by workspace. Config client API takes workspace as first
positional arg.
- `flow.workspace` set at flow start time by the infrastructure;
no longer pass-through from clients.
- Tool service drops user-personalisation passthrough.
CLI + SDK
---------
- tg-init-workspace and workspace-aware import/export.
- All tg-* commands drop user args; accept --workspace.
- Python API/SDK (flow, socket_client, async_*, explainability,
library) drop user kwargs from every method signature.
MCP server
----------
- All tool endpoints drop user parameters; socket_manager no longer
keyed per user.
Flow service
------------
- Closure-based topic cleanup on flow stop: only delete topics
whose blueprint template was parameterised AND no remaining
live flow (across all workspaces) still resolves to that topic.
Three scopes fall out naturally from template analysis:
* {id} -> per-flow, deleted on stop
* {blueprint} -> per-blueprint, kept while any flow of the
same blueprint exists
* {workspace} -> per-workspace, kept while any flow in the
workspace exists
* literal -> global, never deleted (e.g. tg.request.librarian)
Fixes a bug where stopping a flow silently destroyed the global
librarian exchange, wedging all library operations until manual
restart.
RabbitMQ backend
----------------
- heartbeat=60, blocked_connection_timeout=300. Catches silently
dead connections (broker restart, orphaned channels, network
partitions) within ~2 heartbeat windows, so the consumer
reconnects and re-binds its queue rather than sitting forever
on a zombie connection.
Tests
-----
- Full test refresh: unit, integration, contract, provenance.
- Dropped user-field assertions and constructor kwargs across
~100 test files.
- Renamed user-collection isolation tests to workspace-collection.
2026-04-21 23:23:01 +01:00
result = await processor . query_triples ( ' large_dataset_user ' , query )
2025-09-18 19:52:05 +01:00
# Verify optimized get_po was used (no ALLOW FILTERING needed!)
mock_tg_instance . get_po . assert_called_once_with (
' massive_collection ' ,
' http://www.w3.org/1999/02/22-rdf-syntax-ns#type ' ,
' http://example.com/Person ' ,
2026-01-27 13:48:08 +00:00
g = None ,
2025-09-18 19:52:05 +01:00
limit = 1000
)
# Verify all results were returned
assert len ( result ) == 5
for i , triple in enumerate ( result ) :
GraphRAG Query-Time Explainability (#677)
Implements full explainability pipeline for GraphRAG queries, enabling
traceability from answers back to source documents.
Renamed throughout for clarity:
- provenance_callback → explain_callback
- provenance_id → explain_id
- provenance_collection → explain_collection
- message_type "provenance" → "explain"
- Queue name "provenance" → "explainability"
GraphRAG queries now emit explainability events as they execute:
1. Session - query text and timestamp
2. Retrieval - edges retrieved from subgraph
3. Selection - selected edges with LLM reasoning (JSONL with id +
reasoning)
4. Answer - reference to synthesized response
Events stream via explain_callback during query(), enabling
real-time UX.
- Answers stored in librarian service (not inline in graph - too large)
- Document ID as URN: urn:trustgraph:answer:{session_id}
- Graph stores tg:document reference (IRI) to librarian document
- Added librarian producer/consumer to graph-rag service
- get_labelgraph() now returns (labeled_edges, uri_map)
- uri_map maps edge_id(label_s, label_p, label_o) →
(uri_s, uri_p, uri_o)
- Explainability data stores original URIs, not labels
- Enables tracing edges back to reifying statements via tg:reifies
- Added serialize_triple() to query service (matches storage format)
- get_term_value() now handles TRIPLE type terms
- Enables querying by quoted triple in object position:
?stmt tg:reifies <<s p o>>
- Displays real-time explainability events during query
- Resolves rdfs:label for edge components (s, p, o)
- Traces source chain via prov:wasDerivedFrom to root document
- Output: "Source: Chunk 1 → Page 2 → Document Title"
- Label caching to avoid repeated queries
GraphRagResponse:
- explain_id: str | None
- explain_collection: str | None
- message_type: str ("chunk" or "explain")
- end_of_session: bool
trustgraph-base/trustgraph/provenance/:
- namespaces.py - Added TG_DOCUMENT predicate
- triples.py - answer_triples() supports document_id reference
- uris.py - Added edge_selection_uri()
trustgraph-base/trustgraph/schema/services/retrieval.py:
- GraphRagResponse with explain_id, explain_collection, end_of_session
trustgraph-flow/trustgraph/retrieval/graph_rag/:
- graph_rag.py - URI preservation, streaming answer accumulation
- rag.py - Librarian integration, real-time explain emission
trustgraph-flow/trustgraph/query/triples/cassandra/service.py:
- Quoted triple serialization for query matching
trustgraph-cli/trustgraph/cli/invoke_graph_rag.py:
- Full explainability display with label resolution and source tracing
2026-03-10 10:00:01 +00:00
assert triple . s . iri == f ' subject_ { i } ' # Mock returns literal values
2026-01-27 13:48:08 +00:00
assert triple . p . iri == ' http://www.w3.org/1999/02/22-rdf-syntax-ns#type '
assert triple . p . type == IRI
assert triple . o . iri == ' http://example.com/Person ' # URIs use .iri
assert triple . o . type == IRI