2025-08-13 16:07:58 +01:00
|
|
|
"""
|
|
|
|
|
Unit tests for the load_knowledge CLI module.
|
|
|
|
|
|
|
|
|
|
Tests the business logic of loading triples and entity contexts from Turtle files
|
2025-12-04 21:11:56 +00:00
|
|
|
using the BulkClient API.
|
2025-08-13 16:07:58 +01:00
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
import tempfile
|
2025-12-04 21:11:56 +00:00
|
|
|
from unittest.mock import Mock, patch, MagicMock, call
|
2025-08-13 16:07:58 +01:00
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
from trustgraph.cli.load_knowledge import KnowledgeLoader, main
|
2025-12-04 21:11:56 +00:00
|
|
|
from trustgraph.api import Triple
|
2025-08-13 16:07:58 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def sample_turtle_content():
    """Provide a small Turtle document used as input by the loader tests.

    The document declares the ``ex`` and ``foaf`` prefixes and describes two
    subjects (``ex:john`` and ``ex:mary``) with literal-valued properties plus
    one URI-valued ``foaf:knows`` link.
    """
    turtle_document = """
@prefix ex: <http://example.org/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .

ex:john foaf:name "John Smith" ;
foaf:age "30" ;
foaf:knows ex:mary .

ex:mary foaf:name "Mary Johnson" ;
foaf:email "mary@example.com" .
"""
    return turtle_document
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def temp_turtle_file(sample_turtle_content):
    """Yield the path of a temporary ``.ttl`` file holding the sample Turtle.

    Args:
        sample_turtle_content: Turtle text to write into the file.

    Yields:
        The path of the temporary file, as a string.

    The write handle is closed before the path is handed to the test, so the
    test always sees a fully written file that no other handle holds open.
    The file is removed once the test is finished with it.
    """
    # Turtle documents are UTF-8, so write with an explicit encoding rather
    # than whatever the platform default happens to be.
    with tempfile.NamedTemporaryFile(
        mode='w', suffix='.ttl', delete=False, encoding='utf-8'
    ) as handle:
        handle.write(sample_turtle_content)
        turtle_path = handle.name

    try:
        yield turtle_path
    finally:
        # delete=False means nothing else cleans this file up.
        Path(turtle_path).unlink(missing_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def knowledge_loader():
    """Build a KnowledgeLoader populated with fixed test values.

    No token is supplied (``token=None``); the file list refers to
    ``test.ttl``, which this fixture does not create.
    """
    loader_settings = {
        "files": ["test.ttl"],
        "flow": "test-flow",
        "workspace": "test-user",
        "collection": "test-collection",
        "document_id": "test-doc-123",
        "url": "http://test.example.com/",
        "token": None,
    }
    return KnowledgeLoader(**loader_settings)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestKnowledgeLoader:
    """Test the KnowledgeLoader class business logic."""

    def test_init_stores_parameters_correctly(self):
        """Constructor keyword arguments are exposed as same-named attributes."""
        loader = KnowledgeLoader(
            files=["file1.ttl", "file2.ttl"],
            flow="my-flow",
            workspace="user1",
            collection="col1",
            document_id="doc1",
            url="http://example.com/",
            token="test-token"
        )

        assert loader.files == ["file1.ttl", "file2.ttl"]
        assert loader.flow == "my-flow"
        assert loader.workspace == "user1"
        assert loader.collection == "col1"
        assert loader.document_id == "doc1"
        assert loader.url == "http://example.com/"
        assert loader.token == "test-token"

    def test_load_triples_from_file_yields_triples(self, temp_turtle_file, knowledge_loader):
        """load_triples_from_file yields Triple objects with string s/p/o."""
        triples = list(knowledge_loader.load_triples_from_file(temp_turtle_file))

        # Should have triples for all statements in the file
        assert len(triples) > 0

        # Verify they are Triple objects
        for triple in triples:
            assert isinstance(triple, Triple)
            assert hasattr(triple, 's')
            assert hasattr(triple, 'p')
            assert hasattr(triple, 'o')
            assert isinstance(triple.s, str)
            assert isinstance(triple.p, str)
            assert isinstance(triple.o, str)

    def test_load_entity_contexts_from_file_yields_literals_only(self, temp_turtle_file, knowledge_loader):
        """Entity contexts come out as (entity, context) string pairs."""
        contexts = list(knowledge_loader.load_entity_contexts_from_file(temp_turtle_file))

        # Should have contexts for literal objects (foaf:name, foaf:age, foaf:email)
        assert len(contexts) > 0

        # Verify format: (entity, context) tuples
        for entity, context in contexts:
            assert isinstance(entity, str)
            assert isinstance(context, str)
            # Entity should be a URI (subject)
            assert entity.startswith("http://")

    def test_load_entity_contexts_skips_uri_objects(self):
        """A file whose objects are all URIs produces no entity contexts."""
        # Create turtle with only URI objects (no literals)
        turtle_content = """
@prefix ex: <http://example.org/> .
ex:john ex:knows ex:mary .
ex:mary ex:knows ex:bob .
"""

        with tempfile.NamedTemporaryFile(mode='w', suffix='.ttl', delete=False) as f:
            f.write(turtle_content)
            turtle_path = f.name

        # The file was created with delete=False, so remove it ourselves --
        # in a finally block so a failure inside the loader cannot leak it.
        try:
            loader = KnowledgeLoader(
                files=[turtle_path],
                flow="test-flow",
                workspace="test-user",
                collection="test-collection",
                document_id="test-doc",
                url="http://test.example.com/"
            )

            contexts = list(loader.load_entity_contexts_from_file(turtle_path))
        finally:
            Path(turtle_path).unlink(missing_ok=True)

        # Should have no contexts since there are no literals
        assert len(contexts) == 0

    @patch('trustgraph.cli.load_knowledge.Api')
    def test_run_calls_bulk_api(self, mock_api_class, temp_turtle_file):
        """run() builds an Api, obtains its bulk client and imports through it."""
        # Setup mocks: Api(...) -> mock_api, mock_api.bulk() -> mock_bulk
        mock_api = MagicMock()
        mock_bulk = MagicMock()
        mock_api_class.return_value = mock_api
        mock_api.bulk.return_value = mock_bulk

        loader = KnowledgeLoader(
            files=[temp_turtle_file],
            flow="test-flow",
            workspace="test-user",
            collection="test-collection",
            document_id="test-doc",
            url="http://test.example.com/",
            token="test-token"
        )

        loader.run()

        # Verify Api was created with correct parameters
        mock_api_class.assert_called_once_with(
            url="http://test.example.com/",
            token="test-token",
            workspace="test-user"
        )

        # Verify bulk client was obtained
        mock_api.bulk.assert_called_once()

        # Verify import_triples was called
        assert mock_bulk.import_triples.call_count == 1
        triples_kwargs = mock_bulk.import_triples.call_args.kwargs
        assert triples_kwargs['flow'] == "test-flow"
        assert triples_kwargs['metadata']['id'] == "test-doc"
        assert triples_kwargs['metadata']['collection'] == "test-collection"

        # Verify import_entity_contexts was called
        assert mock_bulk.import_entity_contexts.call_count == 1
        contexts_kwargs = mock_bulk.import_entity_contexts.call_args.kwargs
        assert contexts_kwargs['flow'] == "test-flow"
        assert contexts_kwargs['metadata']['id'] == "test-doc"
|
2025-08-13 16:07:58 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestCLIArgumentParsing:
|
|
|
|
|
"""Test CLI argument parsing and main function."""
|
|
|
|
|
|
|
|
|
|
@patch('trustgraph.cli.load_knowledge.KnowledgeLoader')
@patch('trustgraph.cli.load_knowledge.time.sleep')
def test_main_parses_args_correctly(self, mock_sleep, mock_loader_class):
    """main() forwards every command-line option to KnowledgeLoader.

    ``sys.argv`` is replaced with a full option set; the test then checks
    that the (patched) KnowledgeLoader was constructed exactly once with the
    matching keyword arguments and that ``run()`` was invoked on it.
    """
    loader_stub = MagicMock()
    mock_loader_class.return_value = loader_stub

    argv = ['tg-load-knowledge']
    argv += ['-i', 'doc-123']
    argv += ['-f', 'my-flow']
    argv += ['-w', 'my-user']
    argv += ['-C', 'my-collection']
    argv += ['-u', 'http://custom.example.com/']
    argv += ['-t', 'my-token']
    argv += ['file1.ttl', 'file2.ttl']

    # Keyword arguments main() is expected to hand to the loader.
    expected_kwargs = {
        'document_id': 'doc-123',
        'url': 'http://custom.example.com/',
        'token': 'my-token',
        'flow': 'my-flow',
        'files': ['file1.ttl', 'file2.ttl'],
        'workspace': 'my-user',
        'collection': 'my-collection',
    }

    with patch('sys.argv', argv):
        main()

    # Verify KnowledgeLoader was instantiated with correct args
    mock_loader_class.assert_called_once_with(**expected_kwargs)

    # Verify run was called
    loader_stub.run.assert_called_once()
|
2025-08-13 16:07:58 +01:00
|
|
|
|
|
|
|
|
@patch('trustgraph.cli.load_knowledge.KnowledgeLoader')
@patch('trustgraph.cli.load_knowledge.time.sleep')
def test_main_uses_defaults(self, mock_sleep, mock_loader_class):
    """Check that main() falls back to defaults for options left unset."""
    mock_loader_class.return_value = MagicMock()

    # Only the document id and a single input file are supplied
    argv = ['tg-load-knowledge', '-i', 'doc-123', 'file1.ttl']
    with patch('sys.argv', argv):
        main()

    # Inspect the keyword arguments KnowledgeLoader was constructed with
    passed = mock_loader_class.call_args.kwargs
    assert passed['flow'] == 'default'
    assert passed['workspace'] == 'default'
    assert passed['collection'] == 'default'
    assert passed['url'] == 'http://localhost:8088/'
    assert passed['token'] is None
|
2025-08-13 16:07:58 +01:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestErrorHandling:
    """Test error handling scenarios."""

    def test_load_triples_handles_invalid_turtle(self, knowledge_loader):
        """Test that invalid Turtle content makes triple loading raise.

        The temporary file is removed in a ``finally`` clause so it is
        cleaned up even when the expected exception is not raised.
        """
        # Create file with invalid Turtle content; leaving the ``with``
        # block closes (and flushes) it so it can be reopened by name.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.ttl', delete=False) as f:
            f.write("Invalid Turtle Content {{{")

        try:
            # list() drains the returned iterable in case parsing is lazy
            with pytest.raises(Exception):
                list(knowledge_loader.load_triples_from_file(f.name))
        finally:
            Path(f.name).unlink(missing_ok=True)

    def test_load_entity_contexts_handles_invalid_turtle(self, knowledge_loader):
        """Test that invalid Turtle content makes entity-context loading raise.

        The temporary file is removed in a ``finally`` clause so it is
        cleaned up even when the expected exception is not raised.
        """
        # Create file with invalid Turtle content; leaving the ``with``
        # block closes (and flushes) it so it can be reopened by name.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.ttl', delete=False) as f:
            f.write("Invalid Turtle Content {{{")

        try:
            # list() drains the returned iterable in case parsing is lazy
            with pytest.raises(Exception):
                list(knowledge_loader.load_entity_contexts_from_file(f.name))
        finally:
            Path(f.name).unlink(missing_ok=True)

    @patch('trustgraph.cli.load_knowledge.Api')
    @patch('builtins.print')  # Mock print to avoid output during tests
    def test_run_handles_api_errors(self, mock_print, mock_api_class, temp_turtle_file):
        """Test that an error raised when constructing Api escapes run()."""
        # Calling the patched Api class raises instead of returning a client
        mock_api_class.side_effect = Exception("API connection failed")

        loader = KnowledgeLoader(
            files=[temp_turtle_file],
            flow="test-flow",
            workspace="test-user",
            collection="test-collection",
            document_id="test-doc",
            url="http://test.example.com/"
        )

        # Should raise the exception
        with pytest.raises(Exception, match="API connection failed"):
            loader.run()

    @patch('trustgraph.cli.load_knowledge.KnowledgeLoader')
    @patch('trustgraph.cli.load_knowledge.time.sleep')
    @patch('builtins.print')  # Mock print to avoid output during tests
    def test_main_retries_on_exception(self, mock_print, mock_sleep, mock_loader_class):
        """Test that main() retries on exceptions.

        ``time.sleep`` is patched so the pause between attempts does not
        slow the test down; its argument is still checked.
        """
        mock_loader_instance = MagicMock()
        mock_loader_class.return_value = mock_loader_instance

        # First call raises exception, second succeeds
        mock_loader_instance.run.side_effect = [Exception("Test error"), None]

        test_args = [
            'tg-load-knowledge',
            '-i', 'doc-123',
            'file1.ttl'
        ]

        with patch('sys.argv', test_args):
            main()

        # Should have been called twice (first failed, second succeeded)
        assert mock_loader_instance.run.call_count == 2
        mock_sleep.assert_called_once_with(10)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestDataValidation:
    """Test data validation and edge cases."""

    def test_empty_turtle_file(self, knowledge_loader):
        """Test that an empty Turtle file yields no triples and no contexts.

        The temporary file is removed in a ``finally`` clause so a failing
        assertion does not leave it behind.
        """
        # Leaving the ``with`` block closes the (empty) file so it can be
        # reopened by name.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.ttl', delete=False) as f:
            f.write("")  # Empty file

        try:
            triples = list(knowledge_loader.load_triples_from_file(f.name))
            contexts = list(knowledge_loader.load_entity_contexts_from_file(f.name))

            # Should return empty lists for empty file
            assert len(triples) == 0
            assert len(contexts) == 0
        finally:
            Path(f.name).unlink(missing_ok=True)

    def test_turtle_with_mixed_literals_and_uris(self, knowledge_loader):
        """Test handling of Turtle with mixed literal and URI objects.

        The temporary file is removed in a ``finally`` clause so a failing
        assertion does not leave it behind.
        """
        turtle_content = """
        @prefix ex: <http://example.org/> .
        ex:john ex:name "John Smith" ;
                ex:age "25" ;
                ex:knows ex:mary ;
                ex:city "New York" .
        ex:mary ex:name "Mary Johnson" .
        """

        # Leaving the ``with`` block closes (and flushes) the file so it can
        # be reopened by name.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.ttl', delete=False) as f:
            f.write(turtle_content)

        try:
            contexts = list(knowledge_loader.load_entity_contexts_from_file(f.name))

            # Should have 4 entity contexts (for the 4 literals:
            # "John Smith", "25", "New York", "Mary Johnson").
            # URI ex:mary should be skipped
            assert len(contexts) == 4

            # Each item unpacks as an (entity, context) pair; only the
            # context text is checked here.
            context_values = [context for _entity, context in contexts]

            assert "John Smith" in context_values
            assert "25" in context_values
            assert "New York" in context_values
            assert "Mary Johnson" in context_values
        finally:
            Path(f.name).unlink(missing_ok=True)
|