From 30ca1d2e8b0754286376d50b77a0c0b6e729d4d0 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Fri, 9 Jan 2026 14:48:44 +0000 Subject: [PATCH 1/9] Add missing trustgraph-base dependency (#606) --- trustgraph-mcp/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/trustgraph-mcp/pyproject.toml b/trustgraph-mcp/pyproject.toml index c99b296e..48449831 100644 --- a/trustgraph-mcp/pyproject.toml +++ b/trustgraph-mcp/pyproject.toml @@ -12,6 +12,7 @@ requires-python = ">=3.8" dependencies = [ "mcp", "websockets", + "trustgraph-base", ] classifiers = [ "Programming Language :: Python :: 3", From 807f6cc4e22c709e785eaf174f04dbf0cf7e0fa5 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Mon, 12 Jan 2026 18:45:52 +0000 Subject: [PATCH 2/9] Fix non streaming RAG problems (#607) * Fix non-streaming failure in RAG services * Fix non-streaming failure in API * Fix agent non-streaming messaging * Agent messaging unit & contract tests --- .../test_translator_completion_flags.py | 242 ++++++++++++++++++ .../test_agent_service_non_streaming.py | 206 +++++++++++++++ .../test_document_rag_service.py | 54 ++++ .../test_retrieval/test_graph_rag_service.py | 134 ++++++++++ .../trustgraph/api/socket_client.py | 30 ++- .../trustgraph/base/agent_service.py | 8 +- .../trustgraph/messaging/translators/agent.py | 5 +- .../trustgraph/agent/react/service.py | 17 +- .../trustgraph/retrieval/document_rag/rag.py | 1 + .../trustgraph/retrieval/graph_rag/rag.py | 1 + 10 files changed, 677 insertions(+), 21 deletions(-) create mode 100644 tests/contract/test_translator_completion_flags.py create mode 100644 tests/unit/test_agent/test_agent_service_non_streaming.py create mode 100644 tests/unit/test_retrieval/test_graph_rag_service.py diff --git a/tests/contract/test_translator_completion_flags.py b/tests/contract/test_translator_completion_flags.py new file mode 100644 index 00000000..c01156ae --- /dev/null +++ b/tests/contract/test_translator_completion_flags.py @@ -0,0 +1,242 @@ +""" 
+Contract tests for message translator completion flag behavior. + +These tests verify that translators correctly compute the is_final flag +based on message fields like end_of_stream and end_of_dialog. +""" + +import pytest + +from trustgraph.schema import ( + GraphRagResponse, DocumentRagResponse, AgentResponse, Error +) +from trustgraph.messaging import TranslatorRegistry + + +@pytest.mark.contract +class TestRAGTranslatorCompletionFlags: + """Contract tests for RAG response translator completion flags""" + + def test_graph_rag_translator_is_final_with_end_of_stream_true(self): + """ + Test that GraphRagResponseTranslator returns is_final=True + when end_of_stream=True. + """ + # Arrange + translator = TranslatorRegistry.get_response_translator("graph-rag") + response = GraphRagResponse( + response="A small domesticated mammal.", + end_of_stream=True, + error=None + ) + + # Act + response_dict, is_final = translator.from_response_with_completion(response) + + # Assert + assert is_final is True, "is_final must be True when end_of_stream=True" + assert response_dict["response"] == "A small domesticated mammal." + assert response_dict["end_of_stream"] is True + + def test_graph_rag_translator_is_final_with_end_of_stream_false(self): + """ + Test that GraphRagResponseTranslator returns is_final=False + when end_of_stream=False. + """ + # Arrange + translator = TranslatorRegistry.get_response_translator("graph-rag") + response = GraphRagResponse( + response="Chunk 1", + end_of_stream=False, + error=None + ) + + # Act + response_dict, is_final = translator.from_response_with_completion(response) + + # Assert + assert is_final is False, "is_final must be False when end_of_stream=False" + assert response_dict["response"] == "Chunk 1" + assert response_dict["end_of_stream"] is False + + def test_document_rag_translator_is_final_with_end_of_stream_true(self): + """ + Test that DocumentRagResponseTranslator returns is_final=True + when end_of_stream=True. 
+ """ + # Arrange + translator = TranslatorRegistry.get_response_translator("document-rag") + response = DocumentRagResponse( + response="A document about cats.", + end_of_stream=True, + error=None + ) + + # Act + response_dict, is_final = translator.from_response_with_completion(response) + + # Assert + assert is_final is True, "is_final must be True when end_of_stream=True" + assert response_dict["response"] == "A document about cats." + assert response_dict["end_of_stream"] is True + + def test_document_rag_translator_is_final_with_end_of_stream_false(self): + """ + Test that DocumentRagResponseTranslator returns is_final=False + when end_of_stream=False. + """ + # Arrange + translator = TranslatorRegistry.get_response_translator("document-rag") + response = DocumentRagResponse( + response="Chunk 1", + end_of_stream=False, + error=None + ) + + # Act + response_dict, is_final = translator.from_response_with_completion(response) + + # Assert + assert is_final is False, "is_final must be False when end_of_stream=False" + assert response_dict["response"] == "Chunk 1" + assert response_dict["end_of_stream"] is False + + +@pytest.mark.contract +class TestAgentTranslatorCompletionFlags: + """Contract tests for Agent response translator completion flags""" + + def test_agent_translator_is_final_with_end_of_dialog_true(self): + """ + Test that AgentResponseTranslator returns is_final=True + when end_of_dialog=True. 
+ """ + # Arrange + translator = TranslatorRegistry.get_response_translator("agent") + response = AgentResponse( + answer="4", + error=None, + thought=None, + observation=None, + end_of_message=True, + end_of_dialog=True + ) + + # Act + response_dict, is_final = translator.from_response_with_completion(response) + + # Assert + assert is_final is True, "is_final must be True when end_of_dialog=True" + assert response_dict["answer"] == "4" + assert response_dict["end_of_dialog"] is True + + def test_agent_translator_is_final_with_end_of_dialog_false(self): + """ + Test that AgentResponseTranslator returns is_final=False + when end_of_dialog=False. + """ + # Arrange + translator = TranslatorRegistry.get_response_translator("agent") + response = AgentResponse( + answer=None, + error=None, + thought="I need to solve this.", + observation=None, + end_of_message=True, + end_of_dialog=False + ) + + # Act + response_dict, is_final = translator.from_response_with_completion(response) + + # Assert + assert is_final is False, "is_final must be False when end_of_dialog=False" + assert response_dict["thought"] == "I need to solve this." + assert response_dict["end_of_dialog"] is False + + def test_agent_translator_is_final_fallback_with_answer(self): + """ + Test that AgentResponseTranslator returns is_final=True + when answer is present (fallback for legacy responses). + """ + # Arrange + translator = TranslatorRegistry.get_response_translator("agent") + # Legacy response without end_of_dialog flag + response = AgentResponse( + answer="4", + error=None, + thought=None, + observation=None + ) + + # Act + response_dict, is_final = translator.from_response_with_completion(response) + + # Assert + assert is_final is True, "is_final must be True when answer is present (legacy fallback)" + assert response_dict["answer"] == "4" + + def test_agent_translator_intermediate_message_is_not_final(self): + """ + Test that intermediate messages (thought/observation) return is_final=False. 
+ """ + # Arrange + translator = TranslatorRegistry.get_response_translator("agent") + + # Test thought message + thought_response = AgentResponse( + answer=None, + error=None, + thought="Processing...", + observation=None, + end_of_message=True, + end_of_dialog=False + ) + + # Act + thought_dict, thought_is_final = translator.from_response_with_completion(thought_response) + + # Assert + assert thought_is_final is False, "Thought message must not be final" + + # Test observation message + observation_response = AgentResponse( + answer=None, + error=None, + thought=None, + observation="Result found", + end_of_message=True, + end_of_dialog=False + ) + + # Act + obs_dict, obs_is_final = translator.from_response_with_completion(observation_response) + + # Assert + assert obs_is_final is False, "Observation message must not be final" + + def test_agent_translator_streaming_format_with_end_of_dialog(self): + """ + Test that streaming format messages use end_of_dialog for is_final. + """ + # Arrange + translator = TranslatorRegistry.get_response_translator("agent") + + # Streaming format with end_of_dialog=True + response = AgentResponse( + chunk_type="answer", + content="", + end_of_message=True, + end_of_dialog=True, + answer=None, + error=None, + thought=None, + observation=None + ) + + # Act + response_dict, is_final = translator.from_response_with_completion(response) + + # Assert + assert is_final is True, "Streaming format must use end_of_dialog for is_final" + assert response_dict["end_of_dialog"] is True diff --git a/tests/unit/test_agent/test_agent_service_non_streaming.py b/tests/unit/test_agent/test_agent_service_non_streaming.py new file mode 100644 index 00000000..0fd2060d --- /dev/null +++ b/tests/unit/test_agent/test_agent_service_non_streaming.py @@ -0,0 +1,206 @@ +""" +Unit tests for Agent service non-streaming mode. +Tests that end_of_message and end_of_dialog flags are correctly set. 
+""" + +import pytest +from unittest.mock import MagicMock, AsyncMock, patch + +from trustgraph.agent.react.service import Processor +from trustgraph.schema import AgentRequest, AgentResponse +from trustgraph.agent.react.types import Final + + +class TestAgentServiceNonStreaming: + """Test Agent service non-streaming behavior""" + + @patch('trustgraph.agent.react.service.AgentManager') + @pytest.mark.asyncio + async def test_non_streaming_intermediate_messages_have_correct_flags(self, mock_agent_manager_class): + """ + Test that intermediate messages (thought/observation) in non-streaming mode + have end_of_message=True and end_of_dialog=False. + """ + # Setup processor + processor = Processor( + taskgroup=MagicMock(), + id="test-agent", + max_iterations=10 + ) + + # Track all responses sent + sent_responses = [] + + # Setup mock agent manager + mock_agent_instance = AsyncMock() + mock_agent_manager_class.return_value = mock_agent_instance + + # Mock react to call think and observe callbacks + async def mock_react(question, history, think, observe, answer, context, streaming): + await think("I need to solve this.", is_final=True) + await observe("The answer is 4.", is_final=True) + return Final(thought="Final answer", final="4") + + mock_agent_instance.react = mock_react + + # Setup message with non-streaming request + msg = MagicMock() + msg.value.return_value = AgentRequest( + question="What is 2 + 2?", + user="trustgraph", + streaming=False # Non-streaming mode + ) + msg.properties.return_value = {"id": "test-id"} + + # Setup flow mock + consumer = MagicMock() + flow = MagicMock() + + mock_producer = AsyncMock() + + async def capture_response(response, properties): + sent_responses.append(response) + + mock_producer.send = AsyncMock(side_effect=capture_response) + + def flow_router(service_name): + if service_name == "response": + return mock_producer + return AsyncMock() + + flow.side_effect = flow_router + + # Execute + await processor.on_request(msg, 
consumer, flow) + + # Verify: should have 3 responses (thought, observation, answer) + assert len(sent_responses) == 3, f"Expected 3 responses, got {len(sent_responses)}" + + # Check thought message + thought_response = sent_responses[0] + assert isinstance(thought_response, AgentResponse) + assert thought_response.thought == "I need to solve this." + assert thought_response.answer is None + assert thought_response.end_of_message is True, "Thought message must have end_of_message=True" + assert thought_response.end_of_dialog is False, "Thought message must have end_of_dialog=False" + + # Check observation message + observation_response = sent_responses[1] + assert isinstance(observation_response, AgentResponse) + assert observation_response.observation == "The answer is 4." + assert observation_response.answer is None + assert observation_response.end_of_message is True, "Observation message must have end_of_message=True" + assert observation_response.end_of_dialog is False, "Observation message must have end_of_dialog=False" + + @patch('trustgraph.agent.react.service.AgentManager') + @pytest.mark.asyncio + async def test_non_streaming_final_answer_has_correct_flags(self, mock_agent_manager_class): + """ + Test that final answer in non-streaming mode has + end_of_message=True and end_of_dialog=True. 
+ """ + # Setup processor + processor = Processor( + taskgroup=MagicMock(), + id="test-agent", + max_iterations=10 + ) + + # Track all responses sent + sent_responses = [] + + # Setup mock agent manager + mock_agent_instance = AsyncMock() + mock_agent_manager_class.return_value = mock_agent_instance + + # Mock react to return Final directly + async def mock_react(question, history, think, observe, answer, context, streaming): + return Final(thought="Final answer", final="4") + + mock_agent_instance.react = mock_react + + # Setup message with non-streaming request + msg = MagicMock() + msg.value.return_value = AgentRequest( + question="What is 2 + 2?", + user="trustgraph", + streaming=False # Non-streaming mode + ) + msg.properties.return_value = {"id": "test-id"} + + # Setup flow mock + consumer = MagicMock() + flow = MagicMock() + + mock_producer = AsyncMock() + + async def capture_response(response, properties): + sent_responses.append(response) + + mock_producer.send = AsyncMock(side_effect=capture_response) + + def flow_router(service_name): + if service_name == "response": + return mock_producer + return AsyncMock() + + flow.side_effect = flow_router + + # Execute + await processor.on_request(msg, consumer, flow) + + # Verify: should have 1 response (final answer) + assert len(sent_responses) == 1, f"Expected 1 response, got {len(sent_responses)}" + + # Check final answer message + answer_response = sent_responses[0] + assert isinstance(answer_response, AgentResponse) + assert answer_response.answer == "4" + assert answer_response.thought is None + assert answer_response.observation is None + assert answer_response.end_of_message is True, "Final answer must have end_of_message=True" + assert answer_response.end_of_dialog is True, "Final answer must have end_of_dialog=True" + + @pytest.mark.asyncio + async def test_error_response_has_correct_flags(self): + """ + Test that error responses have end_of_message=True and end_of_dialog=True. 
+ """ + # Setup processor that will error + processor = Processor( + taskgroup=MagicMock(), + id="test-agent", + max_iterations=10 + ) + + # Track all responses sent + sent_responses = [] + + # Setup message + msg = MagicMock() + msg.value.side_effect = Exception("Test error") + msg.properties.return_value = {"id": "test-id"} + + # Setup flow mock + consumer = MagicMock() + flow = MagicMock() + flow.producer = {"response": AsyncMock()} + + async def capture_response(response, properties): + sent_responses.append(response) + + flow.producer["response"].send = AsyncMock(side_effect=capture_response) + + # Execute + await processor.on_request(msg, consumer, flow) + + # Verify: should have 1 error response + assert len(sent_responses) == 1, f"Expected 1 error response, got {len(sent_responses)}" + + # Check error response + error_response = sent_responses[0] + assert isinstance(error_response, AgentResponse) + assert error_response.error is not None + assert "Test error" in error_response.error.message + assert error_response.end_of_message is True, "Error response must have end_of_message=True" + assert error_response.end_of_dialog is True, "Error response must have end_of_dialog=True" diff --git a/tests/unit/test_retrieval/test_document_rag_service.py b/tests/unit/test_retrieval/test_document_rag_service.py index 55b9b97f..041d29df 100644 --- a/tests/unit/test_retrieval/test_document_rag_service.py +++ b/tests/unit/test_retrieval/test_document_rag_service.py @@ -74,4 +74,58 @@ class TestDocumentRagService: sent_response = mock_producer.send.call_args[0][0] assert isinstance(sent_response, DocumentRagResponse) assert sent_response.response == "test response" + assert sent_response.error is None + + @patch('trustgraph.retrieval.document_rag.rag.DocumentRag') + @pytest.mark.asyncio + async def test_non_streaming_mode_sets_end_of_stream_true(self, mock_document_rag_class): + """ + Test that non-streaming mode sets end_of_stream=True in response. 
+ + This is a regression test for the bug where non-streaming responses + didn't set end_of_stream, causing clients to hang waiting for more data. + """ + # Setup processor + processor = Processor( + taskgroup=MagicMock(), + id="test-processor", + doc_limit=10 + ) + + # Setup mock DocumentRag instance + mock_rag_instance = AsyncMock() + mock_document_rag_class.return_value = mock_rag_instance + mock_rag_instance.query.return_value = "A document about cats." + + # Setup message with non-streaming request + msg = MagicMock() + msg.value.return_value = DocumentRagQuery( + query="What is a cat?", + user="trustgraph", + collection="default", + doc_limit=10, + streaming=False # Non-streaming mode + ) + msg.properties.return_value = {"id": "test-id"} + + # Setup flow mock + consumer = MagicMock() + flow = MagicMock() + + mock_producer = AsyncMock() + def flow_router(service_name): + if service_name == "response": + return mock_producer + return AsyncMock() + flow.side_effect = flow_router + + # Execute + await processor.on_request(msg, consumer, flow) + + # Verify: response was sent with end_of_stream=True + mock_producer.send.assert_called_once() + sent_response = mock_producer.send.call_args[0][0] + assert isinstance(sent_response, DocumentRagResponse) + assert sent_response.response == "A document about cats." + assert sent_response.end_of_stream is True, "Non-streaming response must have end_of_stream=True" assert sent_response.error is None \ No newline at end of file diff --git a/tests/unit/test_retrieval/test_graph_rag_service.py b/tests/unit/test_retrieval/test_graph_rag_service.py new file mode 100644 index 00000000..ddfdfa75 --- /dev/null +++ b/tests/unit/test_retrieval/test_graph_rag_service.py @@ -0,0 +1,134 @@ +""" +Unit tests for GraphRAG service non-streaming mode. +Tests that end_of_stream flag is correctly set in non-streaming responses. 
+""" + +import pytest +from unittest.mock import MagicMock, AsyncMock, patch + +from trustgraph.retrieval.graph_rag.rag import Processor +from trustgraph.schema import GraphRagQuery, GraphRagResponse + + +class TestGraphRagService: + """Test GraphRAG service non-streaming behavior""" + + @patch('trustgraph.retrieval.graph_rag.rag.GraphRag') + @pytest.mark.asyncio + async def test_non_streaming_mode_sets_end_of_stream_true(self, mock_graph_rag_class): + """ + Test that non-streaming mode sets end_of_stream=True in response. + + This is a regression test for the bug where non-streaming responses + didn't set end_of_stream, causing clients to hang waiting for more data. + """ + # Setup processor + processor = Processor( + taskgroup=MagicMock(), + id="test-processor", + entity_limit=50, + triple_limit=30, + max_subgraph_size=150, + max_path_length=2 + ) + + # Setup mock GraphRag instance + mock_rag_instance = AsyncMock() + mock_graph_rag_class.return_value = mock_rag_instance + mock_rag_instance.query.return_value = "A small domesticated mammal." 
+ + # Setup message with non-streaming request + msg = MagicMock() + msg.value.return_value = GraphRagQuery( + query="What is a cat?", + user="trustgraph", + collection="default", + entity_limit=50, + triple_limit=30, + max_subgraph_size=150, + max_path_length=2, + streaming=False # Non-streaming mode + ) + msg.properties.return_value = {"id": "test-id"} + + # Setup flow mock + consumer = MagicMock() + flow = MagicMock() + + # Mock flow to return AsyncMock for clients and response producer + mock_producer = AsyncMock() + def flow_router(service_name): + if service_name == "response": + return mock_producer + return AsyncMock() # embeddings, graph-embeddings, triples, prompt clients + flow.side_effect = flow_router + + # Execute + await processor.on_request(msg, consumer, flow) + + # Verify: response was sent with end_of_stream=True + mock_producer.send.assert_called_once() + sent_response = mock_producer.send.call_args[0][0] + assert isinstance(sent_response, GraphRagResponse) + assert sent_response.response == "A small domesticated mammal." + assert sent_response.end_of_stream is True, "Non-streaming response must have end_of_stream=True" + assert sent_response.error is None + + @patch('trustgraph.retrieval.graph_rag.rag.GraphRag') + @pytest.mark.asyncio + async def test_error_response_in_non_streaming_mode(self, mock_graph_rag_class): + """ + Test that error responses in non-streaming mode set end_of_stream=True. 
+ """ + # Setup processor + processor = Processor( + taskgroup=MagicMock(), + id="test-processor", + entity_limit=50, + triple_limit=30, + max_subgraph_size=150, + max_path_length=2 + ) + + # Setup mock GraphRag instance that raises an exception + mock_rag_instance = AsyncMock() + mock_graph_rag_class.return_value = mock_rag_instance + mock_rag_instance.query.side_effect = Exception("Test error") + + # Setup message with non-streaming request + msg = MagicMock() + msg.value.return_value = GraphRagQuery( + query="What is a cat?", + user="trustgraph", + collection="default", + entity_limit=50, + triple_limit=30, + max_subgraph_size=150, + max_path_length=2, + streaming=False # Non-streaming mode + ) + msg.properties.return_value = {"id": "test-id"} + + # Setup flow mock + consumer = MagicMock() + flow = MagicMock() + + mock_producer = AsyncMock() + def flow_router(service_name): + if service_name == "response": + return mock_producer + return AsyncMock() + flow.side_effect = flow_router + + # Execute + await processor.on_request(msg, consumer, flow) + + # Verify: error response was sent without end_of_stream (not streaming mode) + mock_producer.send.assert_called_once() + sent_response = mock_producer.send.call_args[0][0] + assert isinstance(sent_response, GraphRagResponse) + assert sent_response.response is None + assert sent_response.error is not None + assert sent_response.error.message == "Test error" + # Note: error responses in non-streaming mode don't set end_of_stream + # because streaming was never started diff --git a/trustgraph-base/trustgraph/api/socket_client.py b/trustgraph-base/trustgraph/api/socket_client.py index b1be0195..e1b8f705 100644 --- a/trustgraph-base/trustgraph/api/socket_client.py +++ b/trustgraph-base/trustgraph/api/socket_client.py @@ -275,13 +275,17 @@ class SocketFlowInstance: result = self.client._send_request_sync("text-completion", self.flow_id, request, streaming) if streaming: - # For text completion, yield just the content - for 
chunk in result: - if hasattr(chunk, 'content'): - yield chunk.content + # For text completion, return generator that yields content + return self._text_completion_generator(result) else: return result.get("response", "") + def _text_completion_generator(self, result: Iterator[StreamingChunk]) -> Iterator[str]: + """Generator for text completion streaming""" + for chunk in result: + if hasattr(chunk, 'content'): + yield chunk.content + def graph_rag( self, query: str, @@ -308,9 +312,7 @@ class SocketFlowInstance: result = self.client._send_request_sync("graph-rag", self.flow_id, request, streaming) if streaming: - for chunk in result: - if hasattr(chunk, 'content'): - yield chunk.content + return self._rag_generator(result) else: return result.get("response", "") @@ -336,12 +338,16 @@ class SocketFlowInstance: result = self.client._send_request_sync("document-rag", self.flow_id, request, streaming) if streaming: - for chunk in result: - if hasattr(chunk, 'content'): - yield chunk.content + return self._rag_generator(result) else: return result.get("response", "") + def _rag_generator(self, result: Iterator[StreamingChunk]) -> Iterator[str]: + """Generator for RAG streaming (graph-rag and document-rag)""" + for chunk in result: + if hasattr(chunk, 'content'): + yield chunk.content + def prompt( self, id: str, @@ -360,9 +366,7 @@ class SocketFlowInstance: result = self.client._send_request_sync("prompt", self.flow_id, request, streaming) if streaming: - for chunk in result: - if hasattr(chunk, 'content'): - yield chunk.content + return self._rag_generator(result) else: return result.get("response", "") diff --git a/trustgraph-base/trustgraph/base/agent_service.py b/trustgraph-base/trustgraph/base/agent_service.py index 0d38114b..0e5524fe 100644 --- a/trustgraph-base/trustgraph/base/agent_service.py +++ b/trustgraph-base/trustgraph/base/agent_service.py @@ -48,13 +48,13 @@ class AgentService(FlowProcessor): async def on_request(self, msg, consumer, flow): + # Get ID 
early so error handler can use it + id = msg.properties().get("id", "unknown") + try: request = msg.value() - # Sender-produced ID - id = msg.properties()["id"] - async def respond(resp): await flow("response").send( @@ -93,6 +93,8 @@ class AgentService(FlowProcessor): thought = None, observation = None, answer = None, + end_of_message = True, + end_of_dialog = True, ), properties={"id": id} ) diff --git a/trustgraph-base/trustgraph/messaging/translators/agent.py b/trustgraph-base/trustgraph/messaging/translators/agent.py index 4319fd16..4289df0a 100644 --- a/trustgraph-base/trustgraph/messaging/translators/agent.py +++ b/trustgraph-base/trustgraph/messaging/translators/agent.py @@ -44,13 +44,16 @@ class AgentResponseTranslator(MessageTranslator): result["end_of_message"] = getattr(obj, "end_of_message", False) result["end_of_dialog"] = getattr(obj, "end_of_dialog", False) else: - # Legacy format + # Legacy format (non-streaming) if obj.answer: result["answer"] = obj.answer if obj.thought: result["thought"] = obj.thought if obj.observation: result["observation"] = obj.observation + # Include completion flags for legacy format too + result["end_of_message"] = getattr(obj, "end_of_message", False) + result["end_of_dialog"] = getattr(obj, "end_of_dialog", False) # Always include error if present if hasattr(obj, 'error') and obj.error and obj.error.message: diff --git a/trustgraph-flow/trustgraph/agent/react/service.py b/trustgraph-flow/trustgraph/agent/react/service.py index d4a4d72f..3af851d2 100755 --- a/trustgraph-flow/trustgraph/agent/react/service.py +++ b/trustgraph-flow/trustgraph/agent/react/service.py @@ -232,12 +232,14 @@ class Processor(AgentService): observation=None, ) else: - # Legacy format + # Non-streaming format r = AgentResponse( answer=None, error=None, thought=x, observation=None, + end_of_message=True, + end_of_dialog=False, ) await respond(r) @@ -260,12 +262,14 @@ class Processor(AgentService): observation=x, ) else: - # Legacy format + # 
Non-streaming format r = AgentResponse( answer=None, error=None, thought=None, observation=x, + end_of_message=True, + end_of_dialog=False, ) await respond(r) @@ -288,12 +292,14 @@ class Processor(AgentService): observation=None, ) else: - # Legacy format - shouldn't be called in non-streaming mode + # Non-streaming format - shouldn't normally be called r = AgentResponse( answer=x, error=None, thought=None, observation=None, + end_of_message=True, + end_of_dialog=False, ) await respond(r) @@ -364,11 +370,14 @@ class Processor(AgentService): thought=None, ) else: - # Legacy format - send complete answer + # Non-streaming format - send complete answer r = AgentResponse( answer=act.final, error=None, thought=None, + observation=None, + end_of_message=True, + end_of_dialog=True, ) await respond(r) diff --git a/trustgraph-flow/trustgraph/retrieval/document_rag/rag.py b/trustgraph-flow/trustgraph/retrieval/document_rag/rag.py index 14d71d97..6490562a 100755 --- a/trustgraph-flow/trustgraph/retrieval/document_rag/rag.py +++ b/trustgraph-flow/trustgraph/retrieval/document_rag/rag.py @@ -128,6 +128,7 @@ class Processor(FlowProcessor): await flow("response").send( DocumentRagResponse( response = response, + end_of_stream = True, error = None ), properties = {"id": id} diff --git a/trustgraph-flow/trustgraph/retrieval/graph_rag/rag.py b/trustgraph-flow/trustgraph/retrieval/graph_rag/rag.py index d159dbae..d8bfbddb 100755 --- a/trustgraph-flow/trustgraph/retrieval/graph_rag/rag.py +++ b/trustgraph-flow/trustgraph/retrieval/graph_rag/rag.py @@ -171,6 +171,7 @@ class Processor(FlowProcessor): await flow("response").send( GraphRagResponse( response = response, + end_of_stream = True, error = None ), properties = {"id": id} From 99f17d1b9da2eab2dfe9608f038f58c2d05f1874 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Mon, 12 Jan 2026 21:21:51 +0000 Subject: [PATCH 3/9] Fix non-streaming (2) (#608) --- .../trustgraph/api/socket_client.py | 21 ++++++++++++++++++- 
trustgraph-cli/trustgraph/cli/invoke_agent.py | 21 ++++++++++++++----- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/trustgraph-base/trustgraph/api/socket_client.py b/trustgraph-base/trustgraph/api/socket_client.py index e1b8f705..23e3dbc0 100644 --- a/trustgraph-base/trustgraph/api/socket_client.py +++ b/trustgraph-base/trustgraph/api/socket_client.py @@ -214,6 +214,23 @@ class SocketClient: content=resp.get("content", ""), end_of_message=resp.get("end_of_message", False) ) + # Non-streaming agent format: chunk_type is empty but has thought/observation/answer fields + elif resp.get("thought"): + return AgentThought( + content=resp.get("thought", ""), + end_of_message=resp.get("end_of_message", False) + ) + elif resp.get("observation"): + return AgentObservation( + content=resp.get("observation", ""), + end_of_message=resp.get("end_of_message", False) + ) + elif resp.get("answer"): + return AgentAnswer( + content=resp.get("answer", ""), + end_of_message=resp.get("end_of_message", False), + end_of_dialog=resp.get("end_of_dialog", False) + ) else: # RAG-style chunk (or generic chunk) # Text-completion uses "response" field, RAG uses "chunk" field, Prompt uses "text" field @@ -261,7 +278,9 @@ class SocketFlowInstance: request["history"] = history request.update(kwargs) - return self.client._send_request_sync("agent", self.flow_id, request, streaming) + # Agents always use multipart messaging (multiple complete messages) + # regardless of streaming flag, so always use the streaming code path + return self.client._send_request_sync("agent", self.flow_id, request, streaming=True) def text_completion(self, system: str, prompt: str, streaming: bool = False, **kwargs) -> Union[str, Iterator[str]]: """Text completion with optional streaming""" diff --git a/trustgraph-cli/trustgraph/cli/invoke_agent.py b/trustgraph-cli/trustgraph/cli/invoke_agent.py index de70021b..369fcdd4 100644 --- a/trustgraph-cli/trustgraph/cli/invoke_agent.py +++ 
b/trustgraph-cli/trustgraph/cli/invoke_agent.py @@ -178,11 +178,22 @@ def question( print() else: - # Non-streaming response - if "answer" in response: - print(response["answer"]) - if "error" in response: - raise RuntimeError(response["error"]) + # Non-streaming response - but agents use multipart messaging + # so we iterate through the chunks (which are complete messages, not text chunks) + for chunk in response: + # Display thoughts if verbose + if chunk.chunk_type == "thought" and verbose: + output(wrap(chunk.content), "\U0001f914 ") + print() + + # Display observations if verbose + elif chunk.chunk_type == "observation" and verbose: + output(wrap(chunk.content), "\U0001f4a1 ") + print() + + # Display answer + elif chunk.chunk_type == "final-answer" or chunk.chunk_type == "answer": + print(chunk.content) finally: # Clean up socket connection From b08db761d7ccff772a9dccb03ddd0c0f132d4c02 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Wed, 14 Jan 2026 12:31:40 +0000 Subject: [PATCH 4/9] Fix config inconsistency (#609) * Plural/singular confusion in config key * Flow class vs flow blueprint nomenclature change * Update docs & CLI to reflect the above --- docs/apis/api-flow.md | 44 +-- docs/cli/README.md | 16 +- ...w-class.md => tg-delete-flow-blueprint.md} | 148 ++++---- ...flow-class.md => tg-get-flow-blueprint.md} | 154 ++++---- docs/cli/tg-invoke-mcp-tool.md | 2 +- docs/cli/tg-invoke-prompt.md | 2 +- ...flow-class.md => tg-put-flow-blueprint.md} | 96 ++--- docs/cli/tg-show-config.md | 6 +- docs/cli/tg-show-flow-blueprints.md | 330 ++++++++++++++++++ docs/cli/tg-show-flow-classes.md | 330 ------------------ docs/cli/tg-show-flow-state.md | 8 +- docs/cli/tg-show-flows.md | 6 +- docs/cli/tg-start-flow.md | 32 +- docs/cli/tg-start-library-processing.md | 2 +- docs/cli/tg-stop-flow.md | 2 +- docs/tech-specs/flow-class-definition.md | 20 +- .../flow-configurable-parameters.md | 52 +-- tests/unit/test_base/test_flow_processor.py | 14 +- 
trustgraph-base/trustgraph/api/flow.py | 32 +- .../trustgraph/base/flow_processor.py | 6 +- .../trustgraph/messaging/translators/flow.py | 20 +- .../trustgraph/schema/services/flow.py | 28 +- trustgraph-cli/pyproject.toml | 8 +- ...flow_class.py => delete_flow_blueprint.py} | 18 +- ...et_flow_class.py => get_flow_blueprint.py} | 18 +- ...ut_flow_class.py => put_flow_blueprint.py} | 20 +- ...low_classes.py => show_flow_blueprints.py} | 26 +- .../trustgraph/cli/show_flow_state.py | 6 +- trustgraph-cli/trustgraph/cli/show_flows.py | 34 +- .../trustgraph/cli/show_parameter_types.py | 6 +- .../trustgraph/cli/show_token_costs.py | 4 +- trustgraph-cli/trustgraph/cli/start_flow.py | 12 +- .../trustgraph/cli/verify_system_status.py | 18 +- .../trustgraph/agent/mcp_tool/service.py | 4 +- .../trustgraph/config/service/flow.py | 104 +++--- .../trustgraph/metering/counter.py | 2 +- 36 files changed, 816 insertions(+), 814 deletions(-) rename docs/cli/{tg-delete-flow-class.md => tg-delete-flow-blueprint.md} (53%) rename docs/cli/{tg-get-flow-class.md => tg-get-flow-blueprint.md} (52%) rename docs/cli/{tg-put-flow-class.md => tg-put-flow-blueprint.md} (76%) create mode 100644 docs/cli/tg-show-flow-blueprints.md delete mode 100644 docs/cli/tg-show-flow-classes.md rename trustgraph-cli/trustgraph/cli/{delete_flow_class.py => delete_flow_blueprint.py} (65%) rename trustgraph-cli/trustgraph/cli/{get_flow_class.py => get_flow_blueprint.py} (67%) rename trustgraph-cli/trustgraph/cli/{put_flow_class.py => put_flow_blueprint.py} (69%) rename trustgraph-cli/trustgraph/cli/{show_flow_classes.py => show_flow_blueprints.py} (83%) diff --git a/docs/apis/api-flow.md b/docs/apis/api-flow.md index f78d96fd..fbd3d660 100644 --- a/docs/apis/api-flow.md +++ b/docs/apis/api-flow.md @@ -1,6 +1,6 @@ # TrustGraph Flow API -This API provides workflow management for TrustGraph components. It manages flow classes +This API provides workflow management for TrustGraph components. 
It manages flow blueprintes (workflow templates) and flow instances (active running workflows) that orchestrate complex data processing pipelines. @@ -10,26 +10,26 @@ complex data processing pipelines. The request contains the following fields: - `operation`: The operation to perform (see operations below) -- `class_name`: Flow class name (for class operations and start-flow) -- `class_definition`: Flow class definition JSON (for put-class) +- `blueprint_name`: Flow blueprint name (for class operations and start-flow) +- `class_definition`: Flow blueprint definition JSON (for put-class) - `description`: Flow description (for start-flow) - `flow_id`: Flow instance ID (for flow instance operations) ### Response The response contains the following fields: -- `class_names`: Array of flow class names (returned by list-classes) +- `blueprint_names`: Array of flow blueprint names (returned by list-classes) - `flow_ids`: Array of active flow IDs (returned by list-flows) -- `class_definition`: Flow class definition JSON (returned by get-class) +- `class_definition`: Flow blueprint definition JSON (returned by get-class) - `flow`: Flow instance JSON (returned by get-flow) - `description`: Flow description (returned by get-flow) - `error`: Error information if operation fails ## Operations -### Flow Class Operations +### Flow Blueprint Operations -#### LIST-CLASSES - List All Flow Classes +#### LIST-CLASSES - List All Flow Blueprintes Request: ```json @@ -41,17 +41,17 @@ Request: Response: ```json { - "class_names": ["pdf-processor", "text-analyzer", "knowledge-extractor"] + "blueprint_names": ["pdf-processor", "text-analyzer", "knowledge-extractor"] } ``` -#### GET-CLASS - Get Flow Class Definition +#### GET-CLASS - Get Flow Blueprint Definition Request: ```json { "operation": "get-class", - "class_name": "pdf-processor" + "blueprint_name": "pdf-processor" } ``` @@ -62,13 +62,13 @@ Response: } ``` -#### PUT-CLASS - Create/Update Flow Class +#### PUT-CLASS - Create/Update 
Flow Blueprint Request: ```json { "operation": "put-class", - "class_name": "pdf-processor", + "blueprint_name": "pdf-processor", "class_definition": "{\"interfaces\": {\"text-completion\": {\"request\": \"persistent://tg/request/text-completion\", \"response\": \"persistent://tg/response/text-completion\"}}, \"description\": \"PDF processing workflow\"}" } ``` @@ -78,13 +78,13 @@ Response: {} ``` -#### DELETE-CLASS - Remove Flow Class +#### DELETE-CLASS - Remove Flow Blueprint Request: ```json { "operation": "delete-class", - "class_name": "pdf-processor" + "blueprint_name": "pdf-processor" } ``` @@ -135,7 +135,7 @@ Request: ```json { "operation": "start-flow", - "class_name": "pdf-processor", + "blueprint_name": "pdf-processor", "flow_id": "flow-123", "description": "Processing document batch 1" } @@ -186,7 +186,7 @@ Response: { "id": "unique-request-id", "response": { - "class_names": ["pdf-processor", "text-analyzer"] + "blueprint_names": ["pdf-processor", "text-analyzer"] }, "complete": true } @@ -264,11 +264,11 @@ from trustgraph.api.flow import FlowClient client = FlowClient() -# List all flow classes -classes = await client.list_classes() +# List all flow blueprintes +classes = await client.list_blueprints() -# Get a flow class definition -definition = await client.get_class("pdf-processor") +# Get a flow blueprint definition +definition = await client.get_blueprint("pdf-processor") # Start a flow instance await client.start_flow("pdf-processor", "flow-123", "Processing batch 1") @@ -286,8 +286,8 @@ result = await flow.mcp_tool("file-reader", {"path": "/path/to/file.txt"}) ## Features -- **Flow Classes**: Templates that define workflow structure and interfaces -- **Flow Instances**: Active running workflows based on flow classes +- **Flow Blueprintes**: Templates that define workflow structure and interfaces +- **Flow Instances**: Active running workflows based on flow blueprintes - **Dynamic Management**: Flows can be started/stopped dynamically - 
**Template Processing**: Uses template replacement for customizing flow instances - **Integration**: Works with TrustGraph ecosystem for data processing pipelines diff --git a/docs/cli/README.md b/docs/cli/README.md index 6a47dbbd..10f74c09 100644 --- a/docs/cli/README.md +++ b/docs/cli/README.md @@ -48,11 +48,11 @@ Most CLI commands support these common options: - [`tg-show-flows`](tg-show-flows.md) - List all configured flows - [`tg-show-flow-state`](tg-show-flow-state.md) - Show current flow states -**Flow Class Management:** -- [`tg-put-flow-class`](tg-put-flow-class.md) - Upload/update flow class definition -- [`tg-get-flow-class`](tg-get-flow-class.md) - Retrieve flow class definition -- [`tg-delete-flow-class`](tg-delete-flow-class.md) - Remove flow class definition -- [`tg-show-flow-classes`](tg-show-flow-classes.md) - List available flow classes +**Flow Blueprint Management:** +- [`tg-put-flow-blueprint`](tg-put-flow-blueprint.md) - Upload/update flow blueprint definition +- [`tg-get-flow-blueprint`](tg-get-flow-blueprint.md) - Retrieve flow blueprint definition +- [`tg-delete-flow-blueprint`](tg-delete-flow-blueprint.md) - Remove flow blueprint definition +- [`tg-show-flow-blueprints`](tg-show-flow-blueprints.md) - List available flow blueprintes ### Knowledge Graph Management @@ -115,7 +115,7 @@ Most CLI commands support these common options: ### Basic Document Processing ```bash # Start a flow -tg-start-flow --flow-id my-flow --class-name document-processing +tg-start-flow --flow-id my-flow --blueprint-name document-processing # Load a document tg-load-text --flow-id my-flow --text "Your document content" --title "Test Document" @@ -138,8 +138,8 @@ tg-show-graph --limit 100 ### Flow Management ```bash -# Show available flow classes -tg-show-flow-classes +# Show available flow blueprintes +tg-show-flow-blueprints # Show running flows tg-show-flows diff --git a/docs/cli/tg-delete-flow-class.md b/docs/cli/tg-delete-flow-blueprint.md similarity index 53% 
rename from docs/cli/tg-delete-flow-class.md rename to docs/cli/tg-delete-flow-blueprint.md index cc3c58d8..c99fd6da 100644 --- a/docs/cli/tg-delete-flow-class.md +++ b/docs/cli/tg-delete-flow-blueprint.md @@ -1,24 +1,24 @@ -# tg-delete-flow-class +# tg-delete-flow-blueprint -Permanently deletes a flow class definition from TrustGraph. +Permanently deletes a flow blueprint definition from TrustGraph. ## Synopsis ```bash -tg-delete-flow-class -n CLASS_NAME [options] +tg-delete-flow-blueprint -n CLASS_NAME [options] ``` ## Description -The `tg-delete-flow-class` command permanently removes a flow class definition from TrustGraph. This operation cannot be undone, so use with caution. +The `tg-delete-flow-blueprint` command permanently removes a flow blueprint definition from TrustGraph. This operation cannot be undone, so use with caution. -**⚠️ Warning**: Deleting a flow class that has active flow instances may cause those instances to become unusable. Always check for active flows before deletion. +**⚠️ Warning**: Deleting a flow blueprint that has active flow instances may cause those instances to become unusable. Always check for active flows before deletion. ## Options ### Required Arguments -- `-n, --class-name CLASS_NAME`: Name of the flow class to delete +- `-n, --blueprint-name CLASS_NAME`: Name of the flow blueprint to delete ### Optional Arguments @@ -26,65 +26,65 @@ The `tg-delete-flow-class` command permanently removes a flow class definition f ## Examples -### Delete a Flow Class +### Delete a Flow Blueprint ```bash -tg-delete-flow-class -n "old-test-flow" +tg-delete-flow-blueprint -n "old-test-flow" ``` ### Delete with Custom API URL ```bash -tg-delete-flow-class -n "deprecated-flow" -u http://staging:8088/ +tg-delete-flow-blueprint -n "deprecated-flow" -u http://staging:8088/ ``` ### Safe Deletion Workflow ```bash -# 1. Check if flow class exists -tg-show-flow-classes | grep "target-flow" +# 1. 
Check if flow blueprint exists +tg-show-flow-blueprints | grep "target-flow" -# 2. Backup the flow class first -tg-get-flow-class -n "target-flow" > backup-target-flow.json +# 2. Backup the flow blueprint first +tg-get-flow-blueprint -n "target-flow" > backup-target-flow.json # 3. Check for active flow instances tg-show-flows | grep "target-flow" -# 4. Delete the flow class -tg-delete-flow-class -n "target-flow" +# 4. Delete the flow blueprint +tg-delete-flow-blueprint -n "target-flow" # 5. Verify deletion -tg-show-flow-classes | grep "target-flow" || echo "Flow class deleted successfully" +tg-show-flow-blueprints | grep "target-flow" || echo "Flow blueprint deleted successfully" ``` ## Prerequisites -### Flow Class Must Exist -Verify the flow class exists before attempting deletion: +### Flow Blueprint Must Exist +Verify the flow blueprint exists before attempting deletion: ```bash -# List all flow classes -tg-show-flow-classes +# List all flow blueprintes +tg-show-flow-blueprints -# Check specific flow class -tg-show-flow-classes | grep "target-class" +# Check specific flow blueprint +tg-show-flow-blueprints | grep "target-class" ``` ### Check for Active Flow Instances -Before deleting a flow class, check if any flow instances are using it: +Before deleting a flow blueprint, check if any flow instances are using it: ```bash # List all active flows tg-show-flows -# Look for instances using the flow class +# Look for instances using the flow blueprint tg-show-flows | grep "target-class" ``` ## Error Handling -### Flow Class Not Found +### Flow Blueprint Not Found ```bash -Exception: Flow class 'nonexistent-class' not found +Exception: Flow blueprint 'nonexistent-class' not found ``` -**Solution**: Verify the flow class exists with `tg-show-flow-classes`. +**Solution**: Verify the flow blueprint exists with `tg-show-flow-blueprints`. 
### Connection Errors ```bash @@ -94,13 +94,13 @@ Exception: Connection refused ### Permission Errors ```bash -Exception: Access denied to delete flow class +Exception: Access denied to delete flow blueprint ``` -**Solution**: Verify user permissions for flow class management. +**Solution**: Verify user permissions for flow blueprint management. ### Active Flow Instances ```bash -Exception: Cannot delete flow class with active instances +Exception: Cannot delete flow blueprint with active instances ``` **Solution**: Stop all flow instances using this class before deletion. @@ -108,37 +108,37 @@ Exception: Cannot delete flow class with active instances ### Cleanup Development Classes ```bash -# Delete test and development flow classes +# Delete test and development flow blueprintes test_classes=("test-flow-v1" "dev-experiment" "prototype-flow") for class in "${test_classes[@]}"; do echo "Deleting $class..." - tg-delete-flow-class -n "$class" + tg-delete-flow-blueprint -n "$class" done ``` ### Migration Cleanup ```bash -# After migrating to new flow classes, remove old ones +# After migrating to new flow blueprintes, remove old ones old_classes=("legacy-flow" "deprecated-processor" "old-pipeline") for class in "${old_classes[@]}"; do # Backup first - tg-get-flow-class -n "$class" > "backup-$class.json" 2>/dev/null + tg-get-flow-blueprint -n "$class" > "backup-$class.json" 2>/dev/null # Delete - tg-delete-flow-class -n "$class" + tg-delete-flow-blueprint -n "$class" echo "Deleted $class" done ``` ### Conditional Deletion ```bash -# Delete flow class only if no active instances exist +# Delete flow blueprint only if no active instances exist flow_class="target-flow" active_instances=$(tg-show-flows | grep "$flow_class" | wc -l) if [ $active_instances -eq 0 ]; then - echo "No active instances found, deleting flow class..." - tg-delete-flow-class -n "$flow_class" + echo "No active instances found, deleting flow blueprint..." 
+ tg-delete-flow-blueprint -n "$flow_class" else echo "Warning: $active_instances active instances found. Cannot delete." tg-show-flows | grep "$flow_class" @@ -154,13 +154,13 @@ flow_class="important-flow" backup_dir="flow-class-backups/$(date +%Y%m%d-%H%M%S)" mkdir -p "$backup_dir" -echo "Backing up flow class: $flow_class" -tg-get-flow-class -n "$flow_class" > "$backup_dir/$flow_class.json" +echo "Backing up flow blueprint: $flow_class" +tg-get-flow-blueprint -n "$flow_class" > "$backup_dir/$flow_class.json" if [ $? -eq 0 ]; then echo "Backup created: $backup_dir/$flow_class.json" echo "Proceeding with deletion..." - tg-delete-flow-class -n "$flow_class" + tg-delete-flow-blueprint -n "$flow_class" else echo "Backup failed. Aborting deletion." exit 1 @@ -174,22 +174,22 @@ fi flow_class="$1" if [ -z "$flow_class" ]; then - echo "Usage: $0 " + echo "Usage: $0 " exit 1 fi -echo "Safety checks for deleting flow class: $flow_class" +echo "Safety checks for deleting flow blueprint: $flow_class" -# Check if flow class exists -if ! tg-show-flow-classes | grep -q "$flow_class"; then - echo "ERROR: Flow class '$flow_class' not found" +# Check if flow blueprint exists +if ! tg-show-flow-blueprints | grep -q "$flow_class"; then + echo "ERROR: Flow blueprint '$flow_class' not found" exit 1 fi # Check for active instances active_count=$(tg-show-flows | grep "$flow_class" | wc -l) if [ $active_count -gt 0 ]; then - echo "ERROR: Found $active_count active instances using this flow class" + echo "ERROR: Found $active_count active instances using this flow blueprint" echo "Active instances:" tg-show-flows | grep "$flow_class" exit 1 @@ -198,7 +198,7 @@ fi # Create backup backup_file="backup-$flow_class-$(date +%Y%m%d-%H%M%S).json" echo "Creating backup: $backup_file" -tg-get-flow-class -n "$flow_class" > "$backup_file" +tg-get-flow-blueprint -n "$flow_class" > "$backup_file" if [ $? -ne 0 ]; then echo "ERROR: Failed to create backup" @@ -206,19 +206,19 @@ if [ $? 
-ne 0 ]; then fi # Confirm deletion -echo "Ready to delete flow class: $flow_class" +echo "Ready to delete flow blueprint: $flow_class" echo "Backup saved as: $backup_file" -read -p "Are you sure you want to delete this flow class? (y/N): " confirm +read -p "Are you sure you want to delete this flow blueprint? (y/N): " confirm if [ "$confirm" = "y" ] || [ "$confirm" = "Y" ]; then - echo "Deleting flow class..." - tg-delete-flow-class -n "$flow_class" + echo "Deleting flow blueprint..." + tg-delete-flow-blueprint -n "$flow_class" # Verify deletion - if ! tg-show-flow-classes | grep -q "$flow_class"; then - echo "Flow class deleted successfully" + if ! tg-show-flow-blueprints | grep -q "$flow_class"; then + echo "Flow blueprint deleted successfully" else - echo "ERROR: Flow class still exists after deletion" + echo "ERROR: Flow blueprint still exists after deletion" exit 1 fi else @@ -229,13 +229,13 @@ fi ## Integration with Other Commands -### Complete Flow Class Lifecycle +### Complete Flow Blueprint Lifecycle ```bash -# 1. List existing flow classes -tg-show-flow-classes +# 1. List existing flow blueprintes +tg-show-flow-blueprints -# 2. Get flow class details -tg-get-flow-class -n "target-flow" +# 2. Get flow blueprint details +tg-get-flow-blueprint -n "target-flow" # 3. Check for active instances tg-show-flows | grep "target-flow" @@ -244,25 +244,25 @@ tg-show-flows | grep "target-flow" tg-stop-flow -i "instance-id" # 5. Create backup -tg-get-flow-class -n "target-flow" > backup.json +tg-get-flow-blueprint -n "target-flow" > backup.json -# 6. Delete flow class -tg-delete-flow-class -n "target-flow" +# 6. Delete flow blueprint +tg-delete-flow-blueprint -n "target-flow" # 7. 
Verify deletion -tg-show-flow-classes | grep "target-flow" +tg-show-flow-blueprints | grep "target-flow" ``` ### Bulk Deletion with Validation ```bash -# Delete multiple flow classes safely +# Delete multiple flow blueprintes safely classes_to_delete=("old-flow1" "old-flow2" "test-flow") for class in "${classes_to_delete[@]}"; do echo "Processing $class..." # Check if exists - if ! tg-show-flow-classes | grep -q "$class"; then + if ! tg-show-flow-blueprints | grep -q "$class"; then echo " $class not found, skipping" continue fi @@ -274,8 +274,8 @@ for class in "${classes_to_delete[@]}"; do fi # Backup and delete - tg-get-flow-class -n "$class" > "backup-$class.json" - tg-delete-flow-class -n "$class" + tg-get-flow-blueprint -n "$class" > "backup-$class.json" + tg-delete-flow-blueprint -n "$class" echo " $class deleted" done ``` @@ -286,15 +286,15 @@ done ## Related Commands -- [`tg-show-flow-classes`](tg-show-flow-classes.md) - List available flow classes -- [`tg-get-flow-class`](tg-get-flow-class.md) - Retrieve flow class definitions -- [`tg-put-flow-class`](tg-put-flow-class.md) - Create/update flow class definitions +- [`tg-show-flow-blueprints`](tg-show-flow-blueprints.md) - List available flow blueprintes +- [`tg-get-flow-blueprint`](tg-get-flow-blueprint.md) - Retrieve flow blueprint definitions +- [`tg-put-flow-blueprint`](tg-put-flow-blueprint.md) - Create/update flow blueprint definitions - [`tg-show-flows`](tg-show-flows.md) - List active flow instances - [`tg-stop-flow`](tg-stop-flow.md) - Stop flow instances ## API Integration -This command uses the [Flow API](../apis/api-flow.md) with the `delete-class` operation to remove flow class definitions. +This command uses the [Flow API](../apis/api-flow.md) with the `delete-class` operation to remove flow blueprint definitions. 
## Best Practices @@ -310,10 +310,10 @@ This command uses the [Flow API](../apis/api-flow.md) with the `delete-class` op ### Command Succeeds but Class Still Exists ```bash # Check if deletion actually occurred -tg-show-flow-classes | grep "deleted-class" +tg-show-flow-blueprints | grep "deleted-class" # Verify API connectivity -tg-show-flow-classes > /dev/null && echo "API accessible" +tg-show-flow-blueprints > /dev/null && echo "API accessible" ``` ### Permissions Issues diff --git a/docs/cli/tg-get-flow-class.md b/docs/cli/tg-get-flow-blueprint.md similarity index 52% rename from docs/cli/tg-get-flow-class.md rename to docs/cli/tg-get-flow-blueprint.md index c71b4367..1998c3b1 100644 --- a/docs/cli/tg-get-flow-class.md +++ b/docs/cli/tg-get-flow-blueprint.md @@ -1,24 +1,24 @@ -# tg-get-flow-class +# tg-get-flow-blueprint -Retrieves and displays a flow class definition in JSON format. +Retrieves and displays a flow blueprint definition in JSON format. ## Synopsis ```bash -tg-get-flow-class -n CLASS_NAME [options] +tg-get-flow-blueprint -n CLASS_NAME [options] ``` ## Description -The `tg-get-flow-class` command retrieves a stored flow class definition from TrustGraph and displays it in formatted JSON. This is useful for examining flow class configurations, creating backups, or preparing to modify existing flow classes. +The `tg-get-flow-blueprint` command retrieves a stored flow blueprint definition from TrustGraph and displays it in formatted JSON. This is useful for examining flow blueprint configurations, creating backups, or preparing to modify existing flow blueprints. -The output can be saved to files for version control, documentation, or as input for creating new flow classes with `tg-put-flow-class`. +The output can be saved to files for version control, documentation, or as input for creating new flow blueprints with `tg-put-flow-blueprint`. 
## Options ### Required Arguments -- `-n, --class-name CLASS_NAME`: Name of the flow class to retrieve +- `-n, --blueprint-name CLASS_NAME`: Name of the flow blueprint to retrieve ### Optional Arguments @@ -26,32 +26,32 @@ ## Examples -### Display Flow Class Definition +### Display Flow Blueprint Definition ```bash -tg-get-flow-class -n "document-processing" +tg-get-flow-blueprint -n "document-processing" ``` -### Save Flow Class to File +### Save Flow Blueprint to File ```bash -tg-get-flow-class -n "production-flow" > production-flow-backup.json +tg-get-flow-blueprint -n "production-flow" > production-flow-backup.json ``` -### Compare Flow Classes +### Compare Flow Blueprints ```bash -# Get multiple flow classes for comparison -tg-get-flow-class -n "dev-flow" > dev-flow.json -tg-get-flow-class -n "prod-flow" > prod-flow.json +# Get multiple flow blueprints for comparison +tg-get-flow-blueprint -n "dev-flow" > dev-flow.json +tg-get-flow-blueprint -n "prod-flow" > prod-flow.json diff dev-flow.json prod-flow.json ``` ### Using Custom API URL ```bash -tg-get-flow-class -n "remote-flow" -u http://production:8088/ +tg-get-flow-blueprint -n "remote-flow" -u http://production:8088/ ``` ## Output Format -The command outputs the flow class definition in formatted JSON: +The command outputs the flow blueprint definition in formatted JSON: ```json { @@ -76,7 +76,7 @@ The command outputs the flow class definition in formatted JSON: ### Key Components #### Description -Human-readable description of the flow class purpose and capabilities. +Human-readable description of the flow blueprint purpose and capabilities. #### Interfaces Service definitions showing: @@ -84,28 +84,28 @@ Service definitions showing: - **Fire-and-Forget Services**: Services with only input queues #### Tags (Optional) -Categorization tags for organizing flow classes. +Categorization tags for organizing flow blueprints. 
## Prerequisites -### Flow Class Must Exist -Verify the flow class exists before retrieval: +### Flow Blueprint Must Exist +Verify the flow blueprint exists before retrieval: ```bash -# Check available flow classes -tg-show-flow-classes +# Check available flow blueprintes +tg-show-flow-blueprints # Look for specific class -tg-show-flow-classes | grep "target-class" +tg-show-flow-blueprints | grep "target-class" ``` ## Error Handling -### Flow Class Not Found +### Flow Blueprint Not Found ```bash -Exception: Flow class 'invalid-class' not found +Exception: Flow blueprint 'invalid-class' not found ``` -**Solution**: Check available classes with `tg-show-flow-classes` and verify the class name. +**Solution**: Check available classes with `tg-show-flow-blueprints` and verify the class name. ### Connection Errors ```bash @@ -115,54 +115,54 @@ Exception: Connection refused ### Permission Errors ```bash -Exception: Access denied to flow class +Exception: Access denied to flow blueprint ``` -**Solution**: Verify user permissions for accessing flow class definitions. +**Solution**: Verify user permissions for accessing flow blueprint definitions. 
## Use Cases ### Configuration Backup ```bash -# Backup all flow classes +# Backup all flow blueprintes mkdir -p flow-class-backups/$(date +%Y%m%d) -tg-show-flow-classes | awk '{print $1}' | while read class; do +tg-show-flow-blueprints | awk '{print $1}' | while read class; do if [ "$class" != "flow" ]; then # Skip header - tg-get-flow-class -n "$class" > "flow-class-backups/$(date +%Y%m%d)/$class.json" + tg-get-flow-blueprint -n "$class" > "flow-class-backups/$(date +%Y%m%d)/$class.json" fi done ``` -### Flow Class Migration +### Flow Blueprint Migration ```bash # Export from source environment -tg-get-flow-class -n "production-flow" -u http://source:8088/ > prod-flow.json +tg-get-flow-blueprint -n "production-flow" -u http://source:8088/ > prod-flow.json # Import to target environment -tg-put-flow-class -n "production-flow" -c "$(cat prod-flow.json)" -u http://target:8088/ +tg-put-flow-blueprint -n "production-flow" -c "$(cat prod-flow.json)" -u http://target:8088/ ``` ### Template Creation ```bash -# Get existing flow class as template -tg-get-flow-class -n "base-flow" > template.json +# Get existing flow blueprint as template +tg-get-flow-blueprint -n "base-flow" > template.json # Modify template and create new class sed 's/base-flow/new-flow/g' template.json > new-flow.json -tg-put-flow-class -n "custom-flow" -c "$(cat new-flow.json)" +tg-put-flow-blueprint -n "custom-flow" -c "$(cat new-flow.json)" ``` ### Configuration Analysis ```bash -# Analyze flow class configurations -tg-get-flow-class -n "complex-flow" | jq '.interfaces | keys' -tg-get-flow-class -n "complex-flow" | jq '.interfaces | length' +# Analyze flow blueprint configurations +tg-get-flow-blueprint -n "complex-flow" | jq '.interfaces | keys' +tg-get-flow-blueprint -n "complex-flow" | jq '.interfaces | length' ``` ### Version Control Integration ```bash -# Store flow classes in git +# Store flow blueprintes in git mkdir -p flow-classes -tg-get-flow-class -n "main-flow" > 
flow-classes/main-flow.json +tg-get-flow-blueprint -n "main-flow" > flow-classes/main-flow.json git add flow-classes/main-flow.json git commit -m "Update main-flow configuration" ``` @@ -172,60 +172,60 @@ git commit -m "Update main-flow configuration" ### Extract Specific Information ```bash # Get only interface names -tg-get-flow-class -n "my-flow" | jq -r '.interfaces | keys[]' +tg-get-flow-blueprint -n "my-flow" | jq -r '.interfaces | keys[]' # Get only description -tg-get-flow-class -n "my-flow" | jq -r '.description' +tg-get-flow-blueprint -n "my-flow" | jq -r '.description' # Get request queues -tg-get-flow-class -n "my-flow" | jq -r '.interfaces | to_entries[] | select(.value.request) | .value.request' +tg-get-flow-blueprint -n "my-flow" | jq -r '.interfaces | to_entries[] | select(.value.request) | .value.request' ``` ### Validate Configuration ```bash # Validate JSON structure -tg-get-flow-class -n "my-flow" | jq . > /dev/null && echo "Valid JSON" || echo "Invalid JSON" +tg-get-flow-blueprint -n "my-flow" | jq . > /dev/null && echo "Valid JSON" || echo "Invalid JSON" # Check required fields -config=$(tg-get-flow-class -n "my-flow") +config=$(tg-get-flow-blueprint -n "my-flow") echo "$config" | jq -e '.description' > /dev/null || echo "Missing description" echo "$config" | jq -e '.interfaces' > /dev/null || echo "Missing interfaces" ``` ## Integration with Other Commands -### Flow Class Lifecycle +### Flow Blueprint Lifecycle ```bash -# 1. Examine existing flow class -tg-get-flow-class -n "old-flow" +# 1. Examine existing flow blueprint +tg-get-flow-blueprint -n "old-flow" # 2. Save backup -tg-get-flow-class -n "old-flow" > old-flow-backup.json +tg-get-flow-blueprint -n "old-flow" > old-flow-backup.json # 3. Modify configuration cp old-flow-backup.json new-flow.json # Edit new-flow.json as needed # 4. Upload new version -tg-put-flow-class -n "updated-flow" -c "$(cat new-flow.json)" +tg-put-flow-blueprint -n "updated-flow" -c "$(cat new-flow.json)" -# 5. 
Test new flow class +# 5. Test new flow blueprint tg-start-flow -n "updated-flow" -i "test-instance" -d "Testing updated flow" ``` ### Bulk Operations ```bash -# Process multiple flow classes +# Process multiple flow blueprintes flow_classes=("flow1" "flow2" "flow3") for class in "${flow_classes[@]}"; do echo "Processing $class..." - tg-get-flow-class -n "$class" > "backup-$class.json" + tg-get-flow-blueprint -n "$class" > "backup-$class.json" # Modify configuration sed 's/old-pattern/new-pattern/g' "backup-$class.json" > "updated-$class.json" # Upload updated version - tg-put-flow-class -n "$class" -c "$(cat updated-$class.json)" + tg-put-flow-blueprint -n "$class" -c "$(cat updated-$class.json)" done ``` @@ -235,29 +235,29 @@ done ## Related Commands -- [`tg-put-flow-class`](tg-put-flow-class.md) - Upload/update flow class definitions -- [`tg-show-flow-classes`](tg-show-flow-classes.md) - List available flow classes -- [`tg-delete-flow-class`](tg-delete-flow-class.md) - Remove flow class definitions +- [`tg-put-flow-blueprint`](tg-put-flow-blueprint.md) - Upload/update flow blueprint definitions +- [`tg-show-flow-blueprints`](tg-show-flow-blueprints.md) - List available flow blueprintes +- [`tg-delete-flow-blueprint`](tg-delete-flow-blueprint.md) - Remove flow blueprint definitions - [`tg-start-flow`](tg-start-flow.md) - Create flow instances from classes ## API Integration -This command uses the [Flow API](../apis/api-flow.md) with the `get-class` operation to retrieve flow class definitions. +This command uses the [Flow API](../apis/api-flow.md) with the `get-class` operation to retrieve flow blueprint definitions. 
## Advanced Usage ### Configuration Diff ```bash -# Compare flow class versions -tg-get-flow-class -n "flow-v1" > v1.json -tg-get-flow-class -n "flow-v2" > v2.json +# Compare flow blueprint versions +tg-get-flow-blueprint -n "flow-v1" > v1.json +tg-get-flow-blueprint -n "flow-v2" > v2.json diff -u v1.json v2.json ``` ### Extract Queue Information ```bash -# Get all queue names from flow class -tg-get-flow-class -n "my-flow" | jq -r ' +# Get all queue names from flow blueprint +tg-get-flow-blueprint -n "my-flow" | jq -r ' .interfaces | to_entries[] | if .value | type == "object" then @@ -275,16 +275,16 @@ tg-get-flow-class -n "my-flow" | jq -r ' flow_class="$1" if [ -z "$flow_class" ]; then - echo "Usage: $0 " + echo "Usage: $0 " exit 1 fi -echo "Validating flow class: $flow_class" +echo "Validating flow blueprint: $flow_class" # Get configuration -config=$(tg-get-flow-class -n "$flow_class" 2>/dev/null) +config=$(tg-get-flow-blueprint -n "$flow_class" 2>/dev/null) if [ $? -ne 0 ]; then - echo "ERROR: Flow class not found" + echo "ERROR: Flow blueprint not found" exit 1 fi @@ -307,32 +307,32 @@ if [ -z "$interfaces" ] || [ "$interfaces" = "null" ]; then exit 1 fi -echo "Flow class validation passed" +echo "Flow blueprint validation passed" ``` ## Best Practices -1. **Regular Backups**: Save flow class definitions before modifications +1. **Regular Backups**: Save flow blueprint definitions before modifications 2. **Version Control**: Store configurations in version control systems -3. **Documentation**: Include meaningful descriptions in flow classes +3. **Documentation**: Include meaningful descriptions in flow blueprintes 4. **Validation**: Validate JSON structure before using configurations 5. **Template Management**: Use existing classes as templates for new ones -6. **Change Tracking**: Document changes when updating flow classes +6. 
**Change Tracking**: Document changes when updating flow blueprints ## Troubleshooting ### Empty Output ```bash # If command returns empty output -tg-get-flow-class -n "my-flow" -# Check if flow class exists -tg-show-flow-classes | grep "my-flow" +tg-get-flow-blueprint -n "my-flow" +# Check if flow blueprint exists +tg-show-flow-blueprints | grep "my-flow" ``` ### Invalid JSON Output ```bash # If output appears corrupted -tg-get-flow-class -n "my-flow" | jq . +tg-get-flow-blueprint -n "my-flow" | jq . # Should show parsing error if JSON is invalid ``` diff --git a/docs/cli/tg-invoke-mcp-tool.md b/docs/cli/tg-invoke-mcp-tool.md index 0f6f9fdf..61a061d8 100644 --- a/docs/cli/tg-invoke-mcp-tool.md +++ b/docs/cli/tg-invoke-mcp-tool.md @@ -435,7 +435,7 @@ tg-invoke-mcp-tool -n tool-name -P '{}' tg-show-flows | grep "flow-id" # Verify flow supports MCP tools -tg-get-flow-class -n "flow-class" | jq '.interfaces.mcp_tool' +tg-get-flow-blueprint -n "flow-class" | jq '.interfaces.mcp_tool' ``` ### Connection Issues diff --git a/docs/cli/tg-invoke-prompt.md b/docs/cli/tg-invoke-prompt.md index a8c48ecb..1006be6f 100644 --- a/docs/cli/tg-invoke-prompt.md +++ b/docs/cli/tg-invoke-prompt.md @@ -426,5 +426,5 @@ echo "variable=value" | grep "=" tg-show-flows | grep "flow-id" # Verify flow has prompt service -tg-get-flow-class -n "flow-class" | jq '.interfaces.prompt' +tg-get-flow-blueprint -n "flow-class" | jq '.interfaces.prompt' ``` \ No newline at end of file diff --git a/docs/cli/tg-put-flow-class.md b/docs/cli/tg-put-flow-blueprint.md similarity index 76% rename from docs/cli/tg-put-flow-class.md rename to docs/cli/tg-put-flow-blueprint.md index 7b62b5e4..059d4efa 100644 --- a/docs/cli/tg-put-flow-class.md +++ b/docs/cli/tg-put-flow-blueprint.md @@ -1,25 +1,25 @@ -# tg-put-flow-class +# tg-put-flow-blueprint -Uploads or updates a flow class definition in TrustGraph. +Uploads or updates a flow blueprint definition in TrustGraph. 
## Synopsis ```bash -tg-put-flow-class -n CLASS_NAME -c CONFIG_JSON [options] +tg-put-flow-blueprint -n CLASS_NAME -c CONFIG_JSON [options] ``` ## Description -The `tg-put-flow-class` command creates or updates a flow class definition in TrustGraph. Flow classes are templates that define processing pipeline configurations, service interfaces, and resource requirements. These classes are used by `tg-start-flow` to create running flow instances. +The `tg-put-flow-blueprint` command creates or updates a flow blueprint definition in TrustGraph. Flow blueprints are templates that define processing pipeline configurations, service interfaces, and resource requirements. These blueprints are used by `tg-start-flow` to create running flow instances. -Flow classes define the structure and capabilities of processing flows, including which services are available and how they connect to Pulsar queues. +Flow blueprints define the structure and capabilities of processing flows, including which services are available and how they connect to Pulsar queues. 
## Options ### Required Arguments -- `-n, --class-name CLASS_NAME`: Name for the flow class -- `-c, --config CONFIG_JSON`: Flow class configuration as raw JSON string +- `-n, --blueprint-name CLASS_NAME`: Name for the flow blueprint +- `-c, --config CONFIG_JSON`: Flow blueprint configuration as raw JSON string ### Optional Arguments @@ -27,16 +27,16 @@ Flow classes define the structure and capabilities of processing flows, includin ## Examples -### Basic Flow Class Creation +### Basic Flow Blueprint Creation ```bash -tg-put-flow-class \ +tg-put-flow-blueprint \ -n "simple-processing" \ -c '{"description": "Simple text processing flow", "interfaces": {"text-completion": {"request": "non-persistent://tg/request/text-completion:simple", "response": "non-persistent://tg/response/text-completion:simple"}}}' ``` -### Document Processing Flow Class +### Document Processing Flow Blueprint ```bash -tg-put-flow-class \ +tg-put-flow-blueprint \ -n "document-analysis" \ -c '{ "description": "Document analysis and RAG processing", @@ -80,14 +80,14 @@ cat > research-flow.json << 'EOF' } EOF -# Upload the flow class -tg-put-flow-class -n "research-analysis" -c "$(cat research-flow.json)" +# Upload the flow blueprint +tg-put-flow-blueprint -n "research-analysis" -c "$(cat research-flow.json)" ``` -### Update Existing Flow Class +### Update Existing Flow Blueprint ```bash -# Modify existing flow class by adding new service -tg-put-flow-class \ +# Modify existing flow blueprint by adding new service +tg-put-flow-blueprint \ -n "existing-flow" \ -c '{ "description": "Updated flow with new capabilities", @@ -104,14 +104,14 @@ tg-put-flow-class \ }' ``` -## Flow Class Configuration Format +## Flow Blueprint Configuration Format ### Required Fields #### Description ```json { - "description": "Human-readable description of the flow class" + "description": "Human-readable description of the flow blueprint" } ``` @@ -180,9 +180,9 @@ persistent://tg/flow/{service}:{flow-identifier} ## 
Complete Example -### Comprehensive Flow Class +### Comprehensive Flow Blueprint ```bash -tg-put-flow-class \ +tg-put-flow-blueprint \ -n "full-processing-pipeline" \ -c '{ "description": "Complete document processing and analysis pipeline", @@ -234,11 +234,11 @@ tg-put-flow-class \ Successful upload typically produces no output: ```bash -# Upload flow class (no output expected) -tg-put-flow-class -n "my-flow" -c '{"description": "test", "interfaces": {}}' +# Upload flow blueprint (no output expected) +tg-put-flow-blueprint -n "my-flow" -c '{"description": "test", "interfaces": {}}' # Verify upload -tg-show-flow-classes | grep "my-flow" +tg-show-flow-blueprints | grep "my-flow" ``` ## Error Handling @@ -276,27 +276,27 @@ config='{"description": "test flow", "interfaces": {}}' echo "$config" | jq . > /dev/null && echo "Valid JSON" || echo "Invalid JSON" ``` -### Flow Class Verification +### Flow Blueprint Verification ```bash -# After uploading, verify the flow class exists -tg-show-flow-classes | grep "my-flow-class" +# After uploading, verify the flow blueprint exists +tg-show-flow-blueprints | grep "my-flow-class" -# Get the flow class definition to verify content -tg-get-flow-class -n "my-flow-class" +# Get the flow blueprint definition to verify content +tg-get-flow-blueprint -n "my-flow-class" ``` -## Flow Class Lifecycle +## Flow Blueprint Lifecycle ### Development Workflow ```bash -# 1. Create flow class -tg-put-flow-class -n "dev-flow" -c "$dev_config" +# 1. Create flow blueprint +tg-put-flow-blueprint -n "dev-flow" -c "$dev_config" # 2. Test with flow instance tg-start-flow -n "dev-flow" -i "test-instance" -d "Testing" -# 3. Update flow class as needed -tg-put-flow-class -n "dev-flow" -c "$updated_config" +# 3. Update flow blueprint as needed +tg-put-flow-blueprint -n "dev-flow" -c "$updated_config" # 4. 
Restart flow instance with updates tg-stop-flow -i "test-instance" @@ -309,53 +309,53 @@ tg-start-flow -n "dev-flow" -i "test-instance" -d "Testing updated" ## Related Commands -- [`tg-get-flow-class`](tg-get-flow-class.md) - Retrieve flow class definitions -- [`tg-show-flow-classes`](tg-show-flow-classes.md) - List available flow classes -- [`tg-delete-flow-class`](tg-delete-flow-class.md) - Remove flow class definitions +- [`tg-get-flow-blueprint`](tg-get-flow-blueprint.md) - Retrieve flow blueprint definitions +- [`tg-show-flow-blueprints`](tg-show-flow-blueprints.md) - List available flow blueprintes +- [`tg-delete-flow-blueprint`](tg-delete-flow-blueprint.md) - Remove flow blueprint definitions - [`tg-start-flow`](tg-start-flow.md) - Create flow instances from classes ## API Integration -This command uses the [Flow API](../apis/api-flow.md) with the `put-class` operation to store flow class definitions. +This command uses the [Flow API](../apis/api-flow.md) with the `put-class` operation to store flow blueprint definitions. ## Use Cases ### Custom Processing Pipelines ```bash # Create specialized medical analysis flow -tg-put-flow-class -n "medical-nlp" -c "$medical_config" +tg-put-flow-blueprint -n "medical-nlp" -c "$medical_config" ``` ### Development Environments ```bash # Create lightweight development flow -tg-put-flow-class -n "dev-minimal" -c "$minimal_config" +tg-put-flow-blueprint -n "dev-minimal" -c "$minimal_config" ``` ### Production Deployments ```bash # Create robust production flow with all services -tg-put-flow-class -n "production-full" -c "$production_config" +tg-put-flow-blueprint -n "production-full" -c "$production_config" ``` ### Domain-Specific Workflows ```bash # Create legal document analysis flow -tg-put-flow-class -n "legal-analysis" -c "$legal_config" +tg-put-flow-blueprint -n "legal-analysis" -c "$legal_config" ``` ## Best Practices -1. **Descriptive Names**: Use clear, descriptive flow class names +1. 
**Descriptive Names**: Use clear, descriptive flow blueprint names 2. **Comprehensive Descriptions**: Include detailed descriptions of flow capabilities 3. **Consistent Naming**: Follow consistent queue naming conventions -4. **Version Control**: Store flow class configurations in version control -5. **Testing**: Test flow classes thoroughly before production use -6. **Documentation**: Document flow class purposes and requirements +4. **Version Control**: Store flow blueprint configurations in version control +5. **Testing**: Test flow blueprintes thoroughly before production use +6. **Documentation**: Document flow blueprint purposes and requirements ## Template Examples -### Minimal Flow Class +### Minimal Flow Blueprint ```json { "description": "Minimal text processing flow", @@ -368,7 +368,7 @@ tg-put-flow-class -n "legal-analysis" -c "$legal_config" } ``` -### RAG-Focused Flow Class +### RAG-Focused Flow Blueprint ```json { "description": "Retrieval Augmented Generation flow", @@ -389,7 +389,7 @@ tg-put-flow-class -n "legal-analysis" -c "$legal_config" } ``` -### Document Processing Flow Class +### Document Processing Flow Blueprint ```json { "description": "Document ingestion and processing flow", diff --git a/docs/cli/tg-show-config.md b/docs/cli/tg-show-config.md index 2fa3c64c..ac870a7f 100644 --- a/docs/cli/tg-show-config.md +++ b/docs/cli/tg-show-config.md @@ -43,7 +43,7 @@ Version: 42 { "flows": { "default": { - "class-name": "document-rag+graph-rag", + "blueprint-name": "document-rag+graph-rag", "description": "Default processing flow", "interfaces": { "agent": { @@ -77,7 +77,7 @@ Version: 42 ### Flow Definitions Flow configurations showing: -- **class-name**: The flow class being used +- **blueprint-name**: The flow blueprint being used - **description**: Human-readable flow description - **interfaces**: Pulsar queue names for each service @@ -131,7 +131,7 @@ Exception: Unauthorized ## Related Commands -- [`tg-put-flow-class`](tg-put-flow-class.md) - 
Update flow class definitions +- [`tg-put-flow-blueprint`](tg-put-flow-blueprint.md) - Update flow blueprint definitions - [`tg-show-flows`](tg-show-flows.md) - List active flows - [`tg-set-prompt`](tg-set-prompt.md) - Configure prompt templates - [`tg-set-token-costs`](tg-set-token-costs.md) - Configure token costs diff --git a/docs/cli/tg-show-flow-blueprints.md b/docs/cli/tg-show-flow-blueprints.md new file mode 100644 index 00000000..40667dda --- /dev/null +++ b/docs/cli/tg-show-flow-blueprints.md @@ -0,0 +1,330 @@ +# tg-show-flow-blueprints + +Lists all defined flow blueprints in TrustGraph with their descriptions and tags. + +## Synopsis + +```bash +tg-show-flow-blueprints [options] +``` + +## Description + +The `tg-show-flow-blueprints` command displays a formatted table of all flow blueprint definitions currently stored in TrustGraph. Each flow blueprint is shown with its name, description, and associated tags. + +Flow blueprints are templates that define the structure and services available for creating flow instances. This command helps you understand what flow blueprints are available for use. 
+ +## Options + +### Optional Arguments + +- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) + +## Examples + +### List All Flow Blueprintes +```bash +tg-show-flow-blueprints +``` + +Output: +``` ++-----------------+----------------------------------+----------------------+ +| flow blueprint | description | tags | ++-----------------+----------------------------------+----------------------+ +| document-proc | Document processing pipeline | production, nlp | +| data-analysis | Data analysis and visualization | analytics, dev | +| web-scraper | Web content extraction flow | scraping, batch | +| chat-assistant | Conversational AI assistant | ai, interactive | ++-----------------+----------------------------------+----------------------+ +``` + +### Using Custom API URL +```bash +tg-show-flow-blueprints -u http://production:8088/ +``` + +### Filter Flow Blueprintes +```bash +# Show only production-tagged flow blueprintes +tg-show-flow-blueprints | grep "production" + +# Count total flow blueprintes +tg-show-flow-blueprints | grep -c "^|" + +# Show flow blueprintes with specific patterns +tg-show-flow-blueprints | grep -E "(document|text|nlp)" +``` + +## Output Format + +The command displays results in a formatted table with columns: + +- **flow blueprint**: The unique name/identifier of the flow blueprint +- **description**: Human-readable description of the flow blueprint purpose +- **tags**: Comma-separated list of categorization tags + +### Empty Results +If no flow blueprintes exist: +``` +No flows. 
+``` + +## Use Cases + +### Flow Blueprint Discovery +```bash +# Find available flow blueprintes for document processing +tg-show-flow-blueprints | grep -i document + +# List all AI-related flow blueprintes +tg-show-flow-blueprints | grep -i "ai\|nlp\|chat\|assistant" + +# Find development vs production flow blueprintes +tg-show-flow-blueprints | grep -E "(dev|test|staging)" +tg-show-flow-blueprints | grep "production" +``` + +### Flow Blueprint Management +```bash +# Get list of flow blueprint names for scripting +tg-show-flow-blueprints | awk 'NR>3 && /^\|/ {gsub(/[| ]/, "", $2); print $2}' | grep -v "^$" + +# Check if specific flow blueprint exists +if tg-show-flow-blueprints | grep -q "target-flow"; then + echo "Flow blueprint 'target-flow' exists" +else + echo "Flow blueprint 'target-flow' not found" +fi +``` + +### Environment Comparison +```bash +# Compare flow blueprintes between environments +echo "Development environment:" +tg-show-flow-blueprints -u http://dev:8088/ + +echo "Production environment:" +tg-show-flow-blueprints -u http://prod:8088/ +``` + +### Reporting and Documentation +```bash +# Generate flow blueprint inventory report +echo "Flow Blueprint Inventory - $(date)" > flow-inventory.txt +echo "=====================================" >> flow-inventory.txt +tg-show-flow-blueprints >> flow-inventory.txt + +# Create CSV export +echo "flow_class,description,tags" > flow-classes.csv +tg-show-flow-blueprints | awk 'NR>3 && /^\|/ { + gsub(/^\| */, "", $0); gsub(/ *\|$/, "", $0); + gsub(/ *\| */, ",", $0); print $0 +}' >> flow-classes.csv +``` + +## Error Handling + +### Connection Errors +```bash +Exception: Connection refused +``` +**Solution**: Check the API URL and ensure TrustGraph is running. + +### Permission Errors +```bash +Exception: Access denied to list flow blueprintes +``` +**Solution**: Verify user permissions for reading flow blueprint definitions. 
+ +### Network Timeouts +```bash +Exception: Request timeout +``` +**Solution**: Check network connectivity and API server status. + +## Integration with Other Commands + +### Flow Blueprint Lifecycle +```bash +# 1. List available flow blueprintes +tg-show-flow-blueprints + +# 2. Get details of specific flow blueprint +tg-get-flow-blueprint -n "interesting-flow" + +# 3. Start flow instance from class +tg-start-flow -n "interesting-flow" -i "my-instance" + +# 4. Monitor flow instance +tg-show-flows | grep "my-instance" +``` + +### Bulk Operations +```bash +# Process all flow blueprintes +tg-show-flow-blueprints | awk 'NR>3 && /^\|/ {gsub(/[| ]/, "", $2); if($2) print $2}' | \ +while read class_name; do + if [ -n "$class_name" ]; then + echo "Processing flow blueprint: $class_name" + tg-get-flow-blueprint -n "$class_name" > "backup-$class_name.json" + fi +done +``` + +### Automated Validation +```bash +# Check flow blueprint health +echo "Validating flow blueprintes..." +tg-show-flow-blueprints | awk 'NR>3 && /^\|/ {gsub(/[| ]/, "", $2); if($2) print $2}' | \ +while read class_name; do + if [ -n "$class_name" ]; then + echo -n "Checking $class_name... " + if tg-get-flow-blueprint -n "$class_name" > /dev/null 2>&1; then + echo "OK" + else + echo "ERROR" + fi + fi +done +``` + +## Advanced Usage + +### Flow Blueprint Analysis +```bash +# Analyze flow blueprint distribution by tags +tg-show-flow-blueprints | awk 'NR>3 && /^\|/ { + # Extract tags column + split($0, parts, "|"); + tags = parts[4]; + gsub(/^ *| *$/, "", tags); + if (tags) { + split(tags, tag_array, ","); + for (i in tag_array) { + gsub(/^ *| *$/, "", tag_array[i]); + if (tag_array[i]) print tag_array[i]; + } + } +}' | sort | uniq -c | sort -nr +``` + +### Environment Synchronization +```bash +# Sync flow blueprintes between environments +echo "Synchronizing flow blueprintes from dev to staging..." 
+ +# Get list from development +dev_classes=$(tg-show-flow-blueprints -u http://dev:8088/ | \ + awk 'NR>3 && /^\|/ {gsub(/[| ]/, "", $2); if($2) print $2}') + +# Check each class in staging +for class in $dev_classes; do + if tg-show-flow-blueprints -u http://staging:8088/ | grep -q "$class"; then + echo "$class: Already exists in staging" + else + echo "$class: Missing in staging - needs sync" + # Get from dev and put to staging + tg-get-flow-blueprint -n "$class" -u http://dev:8088/ > temp-class.json + tg-put-flow-blueprint -n "$class" -c "$(cat temp-class.json)" -u http://staging:8088/ + rm temp-class.json + fi +done +``` + +### Monitoring Script +```bash +#!/bin/bash +# monitor-flow-classes.sh +api_url="${1:-http://localhost:8088/}" + +echo "Flow Blueprint Monitoring Report - $(date)" +echo "API URL: $api_url" +echo "----------------------------------------" + +# Total count +total=$(tg-show-flow-blueprints -u "$api_url" | grep -c "^|" 2>/dev/null || echo "0") +echo "Total flow blueprintes: $((total - 3))" # Subtract header rows + +# Tag analysis +echo -e "\nTag distribution:" +tg-show-flow-blueprints -u "$api_url" | awk 'NR>3 && /^\|/ { + split($0, parts, "|"); + tags = parts[4]; + gsub(/^ *| *$/, "", tags); + if (tags) { + split(tags, tag_array, ","); + for (i in tag_array) { + gsub(/^ *| *$/, "", tag_array[i]); + if (tag_array[i]) print tag_array[i]; + } + } +}' | sort | uniq -c | sort -nr + +# Health check +echo -e "\nHealth check:" +healthy=0 +unhealthy=0 +tg-show-flow-blueprints -u "$api_url" | awk 'NR>3 && /^\|/ {gsub(/[| ]/, "", $2); if($2) print $2}' | \ +while read class_name; do + if [ -n "$class_name" ]; then + if tg-get-flow-blueprint -n "$class_name" -u "$api_url" > /dev/null 2>&1; then + healthy=$((healthy + 1)) + else + unhealthy=$((unhealthy + 1)) + echo " ERROR: $class_name" + fi + fi +done + +echo "Healthy: $healthy, Unhealthy: $unhealthy" +``` + +## Environment Variables + +- `TRUSTGRAPH_URL`: Default API URL + +## Related Commands + +- 
[`tg-get-flow-blueprint`](tg-get-flow-blueprint.md) - Retrieve specific flow blueprint definitions +- [`tg-put-flow-blueprint`](tg-put-flow-blueprint.md) - Create/update flow blueprint definitions +- [`tg-delete-flow-blueprint`](tg-delete-flow-blueprint.md) - Delete flow blueprint definitions +- [`tg-start-flow`](tg-start-flow.md) - Create flow instances from classes +- [`tg-show-flows`](tg-show-flows.md) - List active flow instances + +## API Integration + +This command uses the [Flow API](../apis/api-flow.md) with the `list-classes` operation to retrieve flow blueprint listings. + +## Best Practices + +1. **Regular Inventory**: Periodically review available flow blueprintes +2. **Documentation**: Ensure flow blueprintes have meaningful descriptions +3. **Tagging**: Use consistent tagging for better organization +4. **Cleanup**: Remove unused or deprecated flow blueprintes +5. **Monitoring**: Include flow blueprint health checks in monitoring +6. **Environment Parity**: Keep flow blueprintes synchronized across environments + +## Troubleshooting + +### No Output +```bash +# If command returns no output, check API connectivity +tg-show-flow-blueprints -u http://localhost:8088/ +# Verify TrustGraph is running and accessible +``` + +### Formatting Issues +```bash +# If table formatting is broken, check terminal width +export COLUMNS=120 +tg-show-flow-blueprints +``` + +### Missing Flow Blueprintes +```bash +# If expected flow blueprintes are missing, verify: +# 1. Correct API URL +# 2. Database connectivity +# 3. Flow blueprint definitions are properly stored +``` \ No newline at end of file diff --git a/docs/cli/tg-show-flow-classes.md b/docs/cli/tg-show-flow-classes.md deleted file mode 100644 index f81d9331..00000000 --- a/docs/cli/tg-show-flow-classes.md +++ /dev/null @@ -1,330 +0,0 @@ -# tg-show-flow-classes - -Lists all defined flow classes in TrustGraph with their descriptions and tags. 
- -## Synopsis - -```bash -tg-show-flow-classes [options] -``` - -## Description - -The `tg-show-flow-classes` command displays a formatted table of all flow class definitions currently stored in TrustGraph. Each flow class is shown with its name, description, and associated tags. - -Flow classes are templates that define the structure and services available for creating flow instances. This command helps you understand what flow classes are available for use. - -## Options - -### Optional Arguments - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) - -## Examples - -### List All Flow Classes -```bash -tg-show-flow-classes -``` - -Output: -``` -+-----------------+----------------------------------+----------------------+ -| flow class | description | tags | -+-----------------+----------------------------------+----------------------+ -| document-proc | Document processing pipeline | production, nlp | -| data-analysis | Data analysis and visualization | analytics, dev | -| web-scraper | Web content extraction flow | scraping, batch | -| chat-assistant | Conversational AI assistant | ai, interactive | -+-----------------+----------------------------------+----------------------+ -``` - -### Using Custom API URL -```bash -tg-show-flow-classes -u http://production:8088/ -``` - -### Filter Flow Classes -```bash -# Show only production-tagged flow classes -tg-show-flow-classes | grep "production" - -# Count total flow classes -tg-show-flow-classes | grep -c "^|" - -# Show flow classes with specific patterns -tg-show-flow-classes | grep -E "(document|text|nlp)" -``` - -## Output Format - -The command displays results in a formatted table with columns: - -- **flow class**: The unique name/identifier of the flow class -- **description**: Human-readable description of the flow class purpose -- **tags**: Comma-separated list of categorization tags - -### Empty Results -If no flow classes exist: -``` -No flows. 
-``` - -## Use Cases - -### Flow Class Discovery -```bash -# Find available flow classes for document processing -tg-show-flow-classes | grep -i document - -# List all AI-related flow classes -tg-show-flow-classes | grep -i "ai\|nlp\|chat\|assistant" - -# Find development vs production flow classes -tg-show-flow-classes | grep -E "(dev|test|staging)" -tg-show-flow-classes | grep "production" -``` - -### Flow Class Management -```bash -# Get list of flow class names for scripting -tg-show-flow-classes | awk 'NR>3 && /^\|/ {gsub(/[| ]/, "", $2); print $2}' | grep -v "^$" - -# Check if specific flow class exists -if tg-show-flow-classes | grep -q "target-flow"; then - echo "Flow class 'target-flow' exists" -else - echo "Flow class 'target-flow' not found" -fi -``` - -### Environment Comparison -```bash -# Compare flow classes between environments -echo "Development environment:" -tg-show-flow-classes -u http://dev:8088/ - -echo "Production environment:" -tg-show-flow-classes -u http://prod:8088/ -``` - -### Reporting and Documentation -```bash -# Generate flow class inventory report -echo "Flow Class Inventory - $(date)" > flow-inventory.txt -echo "=====================================" >> flow-inventory.txt -tg-show-flow-classes >> flow-inventory.txt - -# Create CSV export -echo "flow_class,description,tags" > flow-classes.csv -tg-show-flow-classes | awk 'NR>3 && /^\|/ { - gsub(/^\| */, "", $0); gsub(/ *\|$/, "", $0); - gsub(/ *\| */, ",", $0); print $0 -}' >> flow-classes.csv -``` - -## Error Handling - -### Connection Errors -```bash -Exception: Connection refused -``` -**Solution**: Check the API URL and ensure TrustGraph is running. - -### Permission Errors -```bash -Exception: Access denied to list flow classes -``` -**Solution**: Verify user permissions for reading flow class definitions. - -### Network Timeouts -```bash -Exception: Request timeout -``` -**Solution**: Check network connectivity and API server status. 
- -## Integration with Other Commands - -### Flow Class Lifecycle -```bash -# 1. List available flow classes -tg-show-flow-classes - -# 2. Get details of specific flow class -tg-get-flow-class -n "interesting-flow" - -# 3. Start flow instance from class -tg-start-flow -n "interesting-flow" -i "my-instance" - -# 4. Monitor flow instance -tg-show-flows | grep "my-instance" -``` - -### Bulk Operations -```bash -# Process all flow classes -tg-show-flow-classes | awk 'NR>3 && /^\|/ {gsub(/[| ]/, "", $2); if($2) print $2}' | \ -while read class_name; do - if [ -n "$class_name" ]; then - echo "Processing flow class: $class_name" - tg-get-flow-class -n "$class_name" > "backup-$class_name.json" - fi -done -``` - -### Automated Validation -```bash -# Check flow class health -echo "Validating flow classes..." -tg-show-flow-classes | awk 'NR>3 && /^\|/ {gsub(/[| ]/, "", $2); if($2) print $2}' | \ -while read class_name; do - if [ -n "$class_name" ]; then - echo -n "Checking $class_name... " - if tg-get-flow-class -n "$class_name" > /dev/null 2>&1; then - echo "OK" - else - echo "ERROR" - fi - fi -done -``` - -## Advanced Usage - -### Flow Class Analysis -```bash -# Analyze flow class distribution by tags -tg-show-flow-classes | awk 'NR>3 && /^\|/ { - # Extract tags column - split($0, parts, "|"); - tags = parts[4]; - gsub(/^ *| *$/, "", tags); - if (tags) { - split(tags, tag_array, ","); - for (i in tag_array) { - gsub(/^ *| *$/, "", tag_array[i]); - if (tag_array[i]) print tag_array[i]; - } - } -}' | sort | uniq -c | sort -nr -``` - -### Environment Synchronization -```bash -# Sync flow classes between environments -echo "Synchronizing flow classes from dev to staging..." 
- -# Get list from development -dev_classes=$(tg-show-flow-classes -u http://dev:8088/ | \ - awk 'NR>3 && /^\|/ {gsub(/[| ]/, "", $2); if($2) print $2}') - -# Check each class in staging -for class in $dev_classes; do - if tg-show-flow-classes -u http://staging:8088/ | grep -q "$class"; then - echo "$class: Already exists in staging" - else - echo "$class: Missing in staging - needs sync" - # Get from dev and put to staging - tg-get-flow-class -n "$class" -u http://dev:8088/ > temp-class.json - tg-put-flow-class -n "$class" -c "$(cat temp-class.json)" -u http://staging:8088/ - rm temp-class.json - fi -done -``` - -### Monitoring Script -```bash -#!/bin/bash -# monitor-flow-classes.sh -api_url="${1:-http://localhost:8088/}" - -echo "Flow Class Monitoring Report - $(date)" -echo "API URL: $api_url" -echo "----------------------------------------" - -# Total count -total=$(tg-show-flow-classes -u "$api_url" | grep -c "^|" 2>/dev/null || echo "0") -echo "Total flow classes: $((total - 3))" # Subtract header rows - -# Tag analysis -echo -e "\nTag distribution:" -tg-show-flow-classes -u "$api_url" | awk 'NR>3 && /^\|/ { - split($0, parts, "|"); - tags = parts[4]; - gsub(/^ *| *$/, "", tags); - if (tags) { - split(tags, tag_array, ","); - for (i in tag_array) { - gsub(/^ *| *$/, "", tag_array[i]); - if (tag_array[i]) print tag_array[i]; - } - } -}' | sort | uniq -c | sort -nr - -# Health check -echo -e "\nHealth check:" -healthy=0 -unhealthy=0 -tg-show-flow-classes -u "$api_url" | awk 'NR>3 && /^\|/ {gsub(/[| ]/, "", $2); if($2) print $2}' | \ -while read class_name; do - if [ -n "$class_name" ]; then - if tg-get-flow-class -n "$class_name" -u "$api_url" > /dev/null 2>&1; then - healthy=$((healthy + 1)) - else - unhealthy=$((unhealthy + 1)) - echo " ERROR: $class_name" - fi - fi -done - -echo "Healthy: $healthy, Unhealthy: $unhealthy" -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- 
[`tg-get-flow-class`](tg-get-flow-class.md) - Retrieve specific flow class definitions -- [`tg-put-flow-class`](tg-put-flow-class.md) - Create/update flow class definitions -- [`tg-delete-flow-class`](tg-delete-flow-class.md) - Delete flow class definitions -- [`tg-start-flow`](tg-start-flow.md) - Create flow instances from classes -- [`tg-show-flows`](tg-show-flows.md) - List active flow instances - -## API Integration - -This command uses the [Flow API](../apis/api-flow.md) with the `list-classes` operation to retrieve flow class listings. - -## Best Practices - -1. **Regular Inventory**: Periodically review available flow classes -2. **Documentation**: Ensure flow classes have meaningful descriptions -3. **Tagging**: Use consistent tagging for better organization -4. **Cleanup**: Remove unused or deprecated flow classes -5. **Monitoring**: Include flow class health checks in monitoring -6. **Environment Parity**: Keep flow classes synchronized across environments - -## Troubleshooting - -### No Output -```bash -# If command returns no output, check API connectivity -tg-show-flow-classes -u http://localhost:8088/ -# Verify TrustGraph is running and accessible -``` - -### Formatting Issues -```bash -# If table formatting is broken, check terminal width -export COLUMNS=120 -tg-show-flow-classes -``` - -### Missing Flow Classes -```bash -# If expected flow classes are missing, verify: -# 1. Correct API URL -# 2. Database connectivity -# 3. Flow class definitions are properly stored -``` \ No newline at end of file diff --git a/docs/cli/tg-show-flow-state.md b/docs/cli/tg-show-flow-state.md index d0741522..dc24d700 100644 --- a/docs/cli/tg-show-flow-state.md +++ b/docs/cli/tg-show-flow-state.md @@ -1,6 +1,6 @@ # tg-show-flow-state -Displays the processor states for a specific flow and its associated flow class. +Displays the processor states for a specific flow and its associated flow blueprint. 
## Synopsis @@ -10,7 +10,7 @@ tg-show-flow-state [options] ## Description -The `tg-show-flow-state` command shows the current state of processors within a specific TrustGraph flow instance and its corresponding flow class. It queries the metrics system to determine which processing components are running and displays their status with visual indicators. +The `tg-show-flow-state` command shows the current state of processors within a specific TrustGraph flow instance and its corresponding flow blueprint. It queries the metrics system to determine which processing components are running and displays their status with visual indicators. This command is essential for monitoring flow health and debugging processing issues. @@ -51,7 +51,7 @@ tg-show-flow-state \ ## Output Format -The command displays processor states for both the flow instance and its flow class: +The command displays processor states for both the flow instance and its flow blueprint: ``` Flow production-flow @@ -75,7 +75,7 @@ Class document-processing-v2 ### Information Displayed - **Flow Section**: Shows the state of processors in the specific flow instance -- **Class Section**: Shows the state of processors in the flow class template +- **Class Section**: Shows the state of processors in the flow blueprint template - **Processor Names**: Individual processing components within the flow ## Use Cases diff --git a/docs/cli/tg-show-flows.md b/docs/cli/tg-show-flows.md index cfdaff90..72a1c809 100644 --- a/docs/cli/tg-show-flows.md +++ b/docs/cli/tg-show-flows.md @@ -135,7 +135,7 @@ Exception: Unauthorized - [`tg-start-flow`](tg-start-flow.md) - Start a new flow instance - [`tg-stop-flow`](tg-stop-flow.md) - Stop a running flow -- [`tg-show-flow-classes`](tg-show-flow-classes.md) - List available flow classes +- [`tg-show-flow-blueprints`](tg-show-flow-blueprints.md) - List available flow blueprintes - [`tg-show-flow-state`](tg-show-flow-state.md) - Show detailed flow status - 
[`tg-show-config`](tg-show-config.md) - Show complete system configuration @@ -186,7 +186,7 @@ tg-show-flows | grep "graph-rag request" ### Flow Information - **id**: Unique flow instance identifier -- **class**: Flow class name used to create the instance +- **class**: Flow blueprint name used to create the instance - **desc**: Human-readable flow description - **queue**: Service interfaces and their Pulsar queue names @@ -196,7 +196,7 @@ Queue names indicate: - **Tenant**: Usually `tg` - **Namespace**: `request`, `response`, or `flow` - **Service**: The specific service name -- **Flow Identifier**: Either flow class or flow ID +- **Flow Identifier**: Either flow blueprint or flow ID ## Best Practices diff --git a/docs/cli/tg-start-flow.md b/docs/cli/tg-start-flow.md index c0b2ad7a..0257d855 100644 --- a/docs/cli/tg-start-flow.md +++ b/docs/cli/tg-start-flow.md @@ -1,6 +1,6 @@ # tg-start-flow -Starts a processing flow using a defined flow class. +Starts a processing flow using a defined flow blueprint. ## Synopsis @@ -10,7 +10,7 @@ tg-start-flow -n CLASS_NAME -i FLOW_ID -d DESCRIPTION [options] ## Description -The `tg-start-flow` command creates and starts a new processing flow instance based on a predefined flow class. Flow classes define the processing pipeline configuration, while flow instances are running implementations of those classes with specific identifiers. +The `tg-start-flow` command creates and starts a new processing flow instance based on a predefined flow blueprint. Flow blueprintes define the processing pipeline configuration, while flow instances are running implementations of those classes with specific identifiers. Once started, a flow provides endpoints for document processing, knowledge queries, and other TrustGraph services through its configured interfaces. 
@@ -18,7 +18,7 @@ Once started, a flow provides endpoints for document processing, knowledge queri ### Required Arguments -- `-n, --class-name CLASS_NAME`: Name of the flow class to instantiate +- `-n, --blueprint-name CLASS_NAME`: Name of the flow blueprint to instantiate - `-i, --flow-id FLOW_ID`: Unique identifier for the new flow instance - `-d, --description DESCRIPTION`: Human-readable description of the flow @@ -36,7 +36,7 @@ tg-start-flow \ -d "Research document processing pipeline" ``` -### Start Custom Flow Class +### Start Custom Flow Blueprint ```bash tg-start-flow \ -n "medical-analysis" \ @@ -55,15 +55,15 @@ tg-start-flow \ ## Prerequisites -### Flow Class Must Exist -Before starting a flow, the flow class must be available in the system: +### Flow Blueprint Must Exist +Before starting a flow, the flow blueprint must be available in the system: ```bash -# Check available flow classes -tg-show-flow-classes +# Check available flow blueprints +tg-show-flow-blueprints -# Upload a flow class if needed -tg-put-flow-class -n "my-class" -f flow-definition.json +# Upload a flow blueprint if needed +tg-put-flow-blueprint -n "my-class" -f flow-definition.json ``` ### System Requirements @@ -73,7 +73,7 @@ tg-put-flow-class -n "my-class" -f flow-definition.json ## Flow Lifecycle -1. **Flow Class Definition**: Flow classes define processing pipelines +1. **Flow Blueprint Definition**: Flow blueprints define processing pipelines 2. **Flow Instance Creation**: `tg-start-flow` creates a running instance 3. **Service Availability**: Flow provides configured service endpoints 4. 
**Processing**: Documents and queries can be processed through the flow @@ -81,11 +81,11 @@ tg-put-flow-class -n "my-class" -f flow-definition.json ## Error Handling -### Flow Class Not Found +### Flow Blueprint Not Found ```bash -Exception: Flow class 'invalid-class' not found +Exception: Flow blueprint 'invalid-class' not found ``` -**Solution**: Check available flow classes with `tg-show-flow-classes` and ensure the class name is correct. +**Solution**: Check available flow blueprints with `tg-show-flow-blueprints` and ensure the class name is correct. ### Flow ID Already Exists ```bash @@ -137,8 +137,8 @@ Once started, flows provide service interfaces based on their class definition. - [`tg-stop-flow`](tg-stop-flow.md) - Stop a running flow - [`tg-show-flows`](tg-show-flows.md) - List active flows and their interfaces -- [`tg-show-flow-classes`](tg-show-flow-classes.md) - List available flow classes -- [`tg-put-flow-class`](tg-put-flow-class.md) - Upload/update flow class definitions +- [`tg-show-flow-blueprints`](tg-show-flow-blueprints.md) - List available flow blueprints +- [`tg-put-flow-blueprint`](tg-put-flow-blueprint.md) - Upload/update flow blueprint definitions - [`tg-show-flow-state`](tg-show-flow-state.md) - Check flow status ## API Integration diff --git a/docs/cli/tg-start-library-processing.md b/docs/cli/tg-start-library-processing.md index 534cedac..ee5ceb33 100644 --- a/docs/cli/tg-start-library-processing.md +++ b/docs/cli/tg-start-library-processing.md @@ -434,7 +434,7 @@ Exception: Processing ID already exists ```bash Exception: Flow instance not found ``` -**Solution**: Verify flow exists with `tg-show-flows` or `tg-show-flow-classes`. +**Solution**: Verify flow exists with `tg-show-flows` or `tg-show-flow-blueprints`. 
### Insufficient Resources ```bash diff --git a/docs/cli/tg-stop-flow.md b/docs/cli/tg-stop-flow.md index 1e088762..97ad1696 100644 --- a/docs/cli/tg-stop-flow.md +++ b/docs/cli/tg-stop-flow.md @@ -171,7 +171,7 @@ done - [`tg-start-flow`](tg-start-flow.md) - Start a new flow instance - [`tg-show-flows`](tg-show-flows.md) - List active flows - [`tg-show-flow-state`](tg-show-flow-state.md) - Check detailed flow status -[`tg-show-flow-classes`](tg-show-flow-classes.md) - List available flow classes +[`tg-show-flow-blueprints`](tg-show-flow-blueprints.md) - List available flow blueprints ## API Integration diff --git a/docs/tech-specs/flow-class-definition.md b/docs/tech-specs/flow-class-definition.md index c6f0c7b0..9e7ed66b 100644 --- a/docs/tech-specs/flow-class-definition.md +++ b/docs/tech-specs/flow-class-definition.md @@ -1,15 +1,15 @@ -# Flow Class Definition Specification +# Flow Blueprint Definition Specification ## Overview -A flow class defines a complete dataflow pattern template in the TrustGraph system. When instantiated, it creates an interconnected network of processors that handle data ingestion, processing, storage, and querying as a unified system. +A flow blueprint defines a complete dataflow pattern template in the TrustGraph system. When instantiated, it creates an interconnected network of processors that handle data ingestion, processing, storage, and querying as a unified system. ## Structure -A flow class definition consists of five main sections: +A flow blueprint definition consists of five main sections: ### 1. Class Section -Defines shared service processors that are instantiated once per flow class. These processors handle requests from all flow instances of this class. +Defines shared service processors that are instantiated once per flow blueprint. These processors handle requests from all flow instances of this class. 
```json "class": { @@ -100,7 +100,7 @@ Maps flow-specific parameter names to centrally-stored parameter definitions: - Reduces duplication of parameter schemas ### 5. Metadata -Additional information about the flow class: +Additional information about the flow blueprint: ```json "description": "Human-readable description", @@ -117,7 +117,7 @@ Additional information about the flow class: - Example: `flow-123`, `customer-A-flow` #### {class} -- Replaced with the flow class name +- Replaced with the flow blueprint name - Creates shared resources across flows of the same class - Example: `standard-rag`, `enterprise-rag` @@ -203,14 +203,14 @@ Parameter names in settings correspond to keys in the flow's `parameters` sectio ## Queue Patterns (Pulsar) -Flow classes use Apache Pulsar for messaging. Queue names follow the Pulsar format: +Flow blueprints use Apache Pulsar for messaging. Queue names follow the Pulsar format: ``` ://// ``` ### Components: - **persistence**: `persistent` or `non-persistent` (Pulsar persistence mode) -- **tenant**: `tg` for TrustGraph-supplied flow class definitions +- **tenant**: `tg` for TrustGraph-supplied flow blueprint definitions - **namespace**: Indicates the messaging pattern - `flow`: Fire-and-forget services - `request`: Request portion of request/response services @@ -232,7 +232,7 @@ Flow classes use Apache Pulsar for messaging. Queue names follow the Pulsar form ## Dataflow Architecture -The flow class creates a unified dataflow where: +The flow blueprint creates a unified dataflow where: 1. **Document Processing Pipeline**: Flows from ingestion through transformation to storage 2. 
**Query Services**: Integrated processors that query the same data stores and services @@ -245,7 +245,7 @@ All processors (both `{id}` and `{class}`) work together as a cohesive dataflow Given: - Flow Instance ID: `customer-A-flow` -- Flow Class: `standard-rag` +- Flow Blueprint: `standard-rag` - Flow parameter mappings: - `"model": "llm-model"` - `"temp": "temperature"` diff --git a/docs/tech-specs/flow-configurable-parameters.md b/docs/tech-specs/flow-configurable-parameters.md index b3b0ee5a..a605c114 100644 --- a/docs/tech-specs/flow-configurable-parameters.md +++ b/docs/tech-specs/flow-configurable-parameters.md @@ -1,8 +1,8 @@ -# Flow Class Configurable Parameters Technical Specification +# Flow Blueprint Configurable Parameters Technical Specification ## Overview -This specification describes the implementation of configurable parameters for flow classes in TrustGraph. Parameters enable users to customize processor parameters at flow launch time by providing values that replace parameter placeholders in the flow class definition. +This specification describes the implementation of configurable parameters for flow blueprints in TrustGraph. Parameters enable users to customize processor parameters at flow launch time by providing values that replace parameter placeholders in the flow blueprint definition. Parameters work through template variable substitution in processor parameters, similar to how `{id}` and `{class}` variables work, but with user-provided values. 
@@ -21,25 +21,25 @@ The integration supports four primary use cases: - **Template Substitution**: Seamlessly replace parameter placeholders in processor parameters - **UI Integration**: Enable parameter input through both API and UI interfaces - **Type Safety**: Ensure parameter types match expected processor parameter types -- **Documentation**: Self-documenting parameter schemas within flow class definitions -- **Backward Compatibility**: Maintain compatibility with existing flow classes that don't use parameters +- **Documentation**: Self-documenting parameter schemas within flow blueprint definitions +- **Backward Compatibility**: Maintain compatibility with existing flow blueprints that don't use parameters ## Background -Flow classes in TrustGraph now support processor parameters that can contain either fixed values or parameter placeholders. This creates an opportunity for runtime customization. +Flow blueprints in TrustGraph now support processor parameters that can contain either fixed values or parameter placeholders. This creates an opportunity for runtime customization. 
Current processor parameters support: - Fixed values: `"model": "gemma3:12b"` - Parameter placeholders: `"model": "gemma3:{model-size}"` This specification defines how parameters are: -- Declared in flow class definitions +- Declared in flow blueprint definitions - Validated when flows are launched - Substituted in processor parameters - Exposed through APIs and UI By leveraging parameterized processor parameters, TrustGraph can: -- Reduce flow class duplication by using parameters for variations +- Reduce flow blueprint duplication by using parameters for variations - Enable users to tune processor behavior without modifying definitions - Support environment-specific configurations through parameter values - Maintain type safety through parameter schema validation @@ -51,7 +51,7 @@ By leveraging parameterized processor parameters, TrustGraph can: The configurable parameters system requires the following technical components: 1. **Parameter Schema Definition** - - JSON Schema-based parameter definitions within flow class metadata + - JSON Schema-based parameter definitions within flow blueprint metadata - Type definitions including string, number, boolean, enum, and object types - Validation rules including min/max values, patterns, and required fields @@ -92,7 +92,7 @@ The configurable parameters system requires the following technical components: #### Parameter Definitions (Stored in Schema/Config) -Parameter definitions are stored centrally in the schema and config system with type "parameter-types": +Parameter definitions are stored centrally in the schema and config system with type "parameter-type": ```json { @@ -146,9 +146,9 @@ Parameter definitions are stored centrally in the schema and config system with } ``` -#### Flow Class with Parameter References +#### Flow Blueprint with Parameter References -Flow classes define parameter metadata with type references, descriptions, and ordering: +Flow blueprints define parameter metadata with type references, 
descriptions, and ordering: ```json { @@ -225,7 +225,7 @@ The `parameters` section maps flow-specific parameter names (keys) to parameter - `controlled-by` (optional): Name of another parameter that controls this parameter's value when in simple mode. When specified, this parameter inherits its value from the controlling parameter unless explicitly overridden This approach allows: -- Reusable parameter type definitions across multiple flow classes +- Reusable parameter type definitions across multiple flow blueprints - Centralized parameter type management and validation - Flow-specific parameter descriptions and ordering - Enhanced UI experience with descriptive parameter forms @@ -253,7 +253,7 @@ The flow launch API accepts parameters using the flow's parameter names: Note: In this example, `llm-rag-model` is not explicitly provided but will inherit the value "claude-3" from `llm-model` due to its `controlled-by` relationship. Similarly, `chunk-overlap` could inherit a calculated value based on `chunk-size`. The system will: -1. Extract parameter metadata from flow class definition +1. Extract parameter metadata from flow blueprint definition 2. Map flow parameter names to their type definitions (e.g., `llm-model` → `llm-model` type) 3. Resolve controlled-by relationships (e.g., `llm-rag-model` inherits from `llm-model`) 4. Validate user-provided and inherited values against the parameter type definitions @@ -265,14 +265,14 @@ The system will: When a flow is started, the system performs the following parameter resolution steps: -1. **Flow Class Loading**: Load flow class definition and extract parameter metadata -2. **Metadata Extraction**: Extract `type`, `description`, `order`, `advanced`, and `controlled-by` for each parameter defined in the flow class's `parameters` section -3. **Type Definition Lookup**: For each parameter in the flow class: +1. **Flow Blueprint Loading**: Load flow blueprint definition and extract parameter metadata +2. 
**Metadata Extraction**: Extract `type`, `description`, `order`, `advanced`, and `controlled-by` for each parameter defined in the flow blueprint's `parameters` section +3. **Type Definition Lookup**: For each parameter in the flow blueprint: - Retrieve the parameter type definition from schema/config store using the `type` field - - The type definitions are stored with type "parameter-types" in the config system + - The type definitions are stored with type "parameter-type" in the config system - Each type definition contains the parameter's schema, default value, and validation rules 4. **Default Value Resolution**: - - For each parameter defined in the flow class: + - For each parameter defined in the flow blueprint: - Check if the user provided a value for this parameter - If no user value provided, use the `default` value from the parameter type definition - Build a complete parameter map containing both user-provided and default values @@ -361,7 +361,7 @@ The flow configuration service (`trustgraph-flow/trustgraph/config/service/flow. Resolve parameters by merging user-provided values with defaults. Args: - flow_class: The flow class definition dict + flow_class: The flow blueprint definition dict user_params: User-provided parameters dict Returns: @@ -370,20 +370,20 @@ The flow configuration service (`trustgraph-flow/trustgraph/config/service/flow. ``` This function should: - - Extract parameter metadata from the flow class's `parameters` section + - Extract parameter metadata from the flow blueprint's `parameters` section - For each parameter, fetch its type definition from config store - Apply defaults for any parameters not provided by the user - Handle `controlled-by` inheritance relationships - Return the complete parameter set 2. 
**Modified `handle_start_flow` Method** - - Call `resolve_parameters` after loading the flow class + - Call `resolve_parameters` after loading the flow blueprint - Use the complete resolved parameter set for template substitution - Store the complete parameter set (not just user-provided) with the flow - Validate that all required parameters have values 3. **Parameter Type Fetching** - - Parameter type definitions are stored in config with type "parameter-types" + - Parameter type definitions are stored in config with type "parameter-type" - Each type definition contains schema, default value, and validation rules - Cache frequently-used parameter types to reduce config lookups @@ -400,12 +400,12 @@ The flow configuration service (`trustgraph-flow/trustgraph/config/service/flow. 4. **Library CLI Commands** - CLI commands that start flows need parameter support: - Accept parameter values via command-line flags or configuration files - - Validate parameters against flow class definitions before submission + - Validate parameters against flow blueprint definitions before submission - Support parameter file input (JSON/YAML) for complex parameter sets - CLI commands that show flows need to display parameter information: - Show parameter values used when the flow was started - - Display available parameters for a flow class + - Display available parameters for a flow blueprint - Show parameter validation schemas and defaults #### Processor Base Class Integration @@ -456,7 +456,7 @@ Substituted in processor: "0.7" (string) ## Migration Plan -1. The system should continue to support flow classes with no parameters +1. The system should continue to support flow blueprints with no parameters declared. 2. The system should continue to support flows no parameters specified: This works for flows with no parameters, and flows with parameters @@ -482,4 +482,4 @@ A: Just string substitution to remove strange injections and edge-cases. 
## References - JSON Schema Specification: https://json-schema.org/ -- Flow Class Definition Spec: docs/tech-specs/flow-class-definition.md +- Flow Blueprint Definition Spec: docs/tech-specs/flow-class-definition.md diff --git a/tests/unit/test_base/test_flow_processor.py b/tests/unit/test_base/test_flow_processor.py index bcda2f84..70835e00 100644 --- a/tests/unit/test_base/test_flow_processor.py +++ b/tests/unit/test_base/test_flow_processor.py @@ -180,7 +180,7 @@ class TestFlowProcessorSimple(IsolatedAsyncioTestCase): 'test-flow': {'config': 'test-config'} } config_data = { - 'flows-active': { + 'active-flow': { 'test-processor': '{"test-flow": {"config": "test-config"}}' } } @@ -212,7 +212,7 @@ class TestFlowProcessorSimple(IsolatedAsyncioTestCase): # Configuration without flows for this processor config_data = { - 'flows-active': { + 'active-flow': { 'other-processor': '{"other-flow": {"config": "other-config"}}' } } @@ -241,7 +241,7 @@ class TestFlowProcessorSimple(IsolatedAsyncioTestCase): processor = FlowProcessor(**config) processor.id = 'test-processor' - # Configuration without flows-active key + # Configuration without active-flow key config_data = { 'other-data': 'some-value' } @@ -276,16 +276,16 @@ class TestFlowProcessorSimple(IsolatedAsyncioTestCase): # First configuration - start flow1 config_data1 = { - 'flows-active': { + 'active-flow': { 'test-processor': '{"flow1": {"config": "config1"}}' } } - + await processor.on_configure_flows(config_data1, version=1) - + # Second configuration - stop flow1, start flow2 config_data2 = { - 'flows-active': { + 'active-flow': { 'test-processor': '{"flow2": {"config": "config2"}}' } } diff --git a/trustgraph-base/trustgraph/api/flow.py b/trustgraph-base/trustgraph/api/flow.py index 744ad2e7..142a699b 100644 --- a/trustgraph-base/trustgraph/api/flow.py +++ b/trustgraph-base/trustgraph/api/flow.py @@ -28,42 +28,42 @@ class Flow: def id(self, id="default"): return FlowInstance(api=self, id=id) - def 
list_classes(self): + def list_blueprints(self): # The input consists of system and prompt strings input = { - "operation": "list-classes", + "operation": "list-blueprints", } - return self.request(request = input)["class-names"] + return self.request(request = input)["blueprint-names"] - def get_class(self, class_name): + def get_blueprint(self, blueprint_name): # The input consists of system and prompt strings input = { - "operation": "get-class", - "class-name": class_name, + "operation": "get-blueprint", + "blueprint-name": blueprint_name, } - return json.loads(self.request(request = input)["class-definition"]) + return json.loads(self.request(request = input)["blueprint-definition"]) - def put_class(self, class_name, definition): + def put_blueprint(self, blueprint_name, definition): # The input consists of system and prompt strings input = { - "operation": "put-class", - "class-name": class_name, - "class-definition": json.dumps(definition), + "operation": "put-blueprint", + "blueprint-name": blueprint_name, + "blueprint-definition": json.dumps(definition), } self.request(request = input) - def delete_class(self, class_name): + def delete_blueprint(self, blueprint_name): # The input consists of system and prompt strings input = { - "operation": "delete-class", - "class-name": class_name, + "operation": "delete-blueprint", + "blueprint-name": blueprint_name, } self.request(request = input) @@ -87,13 +87,13 @@ class Flow: return json.loads(self.request(request = input)["flow"]) - def start(self, class_name, id, description, parameters=None): + def start(self, blueprint_name, id, description, parameters=None): # The input consists of system and prompt strings input = { "operation": "start-flow", "flow-id": id, - "class-name": class_name, + "blueprint-name": blueprint_name, "description": description, } diff --git a/trustgraph-base/trustgraph/base/flow_processor.py b/trustgraph-base/trustgraph/base/flow_processor.py index 385e1346..0f170030 100644 --- 
a/trustgraph-base/trustgraph/base/flow_processor.py +++ b/trustgraph-base/trustgraph/base/flow_processor.py @@ -63,13 +63,13 @@ class FlowProcessor(AsyncProcessor): logger.info(f"Got config version {version}") # Skip over invalid data - if "flows-active" not in config: return + if "active-flow" not in config: return # Check there's configuration information for me - if self.id in config["flows-active"]: + if self.id in config["active-flow"]: # Get my flow config - flow_config = json.loads(config["flows-active"][self.id]) + flow_config = json.loads(config["active-flow"][self.id]) else: diff --git a/trustgraph-base/trustgraph/messaging/translators/flow.py b/trustgraph-base/trustgraph/messaging/translators/flow.py index 8c1a019a..542b65ec 100644 --- a/trustgraph-base/trustgraph/messaging/translators/flow.py +++ b/trustgraph-base/trustgraph/messaging/translators/flow.py @@ -9,8 +9,8 @@ class FlowRequestTranslator(MessageTranslator): def to_pulsar(self, data: Dict[str, Any]) -> FlowRequest: return FlowRequest( operation=data.get("operation"), - class_name=data.get("class-name"), - class_definition=data.get("class-definition"), + blueprint_name=data.get("blueprint-name"), + blueprint_definition=data.get("blueprint-definition"), description=data.get("description"), flow_id=data.get("flow-id"), parameters=data.get("parameters") @@ -21,10 +21,10 @@ class FlowRequestTranslator(MessageTranslator): if obj.operation is not None: result["operation"] = obj.operation - if obj.class_name is not None: - result["class-name"] = obj.class_name - if obj.class_definition is not None: - result["class-definition"] = obj.class_definition + if obj.blueprint_name is not None: + result["blueprint-name"] = obj.blueprint_name + if obj.blueprint_definition is not None: + result["blueprint-definition"] = obj.blueprint_definition if obj.description is not None: result["description"] = obj.description if obj.flow_id is not None: @@ -44,12 +44,12 @@ class FlowResponseTranslator(MessageTranslator): 
def from_pulsar(self, obj: FlowResponse) -> Dict[str, Any]: result = {} - if obj.class_names is not None: - result["class-names"] = obj.class_names + if obj.blueprint_names is not None: + result["blueprint-names"] = obj.blueprint_names if obj.flow_ids is not None: result["flow-ids"] = obj.flow_ids - if obj.class_definition is not None: - result["class-definition"] = obj.class_definition + if obj.blueprint_definition is not None: + result["blueprint-definition"] = obj.blueprint_definition if obj.flow is not None: result["flow"] = obj.flow if obj.description is not None: diff --git a/trustgraph-base/trustgraph/schema/services/flow.py b/trustgraph-base/trustgraph/schema/services/flow.py index b993b1b3..cf62c84d 100644 --- a/trustgraph-base/trustgraph/schema/services/flow.py +++ b/trustgraph-base/trustgraph/schema/services/flow.py @@ -7,27 +7,27 @@ from ..core.primitives import Error ############################################################################ # Flow service: -# list_classes() -> (classname[]) -# get_class(classname) -> (class) -# put_class(class) -> (class) -# delete_class(classname) -> () +# list_blueprints() -> (blueprintname[]) +# get_blueprint(blueprintname) -> (blueprint) +# put_blueprint(blueprint) -> (blueprint) +# delete_blueprint(blueprintname) -> () # # list_flows() -> (flowid[]) # get_flow(flowid) -> (flow) -# start_flow(flowid, classname) -> () +# start_flow(flowid, blueprintname) -> () # stop_flow(flowid) -> () # Prompt services, abstract the prompt generation @dataclass class FlowRequest: - operation: str = "" # list-classes, get-class, put-class, delete-class + operation: str = "" # list-blueprints, get-blueprint, put-blueprint, delete-blueprint # list-flows, get-flow, start-flow, stop-flow - # get_class, put_class, delete_class, start_flow - class_name: str = "" + # get_blueprint, put_blueprint, delete_blueprint, start_flow + blueprint_name: str = "" - # put_class - class_definition: str = "" + # put_blueprint + blueprint_definition: 
str = "" # start_flow description: str = "" @@ -40,14 +40,14 @@ class FlowRequest: @dataclass class FlowResponse: - # list_classes - class_names: list[str] = field(default_factory=list) + # list_blueprints + blueprint_names: list[str] = field(default_factory=list) # list_flows flow_ids: list[str] = field(default_factory=list) - # get_class - class_definition: str = "" + # get_blueprint + blueprint_definition: str = "" # get_flow flow: str = "" diff --git a/trustgraph-cli/pyproject.toml b/trustgraph-cli/pyproject.toml index 65921d92..5568bf91 100644 --- a/trustgraph-cli/pyproject.toml +++ b/trustgraph-cli/pyproject.toml @@ -29,13 +29,13 @@ Homepage = "https://github.com/trustgraph-ai/trustgraph" [project.scripts] tg-add-library-document = "trustgraph.cli.add_library_document:main" -tg-delete-flow-class = "trustgraph.cli.delete_flow_class:main" +tg-delete-flow-blueprint = "trustgraph.cli.delete_flow_blueprint:main" tg-delete-mcp-tool = "trustgraph.cli.delete_mcp_tool:main" tg-delete-kg-core = "trustgraph.cli.delete_kg_core:main" tg-delete-tool = "trustgraph.cli.delete_tool:main" tg-dump-msgpack = "trustgraph.cli.dump_msgpack:main" tg-dump-queues = "trustgraph.cli.dump_queues:main" -tg-get-flow-class = "trustgraph.cli.get_flow_class:main" +tg-get-flow-blueprint = "trustgraph.cli.get_flow_blueprint:main" tg-get-kg-core = "trustgraph.cli.get_kg_core:main" tg-graph-to-turtle = "trustgraph.cli.graph_to_turtle:main" tg-init-trustgraph = "trustgraph.cli.init_trustgraph:main" @@ -56,7 +56,7 @@ tg-load-text = "trustgraph.cli.load_text:main" tg-load-turtle = "trustgraph.cli.load_turtle:main" tg-load-knowledge = "trustgraph.cli.load_knowledge:main" tg-load-structured-data = "trustgraph.cli.load_structured_data:main" -tg-put-flow-class = "trustgraph.cli.put_flow_class:main" +tg-put-flow-blueprint = "trustgraph.cli.put_flow_blueprint:main" tg-put-kg-core = "trustgraph.cli.put_kg_core:main" tg-remove-library-document = "trustgraph.cli.remove_library_document:main" 
tg-save-doc-embeds = "trustgraph.cli.save_doc_embeds:main" @@ -65,7 +65,7 @@ tg-set-prompt = "trustgraph.cli.set_prompt:main" tg-set-token-costs = "trustgraph.cli.set_token_costs:main" tg-set-tool = "trustgraph.cli.set_tool:main" tg-show-config = "trustgraph.cli.show_config:main" -tg-show-flow-classes = "trustgraph.cli.show_flow_classes:main" +tg-show-flow-blueprints = "trustgraph.cli.show_flow_blueprints:main" tg-show-flow-state = "trustgraph.cli.show_flow_state:main" tg-show-flows = "trustgraph.cli.show_flows:main" tg-show-graph = "trustgraph.cli.show_graph:main" diff --git a/trustgraph-cli/trustgraph/cli/delete_flow_class.py b/trustgraph-cli/trustgraph/cli/delete_flow_blueprint.py similarity index 65% rename from trustgraph-cli/trustgraph/cli/delete_flow_class.py rename to trustgraph-cli/trustgraph/cli/delete_flow_blueprint.py index ba0a5a9c..9ff8aeba 100644 --- a/trustgraph-cli/trustgraph/cli/delete_flow_class.py +++ b/trustgraph-cli/trustgraph/cli/delete_flow_blueprint.py @@ -1,5 +1,5 @@ """ -Deletes a flow class +Deletes a flow blueprint """ import argparse @@ -10,16 +10,16 @@ import json default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') -def delete_flow_class(url, class_name): +def delete_flow_blueprint(url, blueprint_name): api = Api(url).flow() - class_names = api.delete_class(class_name) + blueprint_names = api.delete_blueprint(blueprint_name) def main(): parser = argparse.ArgumentParser( - prog='tg-delete-flow-class', + prog='tg-delete-flow-blueprint', description=__doc__, ) @@ -30,17 +30,17 @@ def main(): ) parser.add_argument( - '-n', '--class-name', - help=f'Flow class name', + '-n', '--blueprint-name', + help=f'Flow blueprint name', ) args = parser.parse_args() try: - delete_flow_class( + delete_flow_blueprint( url=args.api_url, - class_name=args.class_name, + blueprint_name=args.blueprint_name, ) except Exception as e: @@ -48,4 +48,4 @@ def main(): print("Exception:", e, flush=True) if __name__ == "__main__": - main() \ No newline 
at end of file + main() diff --git a/trustgraph-cli/trustgraph/cli/get_flow_class.py b/trustgraph-cli/trustgraph/cli/get_flow_blueprint.py similarity index 67% rename from trustgraph-cli/trustgraph/cli/get_flow_class.py rename to trustgraph-cli/trustgraph/cli/get_flow_blueprint.py index 5479e507..817b8f47 100644 --- a/trustgraph-cli/trustgraph/cli/get_flow_class.py +++ b/trustgraph-cli/trustgraph/cli/get_flow_blueprint.py @@ -1,5 +1,5 @@ """ -Outputs a flow class definition in JSON format. +Outputs a flow blueprint definition in JSON format. """ import argparse @@ -10,18 +10,18 @@ import json default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') -def get_flow_class(url, class_name): +def get_flow_blueprint(url, blueprint_name): api = Api(url).flow() - cls = api.get_class(class_name) + cls = api.get_blueprint(blueprint_name) print(json.dumps(cls, indent=4)) def main(): parser = argparse.ArgumentParser( - prog='tg-get-flow-class', + prog='tg-get-flow-blueprint', description=__doc__, ) @@ -32,18 +32,18 @@ def main(): ) parser.add_argument( - '-n', '--class-name', + '-n', '--blueprint-name', required=True, - help=f'Flow class name', + help=f'Flow blueprint name', ) args = parser.parse_args() try: - get_flow_class( + get_flow_blueprint( url=args.api_url, - class_name=args.class_name, + blueprint_name=args.blueprint_name, ) except Exception as e: @@ -51,4 +51,4 @@ def main(): print("Exception:", e, flush=True) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/trustgraph-cli/trustgraph/cli/put_flow_class.py b/trustgraph-cli/trustgraph/cli/put_flow_blueprint.py similarity index 69% rename from trustgraph-cli/trustgraph/cli/put_flow_class.py rename to trustgraph-cli/trustgraph/cli/put_flow_blueprint.py index 6a88421d..740a224a 100644 --- a/trustgraph-cli/trustgraph/cli/put_flow_class.py +++ b/trustgraph-cli/trustgraph/cli/put_flow_blueprint.py @@ -1,6 +1,6 @@ """ -Uploads a flow class definition. 
You can take the output of -tg-get-flow-class and load it back in using this utility. +Uploads a flow blueprint definition. You can take the output of +tg-get-flow-blueprint and load it back in using this utility. """ import argparse @@ -11,16 +11,16 @@ import json default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def put_flow_class(url, class_name, config, token=None): +def put_flow_blueprint(url, blueprint_name, config, token=None): api = Api(url, token=token) - class_names = api.flow().put_class(class_name, config) + blueprint_names = api.flow().put_blueprint(blueprint_name, config) def main(): parser = argparse.ArgumentParser( - prog='tg-put-flow-class', + prog='tg-put-flow-blueprint', description=__doc__, ) @@ -37,8 +37,8 @@ def main(): ) parser.add_argument( - '-n', '--class-name', - help=f'Flow class name', + '-n', '--blueprint-name', + help=f'Flow blueprint name', ) parser.add_argument( @@ -50,9 +50,9 @@ def main(): try: - put_flow_class( + put_flow_blueprint( url=args.api_url, - class_name=args.class_name, + blueprint_name=args.blueprint_name, config=json.loads(args.config), token=args.token, ) @@ -62,4 +62,4 @@ def main(): print("Exception:", e, flush=True) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/trustgraph-cli/trustgraph/cli/show_flow_classes.py b/trustgraph-cli/trustgraph/cli/show_flow_blueprints.py similarity index 83% rename from trustgraph-cli/trustgraph/cli/show_flow_classes.py rename to trustgraph-cli/trustgraph/cli/show_flow_blueprints.py index 123f5380..ca1f5c83 100644 --- a/trustgraph-cli/trustgraph/cli/show_flow_classes.py +++ b/trustgraph-cli/trustgraph/cli/show_flow_blueprints.py @@ -1,5 +1,5 @@ """ -Shows all defined flow classes. +Shows all defined flow blueprints. 
""" import argparse @@ -16,7 +16,7 @@ def format_parameters(params_metadata, config_api): Format parameter metadata for display Args: - params_metadata: Parameter definitions from flow class + params_metadata: Parameter definitions from flow blueprint config_api: API client to get parameter type information Returns: @@ -41,7 +41,7 @@ def format_parameters(params_metadata, config_api): type_info = param_type if config_api: try: - key = ConfigKey("parameter-types", param_type) + key = ConfigKey("parameter-type", param_type) type_def_value = config_api.get([key])[0].value param_type_def = json.loads(type_def_value) @@ -58,23 +58,23 @@ def format_parameters(params_metadata, config_api): return "\n".join(param_list) -def show_flow_classes(url, token=None): +def show_flow_blueprints(url, token=None): api = Api(url, token=token) flow_api = api.flow() config_api = api.config() - class_names = flow_api.list_classes() + blueprint_names = flow_api.list_blueprints() - if len(class_names) == 0: - print("No flow classes.") + if len(blueprint_names) == 0: + print("No flow blueprints.") return - for class_name in class_names: - cls = flow_api.get_class(class_name) + for blueprint_name in blueprint_names: + cls = flow_api.get_blueprint(blueprint_name) table = [] - table.append(("name", class_name)) + table.append(("name", blueprint_name)) table.append(("description", cls.get("description", ""))) tags = cls.get("tags", []) @@ -97,7 +97,7 @@ def show_flow_classes(url, token=None): def main(): parser = argparse.ArgumentParser( - prog='tg-show-flow-classes', + prog='tg-show-flow-blueprints', description=__doc__, ) @@ -117,7 +117,7 @@ def main(): try: - show_flow_classes( + show_flow_blueprints( url=args.api_url, token=args.token, ) @@ -127,4 +127,4 @@ def main(): print("Exception:", e, flush=True) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/trustgraph-cli/trustgraph/cli/show_flow_state.py b/trustgraph-cli/trustgraph/cli/show_flow_state.py index 
6ca4df8f..d5d87f2c 100644 --- a/trustgraph-cli/trustgraph/cli/show_flow_state.py +++ b/trustgraph-cli/trustgraph/cli/show_flow_state.py @@ -16,15 +16,15 @@ def dump_status(metrics_url, api_url, flow_id, token=None): api = Api(api_url, token=token).flow() flow = api.get(flow_id) - class_name = flow["class-name"] + blueprint_name = flow["blueprint-name"] print() print(f"Flow {flow_id}") show_processors(metrics_url, flow_id) print() - print(f"Class {class_name}") - show_processors(metrics_url, class_name) + print(f"Blueprint {blueprint_name}") + show_processors(metrics_url, blueprint_name) print() diff --git a/trustgraph-cli/trustgraph/cli/show_flows.py b/trustgraph-cli/trustgraph/cli/show_flows.py index b383ff56..828c18f1 100644 --- a/trustgraph-cli/trustgraph/cli/show_flows.py +++ b/trustgraph-cli/trustgraph/cli/show_flows.py @@ -13,7 +13,7 @@ default_token = os.getenv("TRUSTGRAPH_TOKEN", None) def get_interface(config_api, i): - key = ConfigKey("interface-descriptions", i) + key = ConfigKey("interface-description", i) value = config_api.get([key])[0].value @@ -70,13 +70,13 @@ def get_enum_description(param_value, param_type_def): # If not found in enum, return original value return param_value -def format_parameters(flow_params, class_params_metadata, config_api): +def format_parameters(flow_params, blueprint_params_metadata, config_api): """ Format flow parameters with their human-readable descriptions Args: flow_params: The actual parameter values used in the flow - class_params_metadata: The parameter metadata from the flow class definition + blueprint_params_metadata: The parameter metadata from the flow blueprint definition config_api: API client to retrieve parameter type definitions Returns: @@ -89,7 +89,7 @@ def format_parameters(flow_params, class_params_metadata, config_api): # Sort parameters by order if available sorted_params = sorted( - class_params_metadata.items(), + blueprint_params_metadata.items(), key=lambda x: x[1].get("order", 999) ) @@ -105,7 
+105,7 @@ def format_parameters(flow_params, class_params_metadata, config_api): if param_type and config_api: try: from trustgraph.api import ConfigKey - key = ConfigKey("parameter-types", param_type) + key = ConfigKey("parameter-type", param_type) type_def_value = config_api.get([key])[0].value param_type_def = json.loads(type_def_value) display_value = get_enum_description(value, param_type_def) @@ -122,9 +122,9 @@ def format_parameters(flow_params, class_params_metadata, config_api): param_list.append(line) - # Add any parameters that aren't in the class metadata (shouldn't happen normally) + # Add any parameters that aren't in the blueprint metadata (shouldn't happen normally) for param_name, value in flow_params.items(): - if param_name not in class_params_metadata: + if param_name not in blueprint_params_metadata: param_list.append(f"• {param_name}: {value} (undefined)") return "\n".join(param_list) if param_list else "None" @@ -135,7 +135,7 @@ def show_flows(url, token=None): config_api = api.config() flow_api = api.flow() - interface_names = config_api.list("interface-descriptions") + interface_names = config_api.list("interface-description") interface_defs = { i: get_interface(config_api, i) @@ -156,24 +156,24 @@ def show_flows(url, token=None): table = [] table.append(("id", id)) - table.append(("class", flow.get("class-name", ""))) + table.append(("blueprint", flow.get("blueprint-name", ""))) table.append(("desc", flow.get("description", ""))) # Display parameters with human-readable descriptions parameters = flow.get("parameters", {}) if parameters: - # Try to get the flow class definition for parameter metadata - class_name = flow.get("class-name", "") - if class_name: + # Try to get the flow blueprint definition for parameter metadata + blueprint_name = flow.get("blueprint-name", "") + if blueprint_name: try: - flow_class = flow_api.get_class(class_name) - class_params_metadata = flow_class.get("parameters", {}) - param_str = 
format_parameters(parameters, class_params_metadata, config_api) + flow_blueprint = flow_api.get_blueprint(blueprint_name) + blueprint_params_metadata = flow_blueprint.get("parameters", {}) + param_str = format_parameters(parameters, blueprint_params_metadata, config_api) except Exception as e: - # Fallback to JSON if we can't get the class definition + # Fallback to JSON if we can't get the blueprint definition param_str = json.dumps(parameters, indent=2) else: - # No class name, fallback to JSON + # No blueprint name, fallback to JSON param_str = json.dumps(parameters, indent=2) table.append(("parameters", param_str)) diff --git a/trustgraph-cli/trustgraph/cli/show_parameter_types.py b/trustgraph-cli/trustgraph/cli/show_parameter_types.py index e5b842b5..2e0f1be3 100644 --- a/trustgraph-cli/trustgraph/cli/show_parameter_types.py +++ b/trustgraph-cli/trustgraph/cli/show_parameter_types.py @@ -85,7 +85,7 @@ def show_parameter_types(url, token=None): # Get list of all parameter types try: - param_type_names = config_api.list("parameter-types") + param_type_names = config_api.list("parameter-type") except Exception as e: print(f"Error retrieving parameter types: {e}") return @@ -97,7 +97,7 @@ def show_parameter_types(url, token=None): for param_type_name in param_type_names: try: # Get the parameter type definition - key = ConfigKey("parameter-types", param_type_name) + key = ConfigKey("parameter-type", param_type_name) type_def_value = config_api.get([key])[0].value param_type_def = json.loads(type_def_value) @@ -179,7 +179,7 @@ def show_specific_parameter_type(url, param_type_name, token=None): try: # Get the parameter type definition - key = ConfigKey("parameter-types", param_type_name) + key = ConfigKey("parameter-type", param_type_name) type_def_value = config_api.get([key])[0].value param_type_def = json.loads(type_def_value) diff --git a/trustgraph-cli/trustgraph/cli/show_token_costs.py b/trustgraph-cli/trustgraph/cli/show_token_costs.py index 
9e7c352a..adc13ad7 100644 --- a/trustgraph-cli/trustgraph/cli/show_token_costs.py +++ b/trustgraph-cli/trustgraph/cli/show_token_costs.py @@ -18,7 +18,7 @@ def show_config(url, token=None): api = Api(url, token=token).config() - models = api.list("token-costs") + models = api.list("token-cost") costs = [] @@ -29,7 +29,7 @@ def show_config(url, token=None): try: values = json.loads(api.get([ - ConfigKey(type="token-costs", key=model), + ConfigKey(type="token-cost", key=model), ])[0].value) costs.append(( model, diff --git a/trustgraph-cli/trustgraph/cli/start_flow.py b/trustgraph-cli/trustgraph/cli/start_flow.py index 4f9954b0..e04e241d 100644 --- a/trustgraph-cli/trustgraph/cli/start_flow.py +++ b/trustgraph-cli/trustgraph/cli/start_flow.py @@ -1,5 +1,5 @@ """ -Starts a processing flow using a defined flow class. +Starts a processing flow using a defined flow blueprint. Parameters can be provided in three ways: 1. As key=value pairs: --param model=gpt-4 --param temp=0.7 @@ -19,12 +19,12 @@ import json default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def start_flow(url, class_name, flow_id, description, parameters=None, token=None): +def start_flow(url, blueprint_name, flow_id, description, parameters=None, token=None): api = Api(url, token=token).flow() api.start( - class_name = class_name, + blueprint_name = blueprint_name, id = flow_id, description = description, parameters = parameters, @@ -50,9 +50,9 @@ def main(): ) parser.add_argument( - '-n', '--class-name', + '-n', '--blueprint-name', required=True, - help=f'Flow class name', + help=f'Flow blueprint name', ) parser.add_argument( @@ -115,7 +115,7 @@ def main(): start_flow( url = args.api_url, - class_name = args.class_name, + blueprint_name = args.blueprint_name, flow_id = args.flow_id, description = args.description, parameters = parameters, diff --git a/trustgraph-cli/trustgraph/cli/verify_system_status.py 
b/trustgraph-cli/trustgraph/cli/verify_system_status.py index 294a3738..8cebc83f 100644 --- a/trustgraph-cli/trustgraph/cli/verify_system_status.py +++ b/trustgraph-cli/trustgraph/cli/verify_system_status.py @@ -194,21 +194,21 @@ def check_processors(url: str, min_processors: int, timeout: int, token: Optiona return False, f"Processor check error: {e}" -def check_flow_classes(url: str, timeout: int, token: Optional[str] = None) -> Tuple[bool, str]: - """Check if flow classes are loaded.""" +def check_flow_blueprints(url: str, timeout: int, token: Optional[str] = None) -> Tuple[bool, str]: + """Check if flow blueprints are loaded.""" try: api = Api(url, token=token, timeout=timeout) flow_api = api.flow() - classes = flow_api.list_classes() + blueprints = flow_api.list_blueprints() - if classes and len(classes) > 0: - return True, f"Found {len(classes)} flow class(es)" + if blueprints and len(blueprints) > 0: + return True, f"Found {len(blueprints)} flow blueprint(s)" else: - return False, "No flow classes found" + return False, "No flow blueprints found" except Exception as e: - return False, f"Flow classes check error: {e}" + return False, f"Flow blueprints check error: {e}" def check_flows(url: str, timeout: int, token: Optional[str] = None) -> Tuple[bool, str]: @@ -416,8 +416,8 @@ def main(): ) checker.run_check( - "Flow Classes", - check_flow_classes, + "Flow Blueprints", + check_flow_blueprints, args.api_url, args.check_timeout, args.token diff --git a/trustgraph-flow/trustgraph/agent/mcp_tool/service.py b/trustgraph-flow/trustgraph/agent/mcp_tool/service.py index 3858d06b..23789b96 100755 --- a/trustgraph-flow/trustgraph/agent/mcp_tool/service.py +++ b/trustgraph-flow/trustgraph/agent/mcp_tool/service.py @@ -32,7 +32,9 @@ class Service(ToolService): logger.info(f"Got config version {version}") - if "mcp" not in config: return + if "mcp" not in config: + self.mcp_services = {} + return self.mcp_services = { k: json.loads(v) diff --git 
a/trustgraph-flow/trustgraph/config/service/flow.py b/trustgraph-flow/trustgraph/config/service/flow.py index 42696c31..ab02fa30 100644 --- a/trustgraph-flow/trustgraph/config/service/flow.py +++ b/trustgraph-flow/trustgraph/config/service/flow.py @@ -13,26 +13,26 @@ class FlowConfig: # Cache for parameter type definitions to avoid repeated lookups self.param_type_cache = {} - async def resolve_parameters(self, flow_class, user_params): + async def resolve_parameters(self, flow_blueprint, user_params): """ Resolve parameters by merging user-provided values with defaults. Args: - flow_class: The flow class definition dict + flow_blueprint: The flow blueprint definition dict user_params: User-provided parameters dict (may be None or empty) Returns: Complete parameter dict with user values and defaults merged (all values as strings) """ - # If the flow class has no parameters section, return user params as-is (stringified) - if "parameters" not in flow_class: + # If the flow blueprint has no parameters section, return user params as-is (stringified) + if "parameters" not in flow_blueprint: if not user_params: return {} # Ensure all values are strings return {k: str(v) for k, v in user_params.items()} resolved = {} - flow_params = flow_class["parameters"] + flow_params = flow_blueprint["parameters"] user_params = user_params if user_params else {} # First pass: resolve parameters with explicit values or defaults @@ -49,7 +49,7 @@ class FlowConfig: if param_type not in self.param_type_cache: try: # Fetch parameter type definition from config store - type_def = await self.config.get("parameter-types").get(param_type) + type_def = await self.config.get("parameter-type").get(param_type) if type_def: self.param_type_cache[param_type] = json.loads(type_def) else: @@ -92,7 +92,7 @@ class FlowConfig: else: resolved[param_name] = str(default_value) - # Include any extra parameters from user that weren't in flow class definition + # Include any extra parameters from user that 
weren't in flow blueprint definition # This allows for forward compatibility (ensure they're strings) for key, value in user_params.items(): if key not in resolved: @@ -100,28 +100,28 @@ class FlowConfig: return resolved - async def handle_list_classes(self, msg): + async def handle_list_blueprints(self, msg): - names = list(await self.config.get("flow-classes").keys()) + names = list(await self.config.get("flow-blueprint").keys()) return FlowResponse( error = None, - class_names = names, + blueprint_names = names, ) - async def handle_get_class(self, msg): + async def handle_get_blueprint(self, msg): return FlowResponse( error = None, - class_definition = await self.config.get( - "flow-classes" - ).get(msg.class_name), + blueprint_definition = await self.config.get( + "flow-blueprint" + ).get(msg.blueprint_name), ) - async def handle_put_class(self, msg): + async def handle_put_blueprint(self, msg): - await self.config.get("flow-classes").put( - msg.class_name, msg.class_definition + await self.config.get("flow-blueprint").put( + msg.blueprint_name, msg.blueprint_definition ) await self.config.inc_version() @@ -132,11 +132,11 @@ class FlowConfig: error = None, ) - async def handle_delete_class(self, msg): + async def handle_delete_blueprint(self, msg): logger.debug(f"Flow config message: {msg}") - await self.config.get("flow-classes").delete(msg.class_name) + await self.config.get("flow-blueprint").delete(msg.blueprint_name) await self.config.inc_version() @@ -148,7 +148,7 @@ class FlowConfig: async def handle_list_flows(self, msg): - names = list(await self.config.get("flows").keys()) + names = list(await self.config.get("flow").keys()) return FlowResponse( error = None, @@ -157,7 +157,7 @@ class FlowConfig: async def handle_get_flow(self, msg): - flow_data = await self.config.get("flows").get(msg.flow_id) + flow_data = await self.config.get("flow").get(msg.flow_id) flow = json.loads(flow_data) return FlowResponse( @@ -169,23 +169,23 @@ class FlowConfig: async 
def handle_start_flow(self, msg): - if msg.class_name is None: - raise RuntimeError("No class name") + if msg.blueprint_name is None: + raise RuntimeError("No blueprint name") if msg.flow_id is None: raise RuntimeError("No flow ID") - if msg.flow_id in await self.config.get("flows").keys(): + if msg.flow_id in await self.config.get("flow").keys(): raise RuntimeError("Flow already exists") if msg.description is None: raise RuntimeError("No description") - if msg.class_name not in await self.config.get("flow-classes").keys(): - raise RuntimeError("Class does not exist") + if msg.blueprint_name not in await self.config.get("flow-blueprint").keys(): + raise RuntimeError("Blueprint does not exist") cls = json.loads( - await self.config.get("flow-classes").get(msg.class_name) + await self.config.get("flow-blueprint").get(msg.blueprint_name) ) # Resolve parameters by merging user-provided values with defaults @@ -200,7 +200,7 @@ class FlowConfig: def repl_template_with_params(tmp): result = tmp.replace( - "{class}", msg.class_name + "{blueprint}", msg.blueprint_name ).replace( "{id}", msg.flow_id ) @@ -210,7 +210,7 @@ class FlowConfig: return result - for kind in ("class", "flow"): + for kind in ("blueprint", "flow"): for k, v in cls[kind].items(): @@ -223,7 +223,7 @@ class FlowConfig: for k2, v2 in v.items() } - flac = await self.config.get("flows-active").get(processor) + flac = await self.config.get("active-flow").get(processor) if flac is not None: target = json.loads(flac) else: @@ -237,7 +237,7 @@ class FlowConfig: if variant not in target: target[variant] = v - await self.config.get("flows-active").put( + await self.config.get("active-flow").put( processor, json.dumps(target) ) @@ -258,11 +258,11 @@ class FlowConfig: else: interfaces = {} - await self.config.get("flows").put( + await self.config.get("flow").put( msg.flow_id, json.dumps({ "description": msg.description, - "class-name": msg.class_name, + "blueprint-name": msg.blueprint_name, "interfaces": interfaces, 
"parameters": parameters, }) @@ -281,22 +281,22 @@ class FlowConfig: if msg.flow_id is None: raise RuntimeError("No flow ID") - if msg.flow_id not in await self.config.get("flows").keys(): + if msg.flow_id not in await self.config.get("flow").keys(): raise RuntimeError("Flow ID invalid") - flow = json.loads(await self.config.get("flows").get(msg.flow_id)) + flow = json.loads(await self.config.get("flow").get(msg.flow_id)) - if "class-name" not in flow: - raise RuntimeError("Internal error: flow has no flow class") + if "blueprint-name" not in flow: + raise RuntimeError("Internal error: flow has no flow blueprint") - class_name = flow["class-name"] + blueprint_name = flow["blueprint-name"] parameters = flow.get("parameters", {}) - cls = json.loads(await self.config.get("flow-classes").get(class_name)) + cls = json.loads(await self.config.get("flow-blueprint").get(blueprint_name)) def repl_template(tmp): result = tmp.replace( - "{class}", class_name + "{blueprint}", blueprint_name ).replace( "{id}", msg.flow_id ) @@ -313,7 +313,7 @@ class FlowConfig: variant = repl_template(variant) - flac = await self.config.get("flows-active").get(processor) + flac = await self.config.get("active-flow").get(processor) if flac is not None: target = json.loads(flac) @@ -323,12 +323,12 @@ class FlowConfig: if variant in target: del target[variant] - await self.config.get("flows-active").put( + await self.config.get("active-flow").put( processor, json.dumps(target) ) - if msg.flow_id in await self.config.get("flows").keys(): - await self.config.get("flows").delete(msg.flow_id) + if msg.flow_id in await self.config.get("flow").keys(): + await self.config.get("flow").delete(msg.flow_id) await self.config.inc_version() @@ -342,14 +342,14 @@ class FlowConfig: logger.debug(f"Handling flow message: {msg.operation}") - if msg.operation == "list-classes": - resp = await self.handle_list_classes(msg) - elif msg.operation == "get-class": - resp = await self.handle_get_class(msg) - elif 
msg.operation == "put-class": - resp = await self.handle_put_class(msg) - elif msg.operation == "delete-class": - resp = await self.handle_delete_class(msg) + if msg.operation == "list-blueprints": + resp = await self.handle_list_blueprints(msg) + elif msg.operation == "get-blueprint": + resp = await self.handle_get_blueprint(msg) + elif msg.operation == "put-blueprint": + resp = await self.handle_put_blueprint(msg) + elif msg.operation == "delete-blueprint": + resp = await self.handle_delete_blueprint(msg) elif msg.operation == "list-flows": resp = await self.handle_list_flows(msg) elif msg.operation == "get-flow": diff --git a/trustgraph-flow/trustgraph/metering/counter.py b/trustgraph-flow/trustgraph/metering/counter.py index 07dea8ba..7851232a 100644 --- a/trustgraph-flow/trustgraph/metering/counter.py +++ b/trustgraph-flow/trustgraph/metering/counter.py @@ -52,7 +52,7 @@ class Processor(FlowProcessor): self.prices = {} - self.config_key = "token-costs" + self.config_key = "token-cost" # Load token costs from the config service async def on_cost_config(self, config, version): From 387afee7b7251e9ea197fb8f22d31f59b3ba9688 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Wed, 14 Jan 2026 15:46:29 +0000 Subject: [PATCH 5/9] Fix load-doc (#610) --- trustgraph-flow/trustgraph/librarian/service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/trustgraph-flow/trustgraph/librarian/service.py b/trustgraph-flow/trustgraph/librarian/service.py index 1d04ee06..7c1e428c 100755 --- a/trustgraph-flow/trustgraph/librarian/service.py +++ b/trustgraph-flow/trustgraph/librarian/service.py @@ -258,11 +258,11 @@ class Processor(AsyncProcessor): logger.info(f"Configuration version: {version}") - if "flows" in config: + if "flow" in config: self.flows = { k: json.loads(v) - for k, v in config["flows"].items() + for k, v in config["flow"].items() } logger.debug(f"Flows: {self.flows}") From 62b754d788a9d55aa88fdfa2c7b336512f83130a Mon Sep 17 00:00:00 2001 
From: cybermaggedon Date: Wed, 14 Jan 2026 16:23:15 +0000 Subject: [PATCH 6/9] Fix flow loading (#611) --- .../unit/test_gateway/test_config_receiver.py | 8 +++---- .../trustgraph/gateway/config/receiver.py | 24 +++++++++---------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/tests/unit/test_gateway/test_config_receiver.py b/tests/unit/test_gateway/test_config_receiver.py index ee500766..803ff4c6 100644 --- a/tests/unit/test_gateway/test_config_receiver.py +++ b/tests/unit/test_gateway/test_config_receiver.py @@ -64,7 +64,7 @@ class TestConfigReceiver: mock_msg.value.return_value = Mock( version="1.0", config={ - "flows": { + "flow": { "flow1": '{"name": "test_flow_1", "steps": []}', "flow2": '{"name": "test_flow_2", "steps": []}' } @@ -109,7 +109,7 @@ class TestConfigReceiver: mock_msg.value.return_value = Mock( version="1.0", config={ - "flows": { + "flow": { "flow1": '{"name": "test_flow_1", "steps": []}' } } @@ -352,7 +352,7 @@ class TestConfigReceiver: mock_msg.value.return_value = Mock( version="1.0", config={ - "flows": { + "flow": { "flow2": '{"name": "test_flow_2", "steps": []}', "flow3": '{"name": "test_flow_3", "steps": []}' } @@ -393,7 +393,7 @@ class TestConfigReceiver: mock_msg.value.return_value = Mock( version="1.0", config={ - "flows": { + "flow": { "flow1": '{"invalid": json}', # Invalid JSON "flow2": '{"name": "valid_flow", "steps": []}' # Valid JSON } diff --git a/trustgraph-flow/trustgraph/gateway/config/receiver.py b/trustgraph-flow/trustgraph/gateway/config/receiver.py index bdd123a9..4bf39ccd 100755 --- a/trustgraph-flow/trustgraph/gateway/config/receiver.py +++ b/trustgraph-flow/trustgraph/gateway/config/receiver.py @@ -53,22 +53,20 @@ class ConfigReceiver: logger.info(f"Config version: {v.version}") - if "flows" in v.config: + flows = v.config.get("flow", {}) - flows = v.config["flows"] + wanted = list(flows.keys()) + current = list(self.flows.keys()) - wanted = list(flows.keys()) - current = list(self.flows.keys()) + 
for k in wanted: + if k not in current: + self.flows[k] = json.loads(flows[k]) + await self.start_flow(k, self.flows[k]) - for k in wanted: - if k not in current: - self.flows[k] = json.loads(flows[k]) - await self.start_flow(k, self.flows[k]) - - for k in current: - if k not in wanted: - await self.stop_flow(k, self.flows[k]) - del self.flows[k] + for k in current: + if k not in wanted: + await self.stop_flow(k, self.flows[k]) + del self.flows[k] except Exception as e: logger.error(f"Config processing exception: {e}", exc_info=True) From fce43ae0358237d79374f93d9db07604d677184c Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Thu, 15 Jan 2026 11:04:37 +0000 Subject: [PATCH 7/9] REST API OpenAPI spec (#612) * OpenAPI spec in specs/api. Checked lint with redoc. --- docs/tech-specs/openapi-spec.md | 231 ++++++++++++++++++ specs/api/README.md | 84 +++++++ .../components/common/DocumentMetadata.yaml | 23 ++ .../components/common/ProcessingMetadata.yaml | 21 ++ specs/api/components/common/RdfValue.yaml | 14 ++ specs/api/components/common/Triple.yaml | 16 ++ .../api/components/parameters/Collection.yaml | 8 + specs/api/components/parameters/FlowId.yaml | 7 + specs/api/components/parameters/User.yaml | 8 + specs/api/components/responses/Error.yaml | 23 ++ .../components/responses/Unauthorized.yaml | 9 + .../schemas/agent/AgentRequest.yaml | 59 +++++ .../schemas/agent/AgentResponse.yaml | 51 ++++ .../schemas/collection/CollectionRequest.yaml | 58 +++++ .../collection/CollectionResponse.yaml | 39 +++ .../schemas/common/DocumentMetadata.yaml | 26 ++ .../schemas/common/ProcessingMetadata.yaml | 25 ++ .../components/schemas/common/RdfValue.yaml | 21 ++ .../api/components/schemas/common/Triple.yaml | 29 +++ .../schemas/config/ConfigRequest.yaml | 67 +++++ .../schemas/config/ConfigResponse.yaml | 49 ++++ .../schemas/diag/StructuredDiagRequest.yaml | 46 ++++ .../schemas/diag/StructuredDiagResponse.yaml | 49 ++++ .../DocumentEmbeddingsQueryRequest.yaml | 29 +++ 
.../DocumentEmbeddingsQueryResponse.yaml | 12 + .../GraphEmbeddingsQueryRequest.yaml | 29 +++ .../GraphEmbeddingsQueryResponse.yaml | 12 + .../schemas/embeddings/EmbeddingsRequest.yaml | 10 + .../embeddings/EmbeddingsResponse.yaml | 11 + .../schemas/errors/ErrorObject.yaml | 14 ++ .../components/schemas/flow/FlowRequest.yaml | 76 ++++++ .../components/schemas/flow/FlowResponse.yaml | 82 +++++++ .../schemas/knowledge/KnowledgeRequest.yaml | 128 ++++++++++ .../schemas/knowledge/KnowledgeResponse.yaml | 91 +++++++ .../schemas/librarian/LibrarianRequest.yaml | 79 ++++++ .../schemas/librarian/LibrarianResponse.yaml | 18 ++ .../schemas/loading/DocumentLoadRequest.yaml | 32 +++ .../schemas/loading/TextLoadRequest.yaml | 37 +++ .../schemas/mcp-tool/McpToolRequest.yaml | 17 ++ .../schemas/mcp-tool/McpToolResponse.yaml | 15 ++ .../schemas/prompt/PromptRequest.yaml | 32 +++ .../schemas/prompt/PromptResponse.yaml | 16 ++ .../schemas/query/NlpQueryRequest.yaml | 17 ++ .../schemas/query/NlpQueryResponse.yaml | 47 ++++ .../schemas/query/ObjectsQueryRequest.yaml | 40 +++ .../schemas/query/ObjectsQueryResponse.yaml | 54 ++++ .../schemas/query/StructuredQueryRequest.yaml | 22 ++ .../query/StructuredQueryResponse.yaml | 34 +++ .../schemas/query/TriplesQueryRequest.yaml | 30 +++ .../schemas/query/TriplesQueryResponse.yaml | 10 + .../schemas/rag/DocumentRagRequest.yaml | 33 +++ .../schemas/rag/DocumentRagResponse.yaml | 24 ++ .../schemas/rag/GraphRagRequest.yaml | 54 ++++ .../schemas/rag/GraphRagResponse.yaml | 24 ++ .../TextCompletionRequest.yaml | 20 ++ .../TextCompletionResponse.yaml | 26 ++ specs/api/openapi.yaml | 160 ++++++++++++ specs/api/paths/collection-management.yaml | 108 ++++++++ specs/api/paths/config.yaml | 165 +++++++++++++ specs/api/paths/export-core.yaml | 108 ++++++++ specs/api/paths/flow.yaml | 194 +++++++++++++++ specs/api/paths/flow/agent.yaml | 130 ++++++++++ specs/api/paths/flow/document-embeddings.yaml | 103 ++++++++ specs/api/paths/flow/document-load.yaml | 
119 +++++++++ specs/api/paths/flow/document-rag.yaml | 107 ++++++++ specs/api/paths/flow/embeddings.yaml | 85 +++++++ specs/api/paths/flow/graph-embeddings.yaml | 95 +++++++ specs/api/paths/flow/graph-rag.yaml | 127 ++++++++++ specs/api/paths/flow/mcp-tool.yaml | 119 +++++++++ specs/api/paths/flow/nlp-query.yaml | 148 +++++++++++ specs/api/paths/flow/objects.yaml | 166 +++++++++++++ specs/api/paths/flow/prompt.yaml | 143 +++++++++++ specs/api/paths/flow/structured-diag.yaml | 172 +++++++++++++ specs/api/paths/flow/structured-query.yaml | 134 ++++++++++ specs/api/paths/flow/text-completion.yaml | 125 ++++++++++ specs/api/paths/flow/text-load.yaml | 111 +++++++++ specs/api/paths/flow/triples.yaml | 129 ++++++++++ specs/api/paths/import-core.yaml | 106 ++++++++ specs/api/paths/knowledge.yaml | 196 +++++++++++++++ specs/api/paths/librarian.yaml | 153 ++++++++++++ specs/api/paths/metrics-path.yaml | 29 +++ specs/api/paths/metrics.yaml | 71 ++++++ specs/api/paths/websocket.yaml | 185 ++++++++++++++ specs/api/security/bearerAuth.yaml | 12 + 84 files changed, 5638 insertions(+) create mode 100644 docs/tech-specs/openapi-spec.md create mode 100644 specs/api/README.md create mode 100644 specs/api/components/common/DocumentMetadata.yaml create mode 100644 specs/api/components/common/ProcessingMetadata.yaml create mode 100644 specs/api/components/common/RdfValue.yaml create mode 100644 specs/api/components/common/Triple.yaml create mode 100644 specs/api/components/parameters/Collection.yaml create mode 100644 specs/api/components/parameters/FlowId.yaml create mode 100644 specs/api/components/parameters/User.yaml create mode 100644 specs/api/components/responses/Error.yaml create mode 100644 specs/api/components/responses/Unauthorized.yaml create mode 100644 specs/api/components/schemas/agent/AgentRequest.yaml create mode 100644 specs/api/components/schemas/agent/AgentResponse.yaml create mode 100644 specs/api/components/schemas/collection/CollectionRequest.yaml create mode 
100644 specs/api/components/schemas/collection/CollectionResponse.yaml create mode 100644 specs/api/components/schemas/common/DocumentMetadata.yaml create mode 100644 specs/api/components/schemas/common/ProcessingMetadata.yaml create mode 100644 specs/api/components/schemas/common/RdfValue.yaml create mode 100644 specs/api/components/schemas/common/Triple.yaml create mode 100644 specs/api/components/schemas/config/ConfigRequest.yaml create mode 100644 specs/api/components/schemas/config/ConfigResponse.yaml create mode 100644 specs/api/components/schemas/diag/StructuredDiagRequest.yaml create mode 100644 specs/api/components/schemas/diag/StructuredDiagResponse.yaml create mode 100644 specs/api/components/schemas/embeddings-query/DocumentEmbeddingsQueryRequest.yaml create mode 100644 specs/api/components/schemas/embeddings-query/DocumentEmbeddingsQueryResponse.yaml create mode 100644 specs/api/components/schemas/embeddings-query/GraphEmbeddingsQueryRequest.yaml create mode 100644 specs/api/components/schemas/embeddings-query/GraphEmbeddingsQueryResponse.yaml create mode 100644 specs/api/components/schemas/embeddings/EmbeddingsRequest.yaml create mode 100644 specs/api/components/schemas/embeddings/EmbeddingsResponse.yaml create mode 100644 specs/api/components/schemas/errors/ErrorObject.yaml create mode 100644 specs/api/components/schemas/flow/FlowRequest.yaml create mode 100644 specs/api/components/schemas/flow/FlowResponse.yaml create mode 100644 specs/api/components/schemas/knowledge/KnowledgeRequest.yaml create mode 100644 specs/api/components/schemas/knowledge/KnowledgeResponse.yaml create mode 100644 specs/api/components/schemas/librarian/LibrarianRequest.yaml create mode 100644 specs/api/components/schemas/librarian/LibrarianResponse.yaml create mode 100644 specs/api/components/schemas/loading/DocumentLoadRequest.yaml create mode 100644 specs/api/components/schemas/loading/TextLoadRequest.yaml create mode 100644 
specs/api/components/schemas/mcp-tool/McpToolRequest.yaml create mode 100644 specs/api/components/schemas/mcp-tool/McpToolResponse.yaml create mode 100644 specs/api/components/schemas/prompt/PromptRequest.yaml create mode 100644 specs/api/components/schemas/prompt/PromptResponse.yaml create mode 100644 specs/api/components/schemas/query/NlpQueryRequest.yaml create mode 100644 specs/api/components/schemas/query/NlpQueryResponse.yaml create mode 100644 specs/api/components/schemas/query/ObjectsQueryRequest.yaml create mode 100644 specs/api/components/schemas/query/ObjectsQueryResponse.yaml create mode 100644 specs/api/components/schemas/query/StructuredQueryRequest.yaml create mode 100644 specs/api/components/schemas/query/StructuredQueryResponse.yaml create mode 100644 specs/api/components/schemas/query/TriplesQueryRequest.yaml create mode 100644 specs/api/components/schemas/query/TriplesQueryResponse.yaml create mode 100644 specs/api/components/schemas/rag/DocumentRagRequest.yaml create mode 100644 specs/api/components/schemas/rag/DocumentRagResponse.yaml create mode 100644 specs/api/components/schemas/rag/GraphRagRequest.yaml create mode 100644 specs/api/components/schemas/rag/GraphRagResponse.yaml create mode 100644 specs/api/components/schemas/text-completion/TextCompletionRequest.yaml create mode 100644 specs/api/components/schemas/text-completion/TextCompletionResponse.yaml create mode 100644 specs/api/openapi.yaml create mode 100644 specs/api/paths/collection-management.yaml create mode 100644 specs/api/paths/config.yaml create mode 100644 specs/api/paths/export-core.yaml create mode 100644 specs/api/paths/flow.yaml create mode 100644 specs/api/paths/flow/agent.yaml create mode 100644 specs/api/paths/flow/document-embeddings.yaml create mode 100644 specs/api/paths/flow/document-load.yaml create mode 100644 specs/api/paths/flow/document-rag.yaml create mode 100644 specs/api/paths/flow/embeddings.yaml create mode 100644 
specs/api/paths/flow/graph-embeddings.yaml create mode 100644 specs/api/paths/flow/graph-rag.yaml create mode 100644 specs/api/paths/flow/mcp-tool.yaml create mode 100644 specs/api/paths/flow/nlp-query.yaml create mode 100644 specs/api/paths/flow/objects.yaml create mode 100644 specs/api/paths/flow/prompt.yaml create mode 100644 specs/api/paths/flow/structured-diag.yaml create mode 100644 specs/api/paths/flow/structured-query.yaml create mode 100644 specs/api/paths/flow/text-completion.yaml create mode 100644 specs/api/paths/flow/text-load.yaml create mode 100644 specs/api/paths/flow/triples.yaml create mode 100644 specs/api/paths/import-core.yaml create mode 100644 specs/api/paths/knowledge.yaml create mode 100644 specs/api/paths/librarian.yaml create mode 100644 specs/api/paths/metrics-path.yaml create mode 100644 specs/api/paths/metrics.yaml create mode 100644 specs/api/paths/websocket.yaml create mode 100644 specs/api/security/bearerAuth.yaml diff --git a/docs/tech-specs/openapi-spec.md b/docs/tech-specs/openapi-spec.md new file mode 100644 index 00000000..ec82681d --- /dev/null +++ b/docs/tech-specs/openapi-spec.md @@ -0,0 +1,231 @@ +# OpenAPI Specification - Technical Spec + +## Goal + +Create a comprehensive, modular OpenAPI 3.1 specification for the TrustGraph REST API Gateway that: +- Documents all REST endpoints +- Uses external `$ref` for modularity and maintainability +- Maps directly to the message translator code +- Provides accurate request/response schemas + +## Source of Truth + +The API is defined by: +- **Message Translators**: `trustgraph-base/trustgraph/messaging/translators/*.py` +- **Dispatcher Manager**: `trustgraph-flow/trustgraph/gateway/dispatch/manager.py` +- **Endpoint Manager**: `trustgraph-flow/trustgraph/gateway/endpoint/manager.py` + +## Directory Structure + +``` +openapi/ +├── openapi.yaml # Main entry point +├── paths/ +│ ├── config.yaml # Global services +│ ├── flow.yaml +│ ├── librarian.yaml +│ ├── knowledge.yaml +│ ├── 
collection-management.yaml +│ ├── flow-services/ # Flow-hosted services +│ │ ├── agent.yaml +│ │ ├── document-rag.yaml +│ │ ├── graph-rag.yaml +│ │ ├── text-completion.yaml +│ │ ├── prompt.yaml +│ │ ├── embeddings.yaml +│ │ ├── mcp-tool.yaml +│ │ ├── triples.yaml +│ │ ├── objects.yaml +│ │ ├── nlp-query.yaml +│ │ ├── structured-query.yaml +│ │ ├── structured-diag.yaml +│ │ ├── graph-embeddings.yaml +│ │ ├── document-embeddings.yaml +│ │ ├── text-load.yaml +│ │ └── document-load.yaml +│ ├── import-export/ +│ │ ├── core-import.yaml +│ │ ├── core-export.yaml +│ │ └── flow-import-export.yaml # WebSocket import/export +│ ├── websocket.yaml +│ └── metrics.yaml +├── components/ +│ ├── schemas/ +│ │ ├── config/ +│ │ ├── flow/ +│ │ ├── librarian/ +│ │ ├── knowledge/ +│ │ ├── collection/ +│ │ ├── ai-services/ +│ │ ├── common/ +│ │ └── errors/ +│ ├── parameters/ +│ ├── responses/ +│ └── examples/ +└── security/ + └── bearerAuth.yaml +``` + +## Service Mapping + +### Global Services (`/api/v1/{kind}`) +- `config` - Configuration management +- `flow` - Flow lifecycle +- `librarian` - Document library +- `knowledge` - Knowledge cores +- `collection-management` - Collection metadata + +### Flow-Hosted Services (`/api/v1/flow/{flow}/service/{kind}`) + +**Request/Response:** +- `agent`, `text-completion`, `prompt`, `mcp-tool` +- `graph-rag`, `document-rag` +- `embeddings`, `graph-embeddings`, `document-embeddings` +- `triples`, `objects`, `nlp-query`, `structured-query`, `structured-diag` + +**Fire-and-Forget:** +- `text-load`, `document-load` + +### Import/Export +- `/api/v1/import-core` (POST) +- `/api/v1/export-core` (GET) +- `/api/v1/flow/{flow}/import/{kind}` (WebSocket) +- `/api/v1/flow/{flow}/export/{kind}` (WebSocket) + +### Other +- `/api/v1/socket` (WebSocket multiplexed) +- `/api/metrics` (Prometheus) + +## Approach + +### Phase 1: Setup +1. Create directory structure +2. Create main `openapi.yaml` with metadata, servers, security +3. 
Create reusable components (errors, common parameters, security schemes) + +### Phase 2: Common Schemas +Create shared schemas used across services: +- `RdfValue`, `Triple` - RDF/triple structures +- `ErrorObject` - Error response +- `DocumentMetadata`, `ProcessingMetadata` - Metadata structures +- Common parameters: `FlowId`, `User`, `Collection` + +### Phase 3: Global Services +For each global service (config, flow, librarian, knowledge, collection-management): +1. Create path file in `paths/` +2. Create request schema in `components/schemas/{service}/` +3. Create response schema +4. Add examples +5. Reference from main `openapi.yaml` + +### Phase 4: Flow-Hosted Services +For each flow-hosted service: +1. Create path file in `paths/flow-services/` +2. Create request/response schemas in `components/schemas/ai-services/` +3. Add streaming flag documentation where applicable +4. Reference from main `openapi.yaml` + +### Phase 5: Import/Export & WebSocket +1. Document core import/export endpoints +2. Document WebSocket protocol patterns +3. Document flow-level import/export WebSocket endpoints + +### Phase 6: Validation +1. Validate with OpenAPI validator tools +2. Test with Swagger UI +3. Verify all translators are covered + +## Field Naming Convention + +All JSON fields use **kebab-case**: +- `flow-id`, `blueprint-name`, `doc-limit`, `entity-limit`, etc. + +## Creating Schema Files + +For each translator in `trustgraph-base/trustgraph/messaging/translators/`: + +1. **Read translator `to_pulsar()` method** - Defines request schema +2. **Read translator `from_pulsar()` method** - Defines response schema +3. **Extract field names and types** +4. 
**Create OpenAPI schema** with: + - Field names (kebab-case) + - Types (string, integer, boolean, object, array) + - Required fields + - Defaults + - Descriptions + +### Example Mapping Process + +```python +# From retrieval.py DocumentRagRequestTranslator +def to_pulsar(self, data: Dict[str, Any]) -> DocumentRagQuery: + return DocumentRagQuery( + query=data["query"], # required string + user=data.get("user", "trustgraph"), # optional string, default "trustgraph" + collection=data.get("collection", "default"), # optional string, default "default" + doc_limit=int(data.get("doc-limit", 20)), # optional integer, default 20 + streaming=data.get("streaming", False) # optional boolean, default false + ) +``` + +Maps to: + +```yaml +# components/schemas/ai-services/DocumentRagRequest.yaml +type: object +required: + - query +properties: + query: + type: string + description: Search query + user: + type: string + default: trustgraph + collection: + type: string + default: default + doc-limit: + type: integer + default: 20 + description: Maximum number of documents to retrieve + streaming: + type: boolean + default: false + description: Enable streaming responses +``` + +## Streaming Responses + +Services that support streaming return multiple responses with `end_of_stream` flag: +- `agent`, `text-completion`, `prompt` +- `document-rag`, `graph-rag` + +Document this pattern in each service's response schema. 
+ +## Error Responses + +All services can return: +```yaml +error: + oneOf: + - type: string + - $ref: '#/components/schemas/ErrorObject' +``` + +Where `ErrorObject` is: +```yaml +type: object +properties: + type: + type: string + message: + type: string +``` + +## References + +- Translators: `trustgraph-base/trustgraph/messaging/translators/` +- Dispatcher mapping: `trustgraph-flow/trustgraph/gateway/dispatch/manager.py` +- Endpoint routing: `trustgraph-flow/trustgraph/gateway/endpoint/manager.py` +- Service summary: `API_SERVICES_SUMMARY.md` diff --git a/specs/api/README.md b/specs/api/README.md new file mode 100644 index 00000000..b9335579 --- /dev/null +++ b/specs/api/README.md @@ -0,0 +1,84 @@ +# TrustGraph OpenAPI Specification + +This directory contains the modular OpenAPI 3.1 specification for the TrustGraph REST API Gateway. + +## Structure + +``` +specs/api/ +├── openapi.yaml # Main entry point +├── paths/ # Endpoint definitions +│ ├── config.yaml +│ ├── flow.yaml +│ ├── flow-services/ # Flow-hosted services +│ └── import-export/ +├── components/ +│ ├── schemas/ # Request/response schemas +│ │ ├── config/ +│ │ ├── flow/ +│ │ ├── ai-services/ +│ │ ├── common/ +│ │ └── errors/ +│ ├── parameters/ # Reusable parameters +│ ├── responses/ # Reusable responses +│ └── examples/ # Example payloads +└── security/ # Security schemes + └── bearerAuth.yaml +``` + +## Viewing the Spec + +### Swagger UI + +```bash +# Install swagger-ui +npm install -g swagger-ui-watcher + +# View in browser +swagger-ui-watcher specs/api/openapi.yaml +``` + +### Redoc + +```bash +# Install redoc-cli +npm install -g redoc-cli + +# Generate static HTML +redoc-cli bundle specs/api/openapi.yaml -o api-docs.html + +# View +open api-docs.html +``` + +### Online Validators + +Upload `openapi.yaml` to: +- https://editor.swagger.io/ +- https://redocly.com/redoc/ + +## Validation + +```bash +# Install openapi-spec-validator +pip install openapi-spec-validator + +# Validate +openapi-spec-validator 
specs/api/openapi.yaml +``` + +## Development + +When adding a new service: + +1. Create schema files in `components/schemas/{service}/` +2. Create path file in `paths/` or `paths/flow-services/` +3. Add examples if needed +4. Reference from `openapi.yaml` +5. Validate + +## References + +- [OpenAPI 3.1 Specification](https://spec.openapis.org/oas/v3.1.0) +- [TrustGraph Tech Spec](../../docs/tech-specs/openapi-spec.md) +- [API Services Summary](../../API_SERVICES_SUMMARY.md) diff --git a/specs/api/components/common/DocumentMetadata.yaml b/specs/api/components/common/DocumentMetadata.yaml new file mode 100644 index 00000000..43edc273 --- /dev/null +++ b/specs/api/components/common/DocumentMetadata.yaml @@ -0,0 +1,23 @@ +type: object +description: Document metadata +properties: + url: + type: string + description: Document URL + example: https://example.com/document.pdf + title: + type: string + description: Document title + example: Example Document + author: + type: string + description: Document author + example: John Doe + metadata: + type: object + description: Additional metadata + additionalProperties: + type: string + example: + department: Engineering + category: Technical diff --git a/specs/api/components/common/ProcessingMetadata.yaml b/specs/api/components/common/ProcessingMetadata.yaml new file mode 100644 index 00000000..8f141383 --- /dev/null +++ b/specs/api/components/common/ProcessingMetadata.yaml @@ -0,0 +1,21 @@ +type: object +description: Processing task metadata +properties: + flow: + type: string + description: Flow ID + example: my-flow + collection: + type: string + description: Collection identifier + example: default + status: + type: string + description: Processing status + enum: [pending, processing, completed, failed] + example: processing + timestamp: + type: string + description: ISO timestamp + format: date-time + example: "2024-01-15T10:30:00Z" diff --git a/specs/api/components/common/RdfValue.yaml 
b/specs/api/components/common/RdfValue.yaml new file mode 100644 index 00000000..5ed7c992 --- /dev/null +++ b/specs/api/components/common/RdfValue.yaml @@ -0,0 +1,14 @@ +type: object +description: RDF value - can be entity/URI or literal +required: + - v + - e +properties: + v: + type: string + description: Value (URI or literal text) + example: https://example.com/entity1 + e: + type: boolean + description: True if entity/URI, false if literal + example: true diff --git a/specs/api/components/common/Triple.yaml b/specs/api/components/common/Triple.yaml new file mode 100644 index 00000000..142be0e9 --- /dev/null +++ b/specs/api/components/common/Triple.yaml @@ -0,0 +1,16 @@ +type: object +description: RDF triple (subject-predicate-object) +required: + - s + - p + - o +properties: + s: + $ref: './RdfValue.yaml' + description: Subject + p: + $ref: './RdfValue.yaml' + description: Predicate + o: + $ref: './RdfValue.yaml' + description: Object diff --git a/specs/api/components/parameters/Collection.yaml b/specs/api/components/parameters/Collection.yaml new file mode 100644 index 00000000..ecbb0836 --- /dev/null +++ b/specs/api/components/parameters/Collection.yaml @@ -0,0 +1,8 @@ +name: collection +in: query +required: false +schema: + type: string + default: default +description: Collection identifier +example: default diff --git a/specs/api/components/parameters/FlowId.yaml b/specs/api/components/parameters/FlowId.yaml new file mode 100644 index 00000000..98f6e149 --- /dev/null +++ b/specs/api/components/parameters/FlowId.yaml @@ -0,0 +1,7 @@ +name: flow +in: path +required: true +schema: + type: string +description: Flow instance ID +example: my-flow diff --git a/specs/api/components/parameters/User.yaml b/specs/api/components/parameters/User.yaml new file mode 100644 index 00000000..ad0657ca --- /dev/null +++ b/specs/api/components/parameters/User.yaml @@ -0,0 +1,8 @@ +name: user +in: query +required: false +schema: + type: string + default: trustgraph 
+description: User identifier +example: alice diff --git a/specs/api/components/responses/Error.yaml b/specs/api/components/responses/Error.yaml new file mode 100644 index 00000000..c3dbe5aa --- /dev/null +++ b/specs/api/components/responses/Error.yaml @@ -0,0 +1,23 @@ +description: Error response +content: + application/json: + schema: + type: object + properties: + error: + oneOf: + - type: string + description: Simple error message + - $ref: '../schemas/errors/ErrorObject.yaml' + description: Structured error with type and message + examples: + simpleError: + summary: Simple error message + value: + error: Invalid flow ID + structuredError: + summary: Structured error + value: + error: + type: gateway-error + message: Timeout diff --git a/specs/api/components/responses/Unauthorized.yaml b/specs/api/components/responses/Unauthorized.yaml new file mode 100644 index 00000000..6f903c39 --- /dev/null +++ b/specs/api/components/responses/Unauthorized.yaml @@ -0,0 +1,9 @@ +description: Unauthorized - Invalid or missing bearer token +content: + application/json: + schema: + type: object + properties: + error: + type: string + example: Unauthorized diff --git a/specs/api/components/schemas/agent/AgentRequest.yaml b/specs/api/components/schemas/agent/AgentRequest.yaml new file mode 100644 index 00000000..ddf2019a --- /dev/null +++ b/specs/api/components/schemas/agent/AgentRequest.yaml @@ -0,0 +1,59 @@ +type: object +description: | + Agent service request - conversational AI agent that can reason and take actions. +required: + - question +properties: + question: + type: string + description: User question or prompt for the agent + example: What is the capital of France? 
+ state: + type: string + description: Agent state for continuation (optional, for multi-turn) + example: agent-state-12345 + group: + type: array + description: Group identifiers for collaborative agents (optional) + items: + type: string + example: ["research-team"] + history: + type: array + description: Conversation history (optional, list of previous agent steps) + items: + type: object + properties: + thought: + type: string + description: Agent's reasoning + example: I need to search for information about Paris + action: + type: string + description: Action taken + example: search + arguments: + type: object + description: Action arguments + additionalProperties: + type: string + example: + query: "capital of France" + observation: + type: string + description: Result of the action + example: "Paris is the capital of France" + user: + type: string + description: User context for this step + example: alice + user: + type: string + description: User identifier for multi-tenancy + default: trustgraph + example: alice + streaming: + type: boolean + description: Enable streaming response delivery + default: false + example: true diff --git a/specs/api/components/schemas/agent/AgentResponse.yaml b/specs/api/components/schemas/agent/AgentResponse.yaml new file mode 100644 index 00000000..86d636b5 --- /dev/null +++ b/specs/api/components/schemas/agent/AgentResponse.yaml @@ -0,0 +1,51 @@ +type: object +description: Agent service response (streaming or legacy format) +properties: + chunk-type: + type: string + description: Type of streaming chunk (streaming mode only) + enum: + - thought + - action + - observation + - answer + - error + example: answer + content: + type: string + description: Chunk content (streaming mode only) + example: Paris is the capital of France. 
+ end-of-message: + type: boolean + description: Current chunk type is complete (streaming mode) + default: false + example: true + end-of-dialog: + type: boolean + description: Entire agent dialog is complete (streaming mode) + default: false + example: true + answer: + type: string + description: Final answer (legacy non-streaming format) + example: Paris is the capital of France. + thought: + type: string + description: Agent reasoning (legacy format) + example: I should search for information about the capital of France. + observation: + type: string + description: Observation from actions (legacy format) + example: Found information about Paris being the capital. + error: + type: object + description: Error details if request failed + properties: + message: + type: string + description: Error message + example: Failed to process agent request + code: + type: string + description: Error code + example: AGENT_ERROR diff --git a/specs/api/components/schemas/collection/CollectionRequest.yaml b/specs/api/components/schemas/collection/CollectionRequest.yaml new file mode 100644 index 00000000..bf3ab7d4 --- /dev/null +++ b/specs/api/components/schemas/collection/CollectionRequest.yaml @@ -0,0 +1,58 @@ +type: object +description: | + Collection management request. 
+ + Operations: list-collections, update-collection, delete-collection +required: + - operation +properties: + operation: + type: string + enum: + - list-collections + - update-collection + - delete-collection + description: | + Collection operation: + - `list-collections`: List collections for user + - `update-collection`: Create or update collection metadata + - `delete-collection`: Delete collection + user: + type: string + description: User identifier + default: trustgraph + example: alice + collection: + type: string + description: Collection identifier (for update, delete) + example: research + timestamp: + type: string + description: ISO timestamp + format: date-time + example: "2024-01-15T10:30:00Z" + name: + type: string + description: Human-readable collection name (for update) + example: Research Papers + description: + type: string + description: Collection description (for update) + example: Academic research papers on AI and ML + tags: + type: array + description: Collection tags for organization (for update) + items: + type: string + example: ["research", "AI", "academic"] + tag-filter: + type: array + description: Filter collections by tags (for list) + items: + type: string + example: ["research"] + limit: + type: integer + description: Maximum number of results (for list) + default: 0 + example: 100 diff --git a/specs/api/components/schemas/collection/CollectionResponse.yaml b/specs/api/components/schemas/collection/CollectionResponse.yaml new file mode 100644 index 00000000..f924cbf5 --- /dev/null +++ b/specs/api/components/schemas/collection/CollectionResponse.yaml @@ -0,0 +1,39 @@ +type: object +description: Collection management response +properties: + timestamp: + type: string + description: ISO timestamp + format: date-time + example: "2024-01-15T10:30:00Z" + collections: + type: array + description: List of collections (returned by list-collections) + items: + type: object + required: + - user + - collection + properties: + user: + type: 
string + description: User identifier + example: alice + collection: + type: string + description: Collection identifier + example: research + name: + type: string + description: Human-readable collection name + example: Research Papers + description: + type: string + description: Collection description + example: Academic research papers on AI and ML + tags: + type: array + description: Collection tags + items: + type: string + example: ["research", "AI", "academic"] diff --git a/specs/api/components/schemas/common/DocumentMetadata.yaml b/specs/api/components/schemas/common/DocumentMetadata.yaml new file mode 100644 index 00000000..77e2206e --- /dev/null +++ b/specs/api/components/schemas/common/DocumentMetadata.yaml @@ -0,0 +1,26 @@ +type: object +description: Document metadata for library management +properties: + url: + type: string + description: Document URL or identifier + example: https://example.com/document.pdf + title: + type: string + description: Document title + example: Example Document + author: + type: string + description: Document author + example: John Doe + date: + type: string + description: Document date + example: "2024-01-15" + metadata: + type: object + description: Additional metadata fields + additionalProperties: true + example: + department: Engineering + category: Technical diff --git a/specs/api/components/schemas/common/ProcessingMetadata.yaml b/specs/api/components/schemas/common/ProcessingMetadata.yaml new file mode 100644 index 00000000..d74a0efa --- /dev/null +++ b/specs/api/components/schemas/common/ProcessingMetadata.yaml @@ -0,0 +1,25 @@ +type: object +description: Processing metadata for library document processing +properties: + flow: + type: string + description: Flow ID + example: my-flow + collection: + type: string + description: Collection identifier + example: default + status: + type: string + description: Processing status + enum: [pending, processing, completed, failed] + example: completed + timestamp: + type: 
string + format: date-time + description: Processing timestamp + example: "2024-01-15T10:30:00Z" + error: + type: string + description: Error message if processing failed + example: Failed to extract text from PDF diff --git a/specs/api/components/schemas/common/RdfValue.yaml b/specs/api/components/schemas/common/RdfValue.yaml new file mode 100644 index 00000000..ce8b4c08 --- /dev/null +++ b/specs/api/components/schemas/common/RdfValue.yaml @@ -0,0 +1,21 @@ +type: object +description: | + RDF value - represents either a URI/entity or a literal value. + + When `e` is true, `v` must be a full URI (e.g., http://schema.org/name). + When `e` is false, `v` is a literal value (string, number, etc.). +properties: + v: + type: string + description: The value - full URI when e=true, literal when e=false + example: http://example.com/Person1 + e: + type: boolean + description: True if entity/URI, false if literal value + example: true +required: + - v + - e +example: + v: http://schema.org/name + e: true diff --git a/specs/api/components/schemas/common/Triple.yaml b/specs/api/components/schemas/common/Triple.yaml new file mode 100644 index 00000000..1f72b89a --- /dev/null +++ b/specs/api/components/schemas/common/Triple.yaml @@ -0,0 +1,29 @@ +type: object +description: | + RDF triple representing a subject-predicate-object statement in the knowledge graph. 
+ + Example: (Person1) -[has name]-> ("John Doe") +properties: + s: + $ref: './RdfValue.yaml' + description: Subject - the entity the statement is about + p: + $ref: './RdfValue.yaml' + description: Predicate - the property or relationship + o: + $ref: './RdfValue.yaml' + description: Object - the value or target entity +required: + - s + - p + - o +example: + s: + v: http://example.com/Person1 + e: true + p: + v: http://schema.org/name + e: true + o: + v: John Doe + e: false diff --git a/specs/api/components/schemas/config/ConfigRequest.yaml b/specs/api/components/schemas/config/ConfigRequest.yaml new file mode 100644 index 00000000..aa39e519 --- /dev/null +++ b/specs/api/components/schemas/config/ConfigRequest.yaml @@ -0,0 +1,67 @@ +type: object +description: | + Configuration service request. + + Supports operations: config, list, get, put, delete +required: + - operation +properties: + operation: + type: string + enum: [config, list, get, put, delete] + description: | + Operation to perform: + - `config`: Get complete configuration + - `list`: List all items of a specific type + - `get`: Get specific configuration items + - `put`: Set/update configuration values + - `delete`: Delete configuration items + example: config + type: + type: string + description: | + Configuration type (required for list, get, put, delete operations). 
+ Common types: flow, prompt, token-cost, parameter-type, interface-description + example: flow + keys: + type: array + description: Keys to retrieve (for get operation) or delete (for delete operation) + items: + type: object + required: + - type + - key + properties: + type: + type: string + description: Configuration type + example: flow + key: + type: string + description: Configuration key + example: my-flow + values: + type: array + description: Values to set/update (for put operation) + items: + type: object + required: + - type + - key + - value + properties: + type: + type: string + description: Configuration type + example: flow + key: + type: string + description: Configuration key + example: my-flow + value: + type: object + description: Configuration value (structure depends on type) + additionalProperties: true + example: + blueprint-name: document-rag + description: My RAG flow diff --git a/specs/api/components/schemas/config/ConfigResponse.yaml b/specs/api/components/schemas/config/ConfigResponse.yaml new file mode 100644 index 00000000..9815c51e --- /dev/null +++ b/specs/api/components/schemas/config/ConfigResponse.yaml @@ -0,0 +1,49 @@ +type: object +description: Configuration service response +properties: + version: + type: integer + description: Configuration version number + example: 42 + config: + type: object + description: Complete configuration (returned by 'config' operation) + additionalProperties: true + example: + flow: + default: + blueprint-name: document-rag+graph-rag + description: Default flow + prompt: + system: You are a helpful AI assistant + token-cost: + gpt-4: + prompt: 0.03 + completion: 0.06 + directory: + type: array + description: List of keys (returned by 'list' operation) + items: + type: string + example: + - default + - production + - my-flow + values: + type: array + description: Retrieved configuration values (returned by 'get' operation) + items: + type: object + properties: + type: + type: string + example: flow + 
key: + type: string + example: default + value: + type: object + additionalProperties: true + example: + blueprint-name: document-rag+graph-rag + description: Default flow diff --git a/specs/api/components/schemas/diag/StructuredDiagRequest.yaml b/specs/api/components/schemas/diag/StructuredDiagRequest.yaml new file mode 100644 index 00000000..cb692e19 --- /dev/null +++ b/specs/api/components/schemas/diag/StructuredDiagRequest.yaml @@ -0,0 +1,46 @@ +type: object +description: | + Structured data diagnosis request - analyze and understand structured data formats. + + Operations: detect-type, generate-descriptor, diagnose, schema-selection +required: + - operation + - sample +properties: + operation: + type: string + enum: + - detect-type + - generate-descriptor + - diagnose + - schema-selection + description: | + Diagnosis operation: + - `detect-type`: Identify data format (CSV, JSON, XML) + - `generate-descriptor`: Create schema descriptor for data + - `diagnose`: Full analysis (detect + generate descriptor) + - `schema-selection`: Find matching schemas for data + sample: + type: string + description: Data sample to analyze (text content) + example: | + name,age,email + Alice,30,alice@example.com + Bob,25,bob@example.com + type: + type: string + description: Data type (required for generate-descriptor) + enum: [csv, json, xml] + example: csv + schema-name: + type: string + description: Target schema name for descriptor generation (optional) + example: person-records + options: + type: object + description: Format-specific options (e.g., CSV delimiter) + additionalProperties: + type: string + example: + delimiter: "," + has_header: "true" diff --git a/specs/api/components/schemas/diag/StructuredDiagResponse.yaml b/specs/api/components/schemas/diag/StructuredDiagResponse.yaml new file mode 100644 index 00000000..e41009a4 --- /dev/null +++ b/specs/api/components/schemas/diag/StructuredDiagResponse.yaml @@ -0,0 +1,49 @@ +type: object +description: Structured data 
diagnosis response +required: + - operation +properties: + operation: + type: string + description: Operation that was performed + example: diagnose + detected-type: + type: string + description: Detected data format (for detect-type/diagnose) + enum: [csv, json, xml] + example: csv + confidence: + type: number + description: Detection confidence score (0.0-1.0) + minimum: 0.0 + maximum: 1.0 + example: 0.95 + descriptor: + type: object + description: Generated schema descriptor (for generate-descriptor/diagnose) + additionalProperties: {} + example: + schema_name: person-records + type: csv + fields: + - name: name + type: string + - name: age + type: integer + - name: email + type: string + metadata: + type: object + description: Additional analysis metadata + additionalProperties: + type: string + example: + field_count: "3" + record_count: "2" + has_header: "true" + schema-matches: + type: array + description: Matching schema IDs (for schema-selection) + items: + type: string + example: ["person-schema-v1", "contact-schema-v2"] diff --git a/specs/api/components/schemas/embeddings-query/DocumentEmbeddingsQueryRequest.yaml b/specs/api/components/schemas/embeddings-query/DocumentEmbeddingsQueryRequest.yaml new file mode 100644 index 00000000..f2d0aec2 --- /dev/null +++ b/specs/api/components/schemas/embeddings-query/DocumentEmbeddingsQueryRequest.yaml @@ -0,0 +1,29 @@ +type: object +description: | + Document embeddings query request - find similar documents by vector similarity. 
+required: + - vectors +properties: + vectors: + type: array + description: Query embedding vector + items: + type: number + example: [0.023, -0.142, 0.089, 0.234, -0.067, 0.156] + limit: + type: integer + description: Maximum number of document chunks to return + default: 10 + minimum: 1 + maximum: 1000 + example: 20 + user: + type: string + description: User identifier + default: trustgraph + example: alice + collection: + type: string + description: Collection to search + default: default + example: research diff --git a/specs/api/components/schemas/embeddings-query/DocumentEmbeddingsQueryResponse.yaml b/specs/api/components/schemas/embeddings-query/DocumentEmbeddingsQueryResponse.yaml new file mode 100644 index 00000000..6b1d811d --- /dev/null +++ b/specs/api/components/schemas/embeddings-query/DocumentEmbeddingsQueryResponse.yaml @@ -0,0 +1,12 @@ +type: object +description: Document embeddings query response +properties: + chunks: + type: array + description: Similar document chunks (text strings) + items: + type: string + example: + - "Quantum computing uses quantum mechanics principles for computation..." + - "Neural networks are computing systems inspired by biological neurons..." + - "Machine learning algorithms learn patterns from data..." diff --git a/specs/api/components/schemas/embeddings-query/GraphEmbeddingsQueryRequest.yaml b/specs/api/components/schemas/embeddings-query/GraphEmbeddingsQueryRequest.yaml new file mode 100644 index 00000000..6cf60bbd --- /dev/null +++ b/specs/api/components/schemas/embeddings-query/GraphEmbeddingsQueryRequest.yaml @@ -0,0 +1,29 @@ +type: object +description: | + Graph embeddings query request - find similar entities by vector similarity. 
+required: + - vectors +properties: + vectors: + type: array + description: Query embedding vector + items: + type: number + example: [0.023, -0.142, 0.089, 0.234, -0.067, 0.156] + limit: + type: integer + description: Maximum number of entities to return + default: 10 + minimum: 1 + maximum: 1000 + example: 20 + user: + type: string + description: User identifier + default: trustgraph + example: alice + collection: + type: string + description: Collection to search + default: default + example: research diff --git a/specs/api/components/schemas/embeddings-query/GraphEmbeddingsQueryResponse.yaml b/specs/api/components/schemas/embeddings-query/GraphEmbeddingsQueryResponse.yaml new file mode 100644 index 00000000..80692a12 --- /dev/null +++ b/specs/api/components/schemas/embeddings-query/GraphEmbeddingsQueryResponse.yaml @@ -0,0 +1,12 @@ +type: object +description: Graph embeddings query response +properties: + entities: + type: array + description: Similar entities (RDF values) + items: + $ref: '../../common/RdfValue.yaml' + example: + - {v: "https://example.com/person/alice", e: true} + - {v: "https://example.com/person/bob", e: true} + - {v: "https://example.com/concept/quantum", e: true} diff --git a/specs/api/components/schemas/embeddings/EmbeddingsRequest.yaml b/specs/api/components/schemas/embeddings/EmbeddingsRequest.yaml new file mode 100644 index 00000000..94369108 --- /dev/null +++ b/specs/api/components/schemas/embeddings/EmbeddingsRequest.yaml @@ -0,0 +1,10 @@ +type: object +description: | + Embeddings request - convert text to vector embedding. +required: + - text +properties: + text: + type: string + description: Text to convert to embedding vector + example: Quantum computing uses quantum mechanics principles for computation. 
diff --git a/specs/api/components/schemas/embeddings/EmbeddingsResponse.yaml b/specs/api/components/schemas/embeddings/EmbeddingsResponse.yaml new file mode 100644 index 00000000..8a5c01cd --- /dev/null +++ b/specs/api/components/schemas/embeddings/EmbeddingsResponse.yaml @@ -0,0 +1,11 @@ +type: object +description: Embeddings response +required: + - vectors +properties: + vectors: + type: array + description: Embedding vector (array of floats) + items: + type: number + example: [0.023, -0.142, 0.089, 0.234, -0.067, 0.156] diff --git a/specs/api/components/schemas/errors/ErrorObject.yaml b/specs/api/components/schemas/errors/ErrorObject.yaml new file mode 100644 index 00000000..3a93a7dc --- /dev/null +++ b/specs/api/components/schemas/errors/ErrorObject.yaml @@ -0,0 +1,14 @@ +type: object +description: Structured error response with type and message +properties: + type: + type: string + description: Error type identifier + example: gateway-error + message: + type: string + description: Human-readable error message + example: Timeout +required: + - type + - message diff --git a/specs/api/components/schemas/flow/FlowRequest.yaml b/specs/api/components/schemas/flow/FlowRequest.yaml new file mode 100644 index 00000000..8cff7955 --- /dev/null +++ b/specs/api/components/schemas/flow/FlowRequest.yaml @@ -0,0 +1,76 @@ +type: object +description: | + Flow service request for managing flow instances and blueprints. 
+ + Operations: start-flow, stop-flow, list-flows, get-flow, + list-blueprints, get-blueprint, put-blueprint, delete-blueprint +required: + - operation +properties: + operation: + type: string + enum: + - start-flow + - stop-flow + - list-flows + - get-flow + - list-blueprints + - get-blueprint + - put-blueprint + - delete-blueprint + description: | + Flow operation: + - `start-flow`: Start a new flow instance from a blueprint + - `stop-flow`: Stop a running flow instance + - `list-flows`: List all running flow instances + - `get-flow`: Get details of a running flow + - `list-blueprints`: List available flow blueprints + - `get-blueprint`: Get blueprint definition + - `put-blueprint`: Create/update blueprint definition + - `delete-blueprint`: Delete blueprint definition + flow-id: + type: string + description: Flow instance ID (required for start-flow, stop-flow, get-flow) + example: my-flow + blueprint-name: + type: string + description: Flow blueprint name (required for start-flow, get-blueprint, put-blueprint, delete-blueprint) + example: document-rag + blueprint-definition: + type: object + description: Flow blueprint definition (required for put-blueprint) + additionalProperties: true + example: + description: Custom RAG pipeline + parameters: + model: + type: llm-model + description: LLM model for processing + order: 1 + class: + text-completion:{class}: + request: non-persistent://tg/request/text-completion:{class} + response: non-persistent://tg/response/text-completion:{class} + flow: + chunker:{id}: + input: persistent://tg/flow/chunk:{id} + output: persistent://tg/flow/chunk-load:{id} + interfaces: + agent: + request: non-persistent://tg/request/agent:{id} + response: non-persistent://tg/response/agent:{id} + description: + type: string + description: Flow description (optional for start-flow) + example: My document processing flow + parameters: + type: object + description: | + Flow parameters (for start-flow). 
+ All values are stored as strings, regardless of input type. + additionalProperties: + type: string + example: + model: gpt-4 + temperature: "0.7" + chunk-size: "1000" diff --git a/specs/api/components/schemas/flow/FlowResponse.yaml b/specs/api/components/schemas/flow/FlowResponse.yaml new file mode 100644 index 00000000..c93ae42c --- /dev/null +++ b/specs/api/components/schemas/flow/FlowResponse.yaml @@ -0,0 +1,82 @@ +type: object +description: Flow service response +properties: + flow-id: + type: string + description: Flow instance ID (returned by start-flow) + example: my-flow + flow-ids: + type: array + description: List of running flow IDs (returned by list-flows) + items: + type: string + example: + - default + - production + - my-flow + blueprint-names: + type: array + description: List of available blueprint names (returned by list-blueprints) + items: + type: string + example: + - document-rag + - graph-rag + - document-rag+graph-rag + blueprint-definition: + type: object + description: Blueprint definition (returned by get-blueprint) + additionalProperties: true + example: + description: Standard RAG pipeline + parameters: + model: + type: llm-model + order: 1 + class: + text-completion:{class}: + request: non-persistent://tg/request/text-completion:{class} + response: non-persistent://tg/response/text-completion:{class} + flow: + chunker:{id}: + input: persistent://tg/flow/chunk:{id} + output: persistent://tg/flow/chunk-load:{id} + interfaces: + agent: + request: non-persistent://tg/request/agent:{id} + response: non-persistent://tg/response/agent:{id} + flow: + type: object + description: Flow instance details (returned by get-flow) + properties: + blueprint-name: + type: string + example: document-rag + description: + type: string + example: My document processing flow + parameters: + type: object + description: Flow parameters (all values are strings) + additionalProperties: + type: string + example: + model: gpt-4 + temperature: "0.7" + interfaces: 
+ type: object + description: Service interfaces with resolved queue names + additionalProperties: true + example: + agent: + request: non-persistent://tg/request/agent:my-flow + response: non-persistent://tg/response/agent:my-flow + text-load: persistent://tg/flow/text-document-load:my-flow + description: + type: string + description: Description + parameters: + type: object + description: Parameters + additionalProperties: + type: string diff --git a/specs/api/components/schemas/knowledge/KnowledgeRequest.yaml b/specs/api/components/schemas/knowledge/KnowledgeRequest.yaml new file mode 100644 index 00000000..5c40e118 --- /dev/null +++ b/specs/api/components/schemas/knowledge/KnowledgeRequest.yaml @@ -0,0 +1,128 @@ +type: object +description: | + Knowledge graph core management request. + + Operations: list-kg-cores, get-kg-core, put-kg-core, delete-kg-core, + load-kg-core, unload-kg-core +required: + - operation +properties: + operation: + type: string + enum: + - list-kg-cores + - get-kg-core + - put-kg-core + - delete-kg-core + - load-kg-core + - unload-kg-core + description: | + Knowledge core operation: + - `list-kg-cores`: List knowledge cores for user + - `get-kg-core`: Get knowledge core by ID + - `put-kg-core`: Store triples and/or embeddings + - `delete-kg-core`: Delete knowledge core by ID + - `load-kg-core`: Load knowledge core into flow + - `unload-kg-core`: Unload knowledge core from flow + user: + type: string + description: User identifier (for list-kg-cores, put-kg-core, delete-kg-core) + default: trustgraph + example: alice + id: + type: string + description: Knowledge core ID (for get, put, delete, load, unload) + example: core-123 + flow: + type: string + description: Flow ID (for load-kg-core) + example: my-flow + collection: + type: string + description: Collection identifier (for load-kg-core) + default: default + example: default + triples: + type: object + description: Triples to store (for put-kg-core) + required: + - metadata + - triples 
+ properties: + metadata: + type: object + required: + - id + - user + - collection + properties: + id: + type: string + description: Knowledge core ID + example: core-123 + user: + type: string + description: User identifier + example: alice + collection: + type: string + description: Collection identifier + example: default + metadata: + type: array + description: Metadata triples + items: + $ref: '../../common/Triple.yaml' + triples: + type: array + description: Knowledge triples + items: + $ref: '../../common/Triple.yaml' + graph-embeddings: + type: object + description: Graph embeddings to store (for put-kg-core) + required: + - metadata + - entities + properties: + metadata: + type: object + required: + - id + - user + - collection + properties: + id: + type: string + description: Knowledge core ID + example: core-123 + user: + type: string + description: User identifier + example: alice + collection: + type: string + description: Collection identifier + example: default + metadata: + type: array + description: Metadata triples + items: + $ref: '../../common/Triple.yaml' + entities: + type: array + description: Entity embeddings + items: + type: object + required: + - entity + - vectors + properties: + entity: + $ref: '../../common/RdfValue.yaml' + vectors: + type: array + description: Embedding vectors + items: + type: number + example: [0.1, 0.2, 0.3] diff --git a/specs/api/components/schemas/knowledge/KnowledgeResponse.yaml b/specs/api/components/schemas/knowledge/KnowledgeResponse.yaml new file mode 100644 index 00000000..229233ca --- /dev/null +++ b/specs/api/components/schemas/knowledge/KnowledgeResponse.yaml @@ -0,0 +1,91 @@ +type: object +description: Knowledge service response +properties: + ids: + type: array + description: List of knowledge core IDs (returned by list-kg-cores) + items: + type: string + example: ["core-123", "core-456"] + triples: + type: object + description: Triples data (returned by get-kg-core, streamed) + properties: + 
metadata: + type: object + required: + - id + - user + - collection + properties: + id: + type: string + description: Knowledge core ID + example: core-123 + user: + type: string + description: User identifier + example: alice + collection: + type: string + description: Collection identifier + example: default + metadata: + type: array + description: Metadata triples + items: + $ref: '../../common/Triple.yaml' + triples: + type: array + description: Knowledge triples + items: + $ref: '../../common/Triple.yaml' + graph-embeddings: + type: object + description: Graph embeddings data (returned by get-kg-core, streamed) + properties: + metadata: + type: object + required: + - id + - user + - collection + properties: + id: + type: string + description: Knowledge core ID + example: core-123 + user: + type: string + description: User identifier + example: alice + collection: + type: string + description: Collection identifier + example: default + metadata: + type: array + description: Metadata triples + items: + $ref: '../../common/Triple.yaml' + entities: + type: array + description: Entity embeddings + items: + type: object + required: + - entity + - vectors + properties: + entity: + $ref: '../../common/RdfValue.yaml' + vectors: + type: array + description: Embedding vectors + items: + type: number + example: [0.1, 0.2, 0.3] + eos: + type: boolean + description: End of stream marker (for streaming responses) + example: true diff --git a/specs/api/components/schemas/librarian/LibrarianRequest.yaml b/specs/api/components/schemas/librarian/LibrarianRequest.yaml new file mode 100644 index 00000000..18aa94b1 --- /dev/null +++ b/specs/api/components/schemas/librarian/LibrarianRequest.yaml @@ -0,0 +1,79 @@ +type: object +description: | + Librarian service request for document library management. 
+ + Operations: add-document, remove-document, list-documents, + start-processing, stop-processing, list-processing +required: + - operation +properties: + operation: + type: string + enum: + - add-document + - remove-document + - list-documents + - start-processing + - stop-processing + - list-processing + description: | + Library operation: + - `add-document`: Add document to library + - `remove-document`: Remove document from library + - `list-documents`: List documents in library + - `start-processing`: Start processing library documents + - `stop-processing`: Stop library processing + - `list-processing`: List processing status + flow: + type: string + description: Flow ID + example: my-flow + collection: + type: string + description: Collection identifier + default: default + example: default + user: + type: string + description: User identifier + default: trustgraph + example: alice + document-id: + type: string + description: Document identifier + example: doc-123 + processing-id: + type: string + description: Processing task identifier + example: proc-456 + document-metadata: + $ref: '../common/DocumentMetadata.yaml' + processing-metadata: + $ref: '../common/ProcessingMetadata.yaml' + content: + type: string + description: Document content (for add-document with inline content) + example: This is the document content... 
+ criteria: + type: array + description: Search criteria for filtering documents + items: + type: object + required: + - key + - value + - operator + properties: + key: + type: string + description: Metadata field name + example: author + value: + type: string + description: Value to match + example: John Doe + operator: + type: string + enum: [eq, ne, gt, lt, contains] + description: Comparison operator + example: eq diff --git a/specs/api/components/schemas/librarian/LibrarianResponse.yaml b/specs/api/components/schemas/librarian/LibrarianResponse.yaml new file mode 100644 index 00000000..caa84628 --- /dev/null +++ b/specs/api/components/schemas/librarian/LibrarianResponse.yaml @@ -0,0 +1,18 @@ +type: object +description: Librarian service response +properties: + document-metadata: + $ref: '../common/DocumentMetadata.yaml' + content: + type: string + description: Document content + document-metadatas: + type: array + description: List of documents (returned by list-documents) + items: + $ref: '../common/DocumentMetadata.yaml' + processing-metadatas: + type: array + description: List of processing tasks (returned by list-processing) + items: + $ref: '../common/ProcessingMetadata.yaml' diff --git a/specs/api/components/schemas/loading/DocumentLoadRequest.yaml b/specs/api/components/schemas/loading/DocumentLoadRequest.yaml new file mode 100644 index 00000000..45bbe428 --- /dev/null +++ b/specs/api/components/schemas/loading/DocumentLoadRequest.yaml @@ -0,0 +1,32 @@ +type: object +description: | + Document load request - load binary document (PDF, etc.) into processing pipeline. + + Fire-and-forget operation (no response). +required: + - data +properties: + data: + type: string + description: Document data (base64 encoded) + format: byte + example: JVBERi0xLjQKJeLjz9MKMSAwIG9iago8PC9UeXBlL... 
+ id: + type: string + description: Document identifier + example: doc-456 + user: + type: string + description: User identifier + default: trustgraph + example: alice + collection: + type: string + description: Collection for document + default: default + example: research + metadata: + type: array + description: Document metadata as RDF triples + items: + $ref: '../../common/Triple.yaml' diff --git a/specs/api/components/schemas/loading/TextLoadRequest.yaml b/specs/api/components/schemas/loading/TextLoadRequest.yaml new file mode 100644 index 00000000..4ded87d5 --- /dev/null +++ b/specs/api/components/schemas/loading/TextLoadRequest.yaml @@ -0,0 +1,37 @@ +type: object +description: | + Text load request - load text document into processing pipeline. + + Fire-and-forget operation (no response). +required: + - text +properties: + text: + type: string + description: Text content (base64 encoded) + format: byte + example: VGhpcyBpcyB0aGUgZG9jdW1lbnQgdGV4dC4uLg== + id: + type: string + description: Document identifier + example: doc-123 + user: + type: string + description: User identifier + default: trustgraph + example: alice + collection: + type: string + description: Collection for document + default: default + example: research + charset: + type: string + description: Text character encoding + default: utf-8 + example: utf-8 + metadata: + type: array + description: Document metadata as RDF triples + items: + $ref: '../../common/Triple.yaml' diff --git a/specs/api/components/schemas/mcp-tool/McpToolRequest.yaml b/specs/api/components/schemas/mcp-tool/McpToolRequest.yaml new file mode 100644 index 00000000..b9c6ee4a --- /dev/null +++ b/specs/api/components/schemas/mcp-tool/McpToolRequest.yaml @@ -0,0 +1,17 @@ +type: object +description: | + MCP tool request - execute Model Context Protocol tool. 
+required: + - name +properties: + name: + type: string + description: Tool name to execute + example: search + parameters: + type: object + description: Tool parameters (JSON object, auto-converted to string internally) + additionalProperties: {} + example: + query: quantum computing + limit: 10 diff --git a/specs/api/components/schemas/mcp-tool/McpToolResponse.yaml b/specs/api/components/schemas/mcp-tool/McpToolResponse.yaml new file mode 100644 index 00000000..2c1b4974 --- /dev/null +++ b/specs/api/components/schemas/mcp-tool/McpToolResponse.yaml @@ -0,0 +1,15 @@ +type: object +description: MCP tool response +properties: + text: + type: string + description: Text response from tool + example: Found 10 results for quantum computing... + object: + type: object + description: Structured response from tool (JSON object) + additionalProperties: {} + example: + results: + - title: Introduction to Quantum Computing + url: https://example.com/qc-intro diff --git a/specs/api/components/schemas/prompt/PromptRequest.yaml b/specs/api/components/schemas/prompt/PromptRequest.yaml new file mode 100644 index 00000000..7b181016 --- /dev/null +++ b/specs/api/components/schemas/prompt/PromptRequest.yaml @@ -0,0 +1,32 @@ +type: object +description: | + Prompt service request - template-based text generation. + + Execute a stored prompt template with variable substitution. +required: + - id +properties: + id: + type: string + description: Prompt template ID (stored in config) + example: summarize-document + terms: + type: object + description: Template variables as key-value pairs (values are JSON strings) + additionalProperties: + type: string + example: + document: '"This is the document text to summarize..."' + max_length: '"200"' + variables: + type: object + description: Alternative to terms - variables as native JSON values (auto-converted) + additionalProperties: {} + example: + document: This is the document text to summarize... 
+ max_length: 200 + streaming: + type: boolean + description: Enable streaming response delivery + default: false + example: true diff --git a/specs/api/components/schemas/prompt/PromptResponse.yaml b/specs/api/components/schemas/prompt/PromptResponse.yaml new file mode 100644 index 00000000..fbe5559b --- /dev/null +++ b/specs/api/components/schemas/prompt/PromptResponse.yaml @@ -0,0 +1,16 @@ +type: object +description: Prompt service response +properties: + text: + type: string + description: Generated text response + example: This document discusses quantum computing and its applications... + object: + type: string + description: Structured response (JSON string) if prompt produces objects + example: '{"summary": "Quantum computing overview", "key_points": [...]}' + end-of-stream: + type: boolean + description: Indicates streaming is complete (streaming mode) + default: false + example: true diff --git a/specs/api/components/schemas/query/NlpQueryRequest.yaml b/specs/api/components/schemas/query/NlpQueryRequest.yaml new file mode 100644 index 00000000..2ef72e61 --- /dev/null +++ b/specs/api/components/schemas/query/NlpQueryRequest.yaml @@ -0,0 +1,17 @@ +type: object +description: | + NLP query request - convert natural language question to structured query. +required: + - question +properties: + question: + type: string + description: Natural language question + example: Who does Alice know that works in engineering? 
+ max-results: + type: integer + description: Maximum results to return when query is executed + default: 100 + minimum: 1 + maximum: 10000 + example: 50 diff --git a/specs/api/components/schemas/query/NlpQueryResponse.yaml b/specs/api/components/schemas/query/NlpQueryResponse.yaml new file mode 100644 index 00000000..91795c9b --- /dev/null +++ b/specs/api/components/schemas/query/NlpQueryResponse.yaml @@ -0,0 +1,47 @@ +type: object +description: NLP query response +required: + - graphql-query + - variables +properties: + graphql-query: + type: string + description: Generated GraphQL query + example: | + query GetConnections($person: ID!) { + person(id: $person) { + knows { + name + worksFor { department } + } + } + } + variables: + type: object + description: Query variables + additionalProperties: + type: string + example: + person: "https://example.com/person/alice" + detected-schemas: + type: array + description: Detected schema types used in query + items: + type: string + example: ["Person", "Organization"] + confidence: + type: number + description: Confidence score for query generation (0.0-1.0) + minimum: 0.0 + maximum: 1.0 + example: 0.87 + error: + type: object + description: Error if query generation failed + properties: + type: + type: string + example: PARSE_ERROR + message: + type: string + example: Could not understand question structure diff --git a/specs/api/components/schemas/query/ObjectsQueryRequest.yaml b/specs/api/components/schemas/query/ObjectsQueryRequest.yaml new file mode 100644 index 00000000..775bbc4b --- /dev/null +++ b/specs/api/components/schemas/query/ObjectsQueryRequest.yaml @@ -0,0 +1,40 @@ +type: object +description: | + Objects query request - GraphQL query over knowledge graph. +required: + - query +properties: + query: + type: string + description: GraphQL query string + example: | + query GetPerson($id: ID!) 
{ + person(id: $id) { + name + email + knows { + name + } + } + } + variables: + type: object + description: GraphQL query variables + additionalProperties: + type: string + example: + id: "https://example.com/person/alice" + operation-name: + type: string + description: Operation name (for multi-operation documents) + example: GetPerson + user: + type: string + description: User identifier + default: trustgraph + example: alice + collection: + type: string + description: Collection to query + default: default + example: research diff --git a/specs/api/components/schemas/query/ObjectsQueryResponse.yaml b/specs/api/components/schemas/query/ObjectsQueryResponse.yaml new file mode 100644 index 00000000..8fd9b6a6 --- /dev/null +++ b/specs/api/components/schemas/query/ObjectsQueryResponse.yaml @@ -0,0 +1,54 @@ +type: object +description: Objects query response (GraphQL format) +properties: + data: + description: GraphQL response data (JSON object or null) + oneOf: + - type: object + additionalProperties: {} + - type: "null" + example: + person: + name: Alice + email: alice@example.com + knows: + - name: Bob + - name: Carol + errors: + type: array + description: GraphQL field-level errors + items: + type: object + properties: + message: + type: string + description: Error message + example: Cannot query field 'age' on type 'Person' + path: + type: array + description: Path to error location + items: + type: string + example: ["person", "age"] + extensions: + type: object + description: Additional error metadata + additionalProperties: + type: string + extensions: + type: object + description: Query metadata (execution time, etc.) + additionalProperties: + type: string + example: + execution_time_ms: "42" + error: + type: object + description: System-level error (connection, timeout, etc.) 
+ properties: + type: + type: string + example: TIMEOUT_ERROR + message: + type: string + example: Query execution timeout diff --git a/specs/api/components/schemas/query/StructuredQueryRequest.yaml b/specs/api/components/schemas/query/StructuredQueryRequest.yaml new file mode 100644 index 00000000..ae564c0a --- /dev/null +++ b/specs/api/components/schemas/query/StructuredQueryRequest.yaml @@ -0,0 +1,22 @@ +type: object +description: | + Structured query request - natural language question with automatic execution. + + Combines NLP query generation and execution in one call. +required: + - question +properties: + question: + type: string + description: Natural language question + example: Who does Alice know that works in engineering? + user: + type: string + description: User identifier + default: trustgraph + example: alice + collection: + type: string + description: Collection to query + default: default + example: research diff --git a/specs/api/components/schemas/query/StructuredQueryResponse.yaml b/specs/api/components/schemas/query/StructuredQueryResponse.yaml new file mode 100644 index 00000000..4ce73685 --- /dev/null +++ b/specs/api/components/schemas/query/StructuredQueryResponse.yaml @@ -0,0 +1,34 @@ +type: object +description: Structured query response +properties: + data: + description: Query results (JSON object or null) + oneOf: + - type: object + additionalProperties: {} + - type: "null" + example: + person: + name: Alice + knows: + - name: Bob + worksFor: {name: Acme Corp, department: Engineering} + - name: Carol + worksFor: {name: Tech Inc, department: Engineering} + errors: + type: array + description: Query errors (array of error strings) + items: + type: string + example: + - Could not resolve field 'age' on type 'Person' + error: + type: object + description: System-level error + properties: + type: + type: string + example: QUERY_GENERATION_ERROR + message: + type: string + example: Failed to generate query from question diff --git 
a/specs/api/components/schemas/query/TriplesQueryRequest.yaml b/specs/api/components/schemas/query/TriplesQueryRequest.yaml new file mode 100644 index 00000000..88b0a1eb --- /dev/null +++ b/specs/api/components/schemas/query/TriplesQueryRequest.yaml @@ -0,0 +1,30 @@ +type: object +description: | + Triples query request - query knowledge graph by subject/predicate/object pattern. +properties: + s: + $ref: '../../common/RdfValue.yaml' + description: Subject filter (optional) + p: + $ref: '../../common/RdfValue.yaml' + description: Predicate filter (optional) + o: + $ref: '../../common/RdfValue.yaml' + description: Object filter (optional) + limit: + type: integer + description: Maximum number of triples to return + default: 10000 + minimum: 1 + maximum: 100000 + example: 100 + user: + type: string + description: User identifier + default: trustgraph + example: alice + collection: + type: string + description: Collection to query + default: default + example: research diff --git a/specs/api/components/schemas/query/TriplesQueryResponse.yaml b/specs/api/components/schemas/query/TriplesQueryResponse.yaml new file mode 100644 index 00000000..3d804c41 --- /dev/null +++ b/specs/api/components/schemas/query/TriplesQueryResponse.yaml @@ -0,0 +1,10 @@ +type: object +description: Triples query response +required: + - response +properties: + response: + type: array + description: Matching triples + items: + $ref: '../../common/Triple.yaml' diff --git a/specs/api/components/schemas/rag/DocumentRagRequest.yaml b/specs/api/components/schemas/rag/DocumentRagRequest.yaml new file mode 100644 index 00000000..97a9d2ff --- /dev/null +++ b/specs/api/components/schemas/rag/DocumentRagRequest.yaml @@ -0,0 +1,33 @@ +type: object +description: | + Document RAG (Retrieval-Augmented Generation) query request. + Searches document embeddings and generates answer using retrieved context. 
+required: + - query +properties: + query: + type: string + description: User query or question + example: What are the key findings in the research papers? + user: + type: string + description: User identifier for multi-tenancy + default: trustgraph + example: alice + collection: + type: string + description: Collection to search within + default: default + example: research + doc-limit: + type: integer + description: Maximum number of documents to retrieve + default: 20 + minimum: 1 + maximum: 100 + example: 10 + streaming: + type: boolean + description: Enable streaming response delivery + default: false + example: true diff --git a/specs/api/components/schemas/rag/DocumentRagResponse.yaml b/specs/api/components/schemas/rag/DocumentRagResponse.yaml new file mode 100644 index 00000000..6a0166e7 --- /dev/null +++ b/specs/api/components/schemas/rag/DocumentRagResponse.yaml @@ -0,0 +1,24 @@ +type: object +description: Document RAG response +properties: + response: + type: string + description: Generated response based on retrieved documents + example: The research papers found three key findings... + end-of-stream: + type: boolean + description: Indicates streaming is complete (streaming mode) + default: false + example: true + error: + type: object + description: Error details if request failed + properties: + message: + type: string + description: Error message + example: Failed to retrieve documents + type: + type: string + description: Error type + example: RETRIEVAL_ERROR diff --git a/specs/api/components/schemas/rag/GraphRagRequest.yaml b/specs/api/components/schemas/rag/GraphRagRequest.yaml new file mode 100644 index 00000000..733dd7c1 --- /dev/null +++ b/specs/api/components/schemas/rag/GraphRagRequest.yaml @@ -0,0 +1,54 @@ +type: object +description: | + Graph RAG (Retrieval-Augmented Generation) query request. + Searches knowledge graph and generates answer using retrieved subgraph. 
+required: + - query +properties: + query: + type: string + description: User query or question + example: What connections exist between quantum physics and computer science? + user: + type: string + description: User identifier for multi-tenancy + default: trustgraph + example: alice + collection: + type: string + description: Collection to search within + default: default + example: research + entity-limit: + type: integer + description: Maximum number of entities to retrieve + default: 50 + minimum: 1 + maximum: 200 + example: 30 + triple-limit: + type: integer + description: Maximum number of triples to retrieve per entity + default: 30 + minimum: 1 + maximum: 100 + example: 20 + max-subgraph-size: + type: integer + description: Maximum total subgraph size (triples) + default: 1000 + minimum: 10 + maximum: 5000 + example: 500 + max-path-length: + type: integer + description: Maximum path length for graph traversal + default: 2 + minimum: 1 + maximum: 5 + example: 3 + streaming: + type: boolean + description: Enable streaming response delivery + default: false + example: true diff --git a/specs/api/components/schemas/rag/GraphRagResponse.yaml b/specs/api/components/schemas/rag/GraphRagResponse.yaml new file mode 100644 index 00000000..75f4f059 --- /dev/null +++ b/specs/api/components/schemas/rag/GraphRagResponse.yaml @@ -0,0 +1,24 @@ +type: object +description: Graph RAG response +properties: + response: + type: string + description: Generated response based on retrieved knowledge graph + example: Quantum physics and computer science intersect in quantum computing... 
+ end-of-stream: + type: boolean + description: Indicates streaming is complete (streaming mode) + default: false + example: true + error: + type: object + description: Error details if request failed + properties: + message: + type: string + description: Error message + example: Failed to retrieve graph data + type: + type: string + description: Error type + example: GRAPH_ERROR diff --git a/specs/api/components/schemas/text-completion/TextCompletionRequest.yaml b/specs/api/components/schemas/text-completion/TextCompletionRequest.yaml new file mode 100644 index 00000000..95c5a30d --- /dev/null +++ b/specs/api/components/schemas/text-completion/TextCompletionRequest.yaml @@ -0,0 +1,20 @@ +type: object +description: | + Text completion request - direct LLM completion without RAG. +required: + - system + - prompt +properties: + system: + type: string + description: System prompt that sets behavior and context for the LLM + example: You are a helpful assistant that provides concise answers. + prompt: + type: string + description: User prompt or question + example: Explain the concept of recursion in programming. + streaming: + type: boolean + description: Enable streaming response delivery + default: false + example: true diff --git a/specs/api/components/schemas/text-completion/TextCompletionResponse.yaml b/specs/api/components/schemas/text-completion/TextCompletionResponse.yaml new file mode 100644 index 00000000..b97573c7 --- /dev/null +++ b/specs/api/components/schemas/text-completion/TextCompletionResponse.yaml @@ -0,0 +1,26 @@ +type: object +description: Text completion response +required: + - response +properties: + response: + type: string + description: Generated text response + example: Recursion is a programming technique where a function calls itself... 
+ in-token: + type: integer + description: Number of input tokens consumed + example: 45 + out-token: + type: integer + description: Number of output tokens generated + example: 128 + model: + type: string + description: Model used for completion + example: gpt-4 + end-of-stream: + type: boolean + description: Indicates streaming is complete (streaming mode) + default: false + example: true diff --git a/specs/api/openapi.yaml b/specs/api/openapi.yaml new file mode 100644 index 00000000..b3258d14 --- /dev/null +++ b/specs/api/openapi.yaml @@ -0,0 +1,160 @@ +openapi: 3.1.0 + +info: + title: TrustGraph API Gateway + version: 1.8.0 + description: | + REST API for TrustGraph - an AI-powered knowledge graph and RAG system. + + ## Overview + + The API provides access to: + - **Global Services**: Configuration, flow management, knowledge storage, library management + - **Flow-Hosted Services**: AI services like RAG, text completion, embeddings (require running flow) + - **Import/Export**: Bulk data operations for triples, embeddings, entity contexts + - **WebSocket**: Multiplexed interface for all services + + ## Service Types + + ### Global Services + Fixed endpoints accessible via `/api/v1/{kind}`: + - `config` - Configuration management + - `flow` - Flow lifecycle and blueprints + - `librarian` - Document library management + - `knowledge` - Knowledge graph core management + - `collection-management` - Collection metadata + + ### Flow-Hosted Services + Require running flow instance, accessed via `/api/v1/flow/{flow}/service/{kind}`: + - AI services: agent, text-completion, prompt, RAG (document/graph) + - Embeddings: embeddings, graph-embeddings, document-embeddings + - Query: triples, objects, nlp-query, structured-query + - Data loading: text-load, document-load + - Utilities: mcp-tool, structured-diag + + ## Authentication + + Bearer token authentication when `GATEWAY_SECRET` environment variable is set. 
+ Include token in Authorization header:
+ ```
+ Authorization: Bearer <token>
+ ```
+
+ If `GATEWAY_SECRET` is not set, API runs without authentication (development mode).
+
+ ## Field Naming
+
+ All JSON fields use **kebab-case**: `flow-id`, `blueprint-name`, `doc-limit`, etc.
+
+ ## Error Responses
+
+ All endpoints may return errors in this format:
+ ```json
+ {
+ "error": {
+ "type": "gateway-error",
+ "message": "Timeout"
+ }
+ }
+ ```
+
+ contact:
+ name: TrustGraph Project
+ url: https://trustgraph.ai
+ license:
+ name: Apache 2.0
+ url: https://www.apache.org/licenses/LICENSE-2.0.html
+
+servers:
+ - url: http://localhost:8088
+ description: Local development server
+
+security:
+ - bearerAuth: []
+
+tags:
+ - name: Config
+ description: Configuration management (global service)
+ - name: Flow
+ description: Flow lifecycle and blueprint management (global service)
+ - name: Librarian
+ description: Document library management (global service)
+ - name: Knowledge
+ description: Knowledge graph core management (global service)
+ - name: Collection
+ description: Collection metadata management (global service)
+ - name: Flow Services
+ description: Services hosted within flow instances
+ - name: Import/Export
+ description: Bulk data import and export
+ - name: WebSocket
+ description: WebSocket interfaces
+ - name: Metrics
+ description: System metrics and monitoring
+
+paths:
+ /api/v1/config:
+ $ref: './paths/config.yaml'
+ /api/v1/flow:
+ $ref: './paths/flow.yaml'
+ /api/v1/librarian:
+ $ref: './paths/librarian.yaml'
+ /api/v1/knowledge:
+ $ref: './paths/knowledge.yaml'
+ /api/v1/collection-management:
+ $ref: './paths/collection-management.yaml'
+
+ # Flow-hosted services (require running flow instance)
+ /api/v1/flow/{flow}/service/agent:
+ $ref: './paths/flow/agent.yaml'
+ /api/v1/flow/{flow}/service/document-rag:
+ $ref: './paths/flow/document-rag.yaml'
+ /api/v1/flow/{flow}/service/graph-rag:
+ $ref: './paths/flow/graph-rag.yaml'
+ 
/api/v1/flow/{flow}/service/text-completion: + $ref: './paths/flow/text-completion.yaml' + /api/v1/flow/{flow}/service/prompt: + $ref: './paths/flow/prompt.yaml' + /api/v1/flow/{flow}/service/embeddings: + $ref: './paths/flow/embeddings.yaml' + /api/v1/flow/{flow}/service/mcp-tool: + $ref: './paths/flow/mcp-tool.yaml' + /api/v1/flow/{flow}/service/triples: + $ref: './paths/flow/triples.yaml' + /api/v1/flow/{flow}/service/objects: + $ref: './paths/flow/objects.yaml' + /api/v1/flow/{flow}/service/nlp-query: + $ref: './paths/flow/nlp-query.yaml' + /api/v1/flow/{flow}/service/structured-query: + $ref: './paths/flow/structured-query.yaml' + /api/v1/flow/{flow}/service/structured-diag: + $ref: './paths/flow/structured-diag.yaml' + /api/v1/flow/{flow}/service/graph-embeddings: + $ref: './paths/flow/graph-embeddings.yaml' + /api/v1/flow/{flow}/service/document-embeddings: + $ref: './paths/flow/document-embeddings.yaml' + /api/v1/flow/{flow}/service/text-load: + $ref: './paths/flow/text-load.yaml' + /api/v1/flow/{flow}/service/document-load: + $ref: './paths/flow/document-load.yaml' + + # Import/Export endpoints + /api/v1/import-core: + $ref: './paths/import-core.yaml' + /api/v1/export-core: + $ref: './paths/export-core.yaml' + + # WebSocket endpoints + /api/v1/socket: + $ref: './paths/websocket.yaml' + + # Metrics endpoint + /api/metrics: + $ref: './paths/metrics.yaml' + /api/metrics/{path}: + $ref: './paths/metrics-path.yaml' + +components: + securitySchemes: + bearerAuth: + $ref: './security/bearerAuth.yaml' diff --git a/specs/api/paths/collection-management.yaml b/specs/api/paths/collection-management.yaml new file mode 100644 index 00000000..7dffd4e0 --- /dev/null +++ b/specs/api/paths/collection-management.yaml @@ -0,0 +1,108 @@ +post: + tags: + - Collection + summary: Collection metadata management + description: | + Manage collection metadata for organizing documents and knowledge. 
+ + ## Collections + + Collections are organizational units for grouping: + - Documents in the librarian + - Knowledge cores + - User data + + Each collection has: + - **user**: Owner identifier + - **collection**: Unique collection ID + - **name**: Human-readable display name + - **description**: Purpose and contents + - **tags**: Labels for filtering and organization + + ## Operations + + ### list-collections + List all collections for a user. Optionally filter by tags and limit results. + Returns array of collection metadata. + + ### update-collection + Create or update collection metadata. If collection doesn't exist, it's created. + If it exists, metadata is updated. Allows setting name, description, and tags. + + ### delete-collection + Delete a collection by user and collection ID. This removes the metadata but + typically does not delete the associated data (documents, knowledge cores). + + operationId: collectionManagementService + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: '../components/schemas/collection/CollectionRequest.yaml' + examples: + listCollections: + summary: List all collections for user + value: + operation: list-collections + user: alice + listCollectionsFiltered: + summary: List collections filtered by tags + value: + operation: list-collections + user: alice + tag-filter: ["research", "AI"] + limit: 50 + updateCollection: + summary: Create/update collection + value: + operation: update-collection + user: alice + collection: research + name: Research Papers + description: Academic research papers on AI and ML + tags: ["research", "AI", "academic"] + timestamp: "2024-01-15T10:30:00Z" + deleteCollection: + summary: Delete collection + value: + operation: delete-collection + user: alice + collection: research + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../components/schemas/collection/CollectionResponse.yaml' + 
examples: + listCollections: + summary: List of collections + value: + timestamp: "2024-01-15T10:30:00Z" + collections: + - user: alice + collection: research + name: Research Papers + description: Academic research papers on AI and ML + tags: ["research", "AI", "academic"] + - user: alice + collection: personal + name: Personal Documents + description: Personal notes and documents + tags: ["personal"] + updateSuccess: + summary: Update successful + value: + timestamp: "2024-01-15T10:30:00Z" + deleteSuccess: + summary: Delete successful + value: + timestamp: "2024-01-15T10:30:00Z" + '401': + $ref: '../components/responses/Unauthorized.yaml' + '500': + $ref: '../components/responses/Error.yaml' diff --git a/specs/api/paths/config.yaml b/specs/api/paths/config.yaml new file mode 100644 index 00000000..ef95498b --- /dev/null +++ b/specs/api/paths/config.yaml @@ -0,0 +1,165 @@ +post: + tags: + - Config + summary: Configuration service + description: | + Manage TrustGraph configuration including flows, prompts, token costs, parameter types, and more. + + ## Operations + + ### config + Get the complete system configuration including all flows, prompts, token costs, etc. + + ### list + List all configuration items of a specific type (e.g., all flows, all prompts). + + ### get + Retrieve specific configuration items by type and key. + + ### put + Create or update configuration values. + + ### delete + Delete configuration items. + + ## Configuration Types + + - `flow` - Flow instance definitions + - `flow-blueprint` - Flow blueprint definitions (stored separately from flow instances) + - `prompt` - Prompt templates + - `token-cost` - Model token pricing + - `parameter-type` - Parameter type definitions + - `interface-description` - Interface descriptions + - Custom types as needed + + ## Important Distinction + + The **config service** manages *stored configuration*. + The **flow service** (`/api/v1/flow`) manages *running flow instances*. 
+ + - Use config service to store/retrieve flow definitions + - Use flow service to start/stop/manage running flows + + operationId: configService + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: '../components/schemas/config/ConfigRequest.yaml' + examples: + getCompleteConfig: + summary: Get complete configuration + value: + operation: config + listFlows: + summary: List all stored flow definitions + value: + operation: list + type: flow + listPrompts: + summary: List all prompts + value: + operation: list + type: prompt + getFlow: + summary: Get specific flow definition + value: + operation: get + keys: + - type: flow + key: default + putFlow: + summary: Create/update flow definition + value: + operation: put + values: + - type: flow + key: my-flow + value: + blueprint-name: document-rag + description: My RAG flow + parameters: + model: gpt-4 + putPrompt: + summary: Set system prompt + value: + operation: put + values: + - type: prompt + key: system + value: You are a helpful AI assistant specialized in data analysis + putTokenCost: + summary: Set token costs for a model + value: + operation: put + values: + - type: token-cost + key: gpt-4 + value: + prompt: 0.03 + completion: 0.06 + deleteFlow: + summary: Delete flow definition + value: + operation: delete + keys: + - type: flow + key: my-flow + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../components/schemas/config/ConfigResponse.yaml' + examples: + completeConfig: + summary: Complete configuration + value: + version: 42 + config: + flow: + default: + blueprint-name: document-rag+graph-rag + description: Default flow + interfaces: + agent: + request: non-persistent://tg/request/agent:default + response: non-persistent://tg/response/agent:default + prompt: + system: You are a helpful AI assistant + token-cost: + gpt-4: + prompt: 0.03 + completion: 0.06 + listFlows: + summary: List of flow 
definition keys + value: + directory: + - default + - production + - my-flow + getFlow: + summary: Retrieved flow definition + value: + values: + - type: flow + key: default + value: + blueprint-name: document-rag+graph-rag + description: Default flow + putSuccess: + summary: Put operation success + value: + version: 43 + deleteSuccess: + summary: Delete operation success + value: + version: 44 + '401': + $ref: '../components/responses/Unauthorized.yaml' + '500': + $ref: '../components/responses/Error.yaml' diff --git a/specs/api/paths/export-core.yaml b/specs/api/paths/export-core.yaml new file mode 100644 index 00000000..e7dc06b0 --- /dev/null +++ b/specs/api/paths/export-core.yaml @@ -0,0 +1,108 @@ +get: + tags: + - Import/Export + summary: Export Core - bulk export triples and embeddings + description: | + Export knowledge cores in bulk using streaming MessagePack format. + + ## Export Core Overview + + Bulk data export for knowledge graph: + - **Format**: MessagePack streaming + - **Content**: Triples and graph embeddings + - **Source**: Global knowledge storage + - **Use**: Backups, data migration, archival + + ## MessagePack Protocol + + Response body is MessagePack stream with message tuples: + + ### Triple Message + ``` + ("t", { + "m": { // Metadata + "i": "core-id", // Knowledge core ID + "m": [...], // Metadata triples array + "u": "user", // User + "c": "collection" // Collection + }, + "t": [...] // Triples array + }) + ``` + + ### Graph Embeddings Message + ``` + ("ge", { + "m": { // Metadata + "i": "core-id", + "m": [...], + "u": "user", + "c": "collection" + }, + "e": [ // Entities array + { + "e": {"v": "uri", "e": true}, // Entity RdfValue + "v": [0.1, 0.2, ...] 
// Vectors + } + ] + }) + ``` + + ### End of Stream Message + ``` + ("eos", {}) + ``` + + ## Query Parameters + + - **id**: Knowledge core ID to export + - **user**: User identifier + + ## Streaming + + Data streamed incrementally: + - Triples sent first + - Graph embeddings sent next + - EOS marker signals completion + + Client should process messages as received. + + ## Use Cases + + - **Backups**: Export for disaster recovery + - **Data migration**: Move to another system + - **Archival**: Long-term storage + - **Replication**: Copy knowledge cores + - **Analysis**: External processing + + operationId: exportCore + security: + - bearerAuth: [] + parameters: + - name: id + in: query + required: true + schema: + type: string + description: Knowledge core ID to export + example: core-123 + - name: user + in: query + required: true + schema: + type: string + description: User identifier + example: alice + responses: + '200': + description: Export stream + content: + application/msgpack: + schema: + type: string + format: binary + description: MessagePack stream of knowledge data + '401': + $ref: '../components/responses/Unauthorized.yaml' + '500': + $ref: '../components/responses/Error.yaml' diff --git a/specs/api/paths/flow.yaml b/specs/api/paths/flow.yaml new file mode 100644 index 00000000..181e03bf --- /dev/null +++ b/specs/api/paths/flow.yaml @@ -0,0 +1,194 @@ +post: + tags: + - Flow + summary: Flow lifecycle and blueprint management + description: | + Manage flow instances and blueprints. + + ## Important Distinction + + The **flow service** manages *running flow instances*. + The **config service** (`/api/v1/config`) manages *stored configuration*. + + - Use flow service to start/stop/manage running flows + - Use config service to store/retrieve flow definitions + + ## Flow Instance Operations + + ### start-flow + Start a new flow instance from a blueprint. The blueprint must exist (either built-in or created via put-blueprint). 
 + + Parameters are resolved from: + 1. User-provided values (the request's `parameters` field) + 2. Default values from parameter type definitions + 3. Controlled-by relationships + + ### stop-flow + Stop a running flow instance. This terminates all processors and releases resources. + + ### list-flows + List all currently running flow instances. + + ### get-flow + Get details of a running flow including its configuration, parameters, and interface queue names. + + ## Blueprint Operations + + ### list-blueprints + List all available flow blueprints (built-in and custom). + + ### get-blueprint + Retrieve a blueprint definition showing its structure, parameters, processors, and interfaces. + + ### put-blueprint + Create or update a flow blueprint definition. + + Blueprints define: + - **Class processors**: Shared across all instances of this blueprint + - **Flow processors**: Unique to each flow instance + - **Interfaces**: Entry points for external systems + - **Parameters**: Configurable values for customization + + ### delete-blueprint + Delete a custom blueprint definition. Built-in blueprints cannot be deleted. 
+ + operationId: flowService + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: '../components/schemas/flow/FlowRequest.yaml' + examples: + startFlow: + summary: Start a flow instance + value: + operation: start-flow + flow-id: my-flow + blueprint-name: document-rag + description: My document processing flow + parameters: + model: gpt-4 + temperature: "0.7" + startFlowMinimal: + summary: Start flow with defaults + value: + operation: start-flow + flow-id: my-flow + blueprint-name: document-rag + stopFlow: + summary: Stop a flow instance + value: + operation: stop-flow + flow-id: my-flow + listFlows: + summary: List running flows + value: + operation: list-flows + getFlow: + summary: Get flow details + value: + operation: get-flow + flow-id: my-flow + listBlueprints: + summary: List available blueprints + value: + operation: list-blueprints + getBlueprint: + summary: Get blueprint definition + value: + operation: get-blueprint + blueprint-name: document-rag + putBlueprint: + summary: Create/update blueprint + value: + operation: put-blueprint + blueprint-name: my-custom-rag + blueprint-definition: + description: Custom RAG pipeline + parameters: + model: + type: llm-model + description: LLM model + order: 1 + class: + text-completion:{class}: + request: non-persistent://tg/request/text-completion:{class} + response: non-persistent://tg/response/text-completion:{class} + flow: + chunker:{id}: + input: persistent://tg/flow/chunk:{id} + output: persistent://tg/flow/chunk-load:{id} + interfaces: + agent: + request: non-persistent://tg/request/agent:{id} + response: non-persistent://tg/response/agent:{id} + deleteBlueprint: + summary: Delete blueprint + value: + operation: delete-blueprint + blueprint-name: my-custom-rag + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../components/schemas/flow/FlowResponse.yaml' + examples: + startFlow: + summary: Flow 
started + value: + flow-id: my-flow + listFlows: + summary: Running flows + value: + flow-ids: + - default + - production + - my-flow + getFlow: + summary: Flow details + value: + flow: + blueprint-name: document-rag + description: My document processing flow + parameters: + model: gpt-4 + temperature: "0.7" + interfaces: + agent: + request: non-persistent://tg/request/agent:my-flow + response: non-persistent://tg/response/agent:my-flow + text-load: persistent://tg/flow/text-document-load:my-flow + listBlueprints: + summary: Available blueprints + value: + blueprint-names: + - document-rag + - graph-rag + - document-rag+graph-rag + - my-custom-rag + getBlueprint: + summary: Blueprint definition + value: + blueprint-definition: + description: Standard RAG pipeline + parameters: + model: + type: llm-model + order: 1 + class: + text-completion:{class}: + request: non-persistent://tg/request/text-completion:{class} + response: non-persistent://tg/response/text-completion:{class} + interfaces: + agent: + request: non-persistent://tg/request/agent:{id} + response: non-persistent://tg/response/agent:{id} + '401': + $ref: '../components/responses/Unauthorized.yaml' + '500': + $ref: '../components/responses/Error.yaml' diff --git a/specs/api/paths/flow/agent.yaml b/specs/api/paths/flow/agent.yaml new file mode 100644 index 00000000..91f92ebd --- /dev/null +++ b/specs/api/paths/flow/agent.yaml @@ -0,0 +1,130 @@ +post: + tags: + - Flow Services + summary: Agent service - conversational AI with reasoning + description: | + AI agent that can understand questions, reason about them, and take actions. 
+ + ## Agent Overview + + The agent service provides a conversational AI that: + - Understands natural language questions + - Reasons about problems using thoughts + - Takes actions to gather information + - Provides coherent answers + + ## Request Format + + Send a question with optional: + - **state**: Continue from previous conversation + - **history**: Previous agent steps for context + - **group**: Collaborative agent identifiers + - **streaming**: Enable streaming responses + + ## Response Modes + + ### Streaming Mode (streaming: true) + Responses arrive as chunks with `chunk-type`: + - `thought`: Agent's reasoning process + - `action`: Action being taken + - `observation`: Result from action + - `answer`: Final response to user + - `error`: Error occurred + + Each chunk may have multiple messages. Check flags: + - `end-of-message`: Current chunk type complete + - `end-of-dialog`: Entire conversation complete + + ### Legacy Mode (streaming: false) + Single response with: + - `answer`: Complete answer + - `thought`: Reasoning (if any) + - `observation`: Observations (if any) + + ## Multi-turn Conversations + + Include `history` array with previous steps to maintain context. + Each step has: thought, action, arguments, observation. + + operationId: agentService + security: + - bearerAuth: [] + parameters: + - name: flow + in: path + required: true + schema: + type: string + description: Flow instance ID + example: my-flow + requestBody: + required: true + content: + application/json: + schema: + $ref: '../../components/schemas/agent/AgentRequest.yaml' + examples: + simpleQuestion: + summary: Simple question + value: + question: What is the capital of France? + user: alice + streamingQuestion: + summary: Question with streaming enabled + value: + question: Explain quantum computing + user: alice + streaming: true + conversationWithHistory: + summary: Multi-turn conversation + value: + question: And what about its population? 
 + + user: alice + history: + - thought: User is asking about the capital of France + action: search + arguments: + query: "capital of France" + observation: "Paris is the capital of France" + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../../components/schemas/agent/AgentResponse.yaml' + examples: + streamingThought: + summary: Streaming thought chunk + value: + chunk-type: thought + content: I need to search for information about quantum computing + end-of-message: false + end-of-dialog: false + streamingAnswer: + summary: Streaming answer chunk + value: + chunk-type: answer + content: Quantum computing uses quantum mechanics principles... + end-of-message: false + end-of-dialog: false + streamingComplete: + summary: Streaming complete marker + value: + chunk-type: answer + content: "" + end-of-message: true + end-of-dialog: true + legacyResponse: + summary: Legacy non-streaming response + value: + answer: Paris is the capital of France. + thought: User is asking about the capital of France + observation: "" + end-of-message: false + end-of-dialog: false + '401': + $ref: '../../components/responses/Unauthorized.yaml' + '500': + $ref: '../../components/responses/Error.yaml' diff --git a/specs/api/paths/flow/document-embeddings.yaml b/specs/api/paths/flow/document-embeddings.yaml new file mode 100644 index 00000000..dbab2f92 --- /dev/null +++ b/specs/api/paths/flow/document-embeddings.yaml @@ -0,0 +1,103 @@ +post: + tags: + - Flow Services + summary: Document Embeddings Query - find similar text chunks + description: | + Query document embeddings to find similar text chunks by vector similarity. + + ## Document Embeddings Query Overview + + Find document chunks semantically similar to a query vector: + - **Input**: Query embedding vector + - **Search**: Compare against stored chunk embeddings + - **Output**: Most similar text chunks + + Core component of document RAG retrieval. 
+ + ## Use Cases + + - **Document retrieval**: Find relevant passages + - **Semantic search**: Search by meaning not keywords + - **Context gathering**: Get text for RAG + - **Similar content**: Discover related documents + + ## Process + + 1. Obtain query embedding (via embeddings service) + 2. Query stored document chunk embeddings + 3. Calculate cosine similarity + 4. Return top N most similar chunks + 5. Use chunks as context for generation + + ## Chunking + + Documents are split into chunks during indexing: + - Typical size: 200-1000 tokens + - Overlap between chunks for continuity + - Each chunk has own embedding + + Queries return individual chunks, not full documents. + + ## Similarity Scoring + + Uses cosine similarity: + - Results ordered by similarity + - No explicit scores in response + - Limit controls result count + + ## Output Format + + Returns text chunks as strings: + - Raw chunk text + - No metadata (source, position, etc.) + - Use for LLM context directly + + operationId: documentEmbeddingsQueryService + security: + - bearerAuth: [] + parameters: + - name: flow + in: path + required: true + schema: + type: string + description: Flow instance ID + example: my-flow + requestBody: + required: true + content: + application/json: + schema: + $ref: '../../components/schemas/embeddings-query/DocumentEmbeddingsQueryRequest.yaml' + examples: + basicQuery: + summary: Find similar chunks + value: + vectors: [0.023, -0.142, 0.089, 0.234, -0.067, 0.156, 0.201, -0.178] + limit: 10 + user: alice + collection: research + largeQuery: + summary: Larger result set + value: + vectors: [0.1, -0.2, 0.3, -0.4, 0.5] + limit: 30 + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../../components/schemas/embeddings-query/DocumentEmbeddingsQueryResponse.yaml' + examples: + similarChunks: + summary: Similar document chunks + value: + chunks: + - "Quantum computing uses quantum mechanics principles like superposition 
and entanglement for computation. Unlike classical bits, quantum bits (qubits) can exist in multiple states simultaneously." + - "Neural networks are computing systems inspired by biological neural networks. They consist of interconnected nodes organized in layers that process information through weighted connections." + - "Machine learning algorithms learn patterns from data without being explicitly programmed. They improve their performance through experience and exposure to training data." + '401': + $ref: '../../components/responses/Unauthorized.yaml' + '500': + $ref: '../../components/responses/Error.yaml' diff --git a/specs/api/paths/flow/document-load.yaml b/specs/api/paths/flow/document-load.yaml new file mode 100644 index 00000000..09ddc09f --- /dev/null +++ b/specs/api/paths/flow/document-load.yaml @@ -0,0 +1,119 @@ +post: + tags: + - Flow Services + summary: Document Load - load binary documents (PDF, etc.) + description: | + Load binary documents (PDF, Word, etc.) into processing pipeline. + + ## Document Load Overview + + Fire-and-forget binary document loading: + - **Input**: Document data (base64 encoded) + - **Process**: Extract text, chunk, embed, store + - **Output**: None (202 Accepted) + + Asynchronous processing for PDF and other binary formats. + + ## Processing Pipeline + + Documents go through: + 1. **Text extraction**: PDF→text, DOCX→text, etc. + 2. **Chunking**: Split into overlapping chunks + 3. **Embedding**: Generate vectors for each chunk + 4. **Storage**: Store chunks + embeddings + 5. **Indexing**: Make searchable + + Pipeline runs asynchronously. + + ## Supported Formats + + - **PDF**: Portable Document Format + - **DOCX**: Microsoft Word + - **HTML**: Web pages + - Other formats via extractors + + Format detected from content, not extension. 
+ + ## Binary Encoding + + Documents must be base64 encoded: + ```python + with open('document.pdf', 'rb') as f: + doc_bytes = f.read() + encoded = base64.b64encode(doc_bytes).decode('utf-8') + ``` + + ## Metadata + + Optional RDF triples: + - Document properties + - Source information + - Custom attributes + + ## Use Cases + + - **PDF ingestion**: Process research papers + - **Document libraries**: Index document collections + - **Content migration**: Import from other systems + - **Automated processing**: Batch document loading + + ## No Response Data + + Returns 202 Accepted immediately: + - Document queued + - Processing happens asynchronously + - No status tracking + - Query later to verify indexed + + operationId: documentLoadService + security: + - bearerAuth: [] + parameters: + - name: flow + in: path + required: true + schema: + type: string + description: Flow instance ID + example: my-flow + requestBody: + required: true + content: + application/json: + schema: + $ref: '../../components/schemas/loading/DocumentLoadRequest.yaml' + examples: + loadPdf: + summary: Load PDF document + value: + data: JVBERi0xLjQKJeLjz9MKMSAwIG9iago8PC9UeXBlL0NhdGFsb2cvUGFnZXMgMiAwIFI+PmVuZG9iagoyIDAgb2JqCjw8L1R5cGUvUGFnZXMvS2lkc1szIDAgUl0vQ291bnQgMT4+ZW5kb2JqCg== + id: doc-789 + user: alice + collection: research + withMetadata: + summary: Load with metadata + value: + data: JVBERi0xLjQKJeLjz9MK... 
+ id: doc-101112 + user: bob + collection: papers + metadata: + - s: {v: "doc-101112", e: false} + p: {v: "http://purl.org/dc/terms/title", e: true} + o: {v: "Quantum Entanglement Research", e: false} + - s: {v: "doc-101112", e: false} + p: {v: "http://purl.org/dc/terms/date", e: true} + o: {v: "2024-01-15", e: false} + responses: + '202': + description: Document accepted for processing + content: + application/json: + schema: + type: object + properties: {} + example: {} + '401': + $ref: '../../components/responses/Unauthorized.yaml' + '500': + $ref: '../../components/responses/Error.yaml' diff --git a/specs/api/paths/flow/document-rag.yaml b/specs/api/paths/flow/document-rag.yaml new file mode 100644 index 00000000..fd738f33 --- /dev/null +++ b/specs/api/paths/flow/document-rag.yaml @@ -0,0 +1,107 @@ +post: + tags: + - Flow Services + summary: Document RAG - retrieve and generate from documents + description: | + Retrieval-Augmented Generation over document embeddings. + + ## Document RAG Overview + + Document RAG combines: + 1. **Retrieval**: Search document embeddings using semantic similarity + 2. **Generation**: Use LLM to synthesize answer from retrieved documents + + This provides grounded answers based on your document corpus. + + ## Query Process + + 1. Convert query to embedding + 2. Search document embeddings for most similar chunks + 3. Retrieve top N document chunks (configurable via doc-limit) + 4. Pass query + retrieved context to LLM + 5. Generate answer grounded in documents + + ## Streaming + + Enable `streaming: true` to receive the answer as it's generated: + - Multiple messages with `response` content + - Final message with `end-of-stream: true` + + Without streaming, returns complete answer in single response. 
+ + ## Parameters + + - **doc-limit**: Controls retrieval depth (1-100, default 20) + - Higher = more context but slower + - Lower = faster but may miss relevant info + - **collection**: Target specific document collection + - **user**: Multi-tenant isolation + + operationId: documentRagService + security: + - bearerAuth: [] + parameters: + - name: flow + in: path + required: true + schema: + type: string + description: Flow instance ID + example: my-flow + requestBody: + required: true + content: + application/json: + schema: + $ref: '../../components/schemas/rag/DocumentRagRequest.yaml' + examples: + basicQuery: + summary: Basic document query + value: + query: What are the key findings in the research papers? + user: alice + collection: research + streamingQuery: + summary: Streaming query + value: + query: Summarize the main conclusions + user: alice + collection: research + doc-limit: 15 + streaming: true + limitedRetrieval: + summary: Query with limited retrieval + value: + query: What is quantum entanglement? + doc-limit: 5 + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../../components/schemas/rag/DocumentRagResponse.yaml' + examples: + completeResponse: + summary: Complete non-streaming response + value: + response: | + The research papers present three key findings: + 1. Quantum entanglement exhibits non-local correlations + 2. Bell's inequality is violated in experimental tests + 3. 
Applications in quantum cryptography are promising + end-of-stream: false + streamingChunk: + summary: Streaming response chunk + value: + response: "The research papers present three" + end-of-stream: false + streamingComplete: + summary: Streaming complete marker + value: + response: "" + end-of-stream: true + '401': + $ref: '../../components/responses/Unauthorized.yaml' + '500': + $ref: '../../components/responses/Error.yaml' diff --git a/specs/api/paths/flow/embeddings.yaml b/specs/api/paths/flow/embeddings.yaml new file mode 100644 index 00000000..e7c7a3f5 --- /dev/null +++ b/specs/api/paths/flow/embeddings.yaml @@ -0,0 +1,85 @@ +post: + tags: + - Flow Services + summary: Embeddings - text to vector conversion + description: | + Convert text to embedding vectors for semantic similarity search. + + ## Embeddings Overview + + Embeddings transform text into dense vector representations that: + - Capture semantic meaning + - Enable similarity comparisons via cosine distance + - Support semantic search and retrieval + - Power RAG systems + + ## Use Cases + + - **Document indexing**: Convert documents to vectors for storage + - **Query encoding**: Convert search queries for similarity matching + - **Semantic similarity**: Find related texts via vector distance + - **Clustering**: Group similar content + - **Classification**: Use as features for ML models + + ## Vector Dimensions + + Dimension count depends on embedding model: + - text-embedding-ada-002: 1536 dimensions + - text-embedding-3-small: 1536 dimensions + - text-embedding-3-large: 3072 dimensions + - Custom models: Varies + + ## Single Request + + Unlike batch embedding APIs, this endpoint processes one text at a time. + For bulk operations, use document-load or text-load services. 
+ + operationId: embeddingsService + security: + - bearerAuth: [] + parameters: + - name: flow + in: path + required: true + schema: + type: string + description: Flow instance ID + example: my-flow + requestBody: + required: true + content: + application/json: + schema: + $ref: '../../components/schemas/embeddings/EmbeddingsRequest.yaml' + examples: + shortText: + summary: Short text embedding + value: + text: Machine learning + sentence: + summary: Sentence embedding + value: + text: Quantum computing uses quantum mechanics principles for computation. + paragraph: + summary: Paragraph embedding + value: + text: | + Neural networks are computing systems inspired by biological neural networks. + They consist of interconnected nodes (neurons) organized in layers. + Through training, they learn to recognize patterns and make predictions. + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../../components/schemas/embeddings/EmbeddingsResponse.yaml' + examples: + embeddingVector: + summary: Embedding vector + value: + vectors: [0.023, -0.142, 0.089, 0.234, -0.067, 0.156, 0.201, -0.178, 0.045, 0.312] + '401': + $ref: '../../components/responses/Unauthorized.yaml' + '500': + $ref: '../../components/responses/Error.yaml' diff --git a/specs/api/paths/flow/graph-embeddings.yaml b/specs/api/paths/flow/graph-embeddings.yaml new file mode 100644 index 00000000..277659de --- /dev/null +++ b/specs/api/paths/flow/graph-embeddings.yaml @@ -0,0 +1,95 @@ +post: + tags: + - Flow Services + summary: Graph Embeddings Query - find similar entities + description: | + Query graph embeddings to find similar entities by vector similarity. + + ## Graph Embeddings Query Overview + + Find entities semantically similar to a query vector: + - **Input**: Query embedding vector + - **Search**: Compare against stored entity embeddings + - **Output**: Most similar entities (RDF URIs) + + Core component of graph RAG retrieval. 
+ + ## Use Cases + + - **Entity discovery**: Find related entities + - **Concept expansion**: Discover similar concepts + - **Graph exploration**: Navigate by semantic similarity + - **RAG retrieval**: Get entities for context + + ## Process + + 1. Obtain query embedding (via embeddings service) + 2. Query stored entity embeddings + 3. Calculate cosine similarity + 4. Return top N most similar entities + 5. Use entities to retrieve triples/subgraph + + ## Similarity Scoring + + Uses cosine similarity between vectors: + - Results ordered by similarity (most similar first) + - No explicit similarity scores returned + - Limit controls result count + + ## Entity Format + + Returns RDF values (entities): + - URI entities: `{v: "https://...", e: true}` + - These are references to knowledge graph entities + - Use with triples query to get entity details + + operationId: graphEmbeddingsQueryService + security: + - bearerAuth: [] + parameters: + - name: flow + in: path + required: true + schema: + type: string + description: Flow instance ID + example: my-flow + requestBody: + required: true + content: + application/json: + schema: + $ref: '../../components/schemas/embeddings-query/GraphEmbeddingsQueryRequest.yaml' + examples: + basicQuery: + summary: Find similar entities + value: + vectors: [0.023, -0.142, 0.089, 0.234, -0.067, 0.156, 0.201, -0.178] + limit: 10 + user: alice + collection: research + largeQuery: + summary: Larger result set + value: + vectors: [0.1, -0.2, 0.3, -0.4, 0.5] + limit: 50 + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../../components/schemas/embeddings-query/GraphEmbeddingsQueryResponse.yaml' + examples: + similarEntities: + summary: Similar entities found + value: + entities: + - {v: "https://example.com/person/alice", e: true} + - {v: "https://example.com/person/bob", e: true} + - {v: "https://example.com/concept/quantum-computing", e: true} + - {v: 
"https://example.com/concept/machine-learning", e: true} + '401': + $ref: '../../components/responses/Unauthorized.yaml' + '500': + $ref: '../../components/responses/Error.yaml' diff --git a/specs/api/paths/flow/graph-rag.yaml b/specs/api/paths/flow/graph-rag.yaml new file mode 100644 index 00000000..9bcb6940 --- /dev/null +++ b/specs/api/paths/flow/graph-rag.yaml @@ -0,0 +1,127 @@ +post: + tags: + - Flow Services + summary: Graph RAG - retrieve and generate from knowledge graph + description: | + Retrieval-Augmented Generation over knowledge graph. + + ## Graph RAG Overview + + Graph RAG combines: + 1. **Retrieval**: Find relevant entities and subgraph from knowledge graph + 2. **Generation**: Use LLM to reason over graph structure and generate answer + + This provides graph-aware answers that leverage relationships and structure. + + ## Query Process + + 1. Identify relevant entities from query (using embeddings) + 2. Retrieve connected subgraph around entities + 3. Optionally traverse paths up to max-path-length hops + 4. Limit subgraph size to stay within context window + 5. Pass query + graph structure to LLM + 6. Generate answer incorporating graph relationships + + ## Streaming + + Enable `streaming: true` to receive the answer as it's generated: + - Multiple messages with `response` content + - Final message with `end-of-stream: true` + + Without streaming, returns complete answer in single response. 
+ + ## Parameters + + Control retrieval scope with multiple knobs: + - **entity-limit**: How many starting entities to find (1-200, default 50) + - **triple-limit**: Triples per entity (1-100, default 30) + - **max-subgraph-size**: Total subgraph cap (10-5000, default 1000) + - **max-path-length**: Graph traversal depth (1-5, default 2) + + Higher limits = more context but: + - Slower retrieval + - Larger context for LLM + - May hit context window limits + + ## Use Cases + + Best for queries requiring: + - Relationship understanding ("How are X and Y connected?") + - Multi-hop reasoning ("What's the path from A to B?") + - Structural analysis ("What are the main entities related to X?") + + operationId: graphRagService + security: + - bearerAuth: [] + parameters: + - name: flow + in: path + required: true + schema: + type: string + description: Flow instance ID + example: my-flow + requestBody: + required: true + content: + application/json: + schema: + $ref: '../../components/schemas/rag/GraphRagRequest.yaml' + examples: + basicQuery: + summary: Basic graph query + value: + query: What connections exist between quantum physics and computer science? + user: alice + collection: research + streamingQuery: + summary: Streaming query with custom limits + value: + query: Trace the historical development of AI from Turing to modern LLMs + user: alice + collection: research + entity-limit: 40 + triple-limit: 25 + max-subgraph-size: 800 + max-path-length: 3 + streaming: true + focusedQuery: + summary: Focused query with tight limits + value: + query: What is the immediate relationship between entity A and B? 
+ entity-limit: 10 + triple-limit: 15 + max-subgraph-size: 200 + max-path-length: 1 + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../../components/schemas/rag/GraphRagResponse.yaml' + examples: + completeResponse: + summary: Complete non-streaming response + value: + response: | + Quantum physics and computer science intersect primarily through quantum computing. + The knowledge graph shows connections through: + - Quantum algorithms (Shor's algorithm, Grover's algorithm) + - Quantum information theory + - Computational complexity theory + end-of-stream: false + streamingChunk: + summary: Streaming response chunk + value: + response: "Quantum physics and computer science intersect" + end-of-stream: false + streamingComplete: + summary: Streaming complete marker + value: + response: "" + end-of-stream: true + '401': + $ref: '../../components/responses/Unauthorized.yaml' + '500': + $ref: '../../components/responses/Error.yaml' diff --git a/specs/api/paths/flow/mcp-tool.yaml b/specs/api/paths/flow/mcp-tool.yaml new file mode 100644 index 00000000..9f53df36 --- /dev/null +++ b/specs/api/paths/flow/mcp-tool.yaml @@ -0,0 +1,119 @@ +post: + tags: + - Flow Services + summary: MCP Tool - execute Model Context Protocol tools + description: | + Execute MCP (Model Context Protocol) tools for agent capabilities. + + ## MCP Tool Overview + + MCP tools provide agent capabilities through standardized protocol: + - **Search tools**: Web search, document search + - **Data tools**: Database queries, API calls + - **Action tools**: File operations, system commands + - **Integration tools**: Third-party service connectors + + Tools extend agent capabilities beyond pure LLM generation. + + ## Tool Execution + + Tools are: + 1. Registered via MCP protocol + 2. Discovered by agent + 3. Called with structured parameters + 4. 
Return text or structured results + + ## Request Format + + - **name**: Tool identifier (e.g., "search", "calculator", "weather") + - **parameters**: Tool-specific arguments as JSON object + + ## Response Format + + Tools can return: + - **text**: Plain text result (simple tools) + - **object**: Structured JSON result (complex tools) + + ## Tool Registration + + Tools are registered via MCP server configuration: + - Define tool schema (name, parameters, description) + - Implement tool handler + - Register with MCP server + - Agent discovers and uses tool + + ## Use Cases + + - **Web search**: Find external information + - **Calculator**: Perform calculations + - **Database query**: Retrieve structured data + - **API integration**: Call external services + - **File operations**: Read/write files + - **Code execution**: Run scripts + + operationId: mcpToolService + security: + - bearerAuth: [] + parameters: + - name: flow + in: path + required: true + schema: + type: string + description: Flow instance ID + example: my-flow + requestBody: + required: true + content: + application/json: + schema: + $ref: '../../components/schemas/mcp-tool/McpToolRequest.yaml' + examples: + searchTool: + summary: Search tool execution + value: + name: search + parameters: + query: quantum computing + limit: 10 + calculatorTool: + summary: Calculator tool + value: + name: calculator + parameters: + expression: (42 * 7) + 15 + weatherTool: + summary: Weather tool + value: + name: weather + parameters: + location: San Francisco + units: celsius + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../../components/schemas/mcp-tool/McpToolResponse.yaml' + examples: + textResponse: + summary: Text result + value: + text: The result is 309 + objectResponse: + summary: Structured result + value: + object: + results: + - title: Introduction to Quantum Computing + url: https://example.com/qc-intro + snippet: Quantum computing uses quantum 
mechanics... + - title: Quantum Algorithms + url: https://example.com/qc-algos + snippet: Key algorithms include Shor's and Grover's... + total: 10 + '401': + $ref: '../../components/responses/Unauthorized.yaml' + '500': + $ref: '../../components/responses/Error.yaml' diff --git a/specs/api/paths/flow/nlp-query.yaml b/specs/api/paths/flow/nlp-query.yaml new file mode 100644 index 00000000..7032b5b9 --- /dev/null +++ b/specs/api/paths/flow/nlp-query.yaml @@ -0,0 +1,148 @@ +post: + tags: + - Flow Services + summary: NLP Query - natural language to structured query + description: | + Convert natural language questions to structured GraphQL queries. + + ## NLP Query Overview + + Transforms user questions into executable GraphQL: + - **Natural input**: Ask questions in plain English + - **Structured output**: Get GraphQL query + variables + - **Schema-aware**: Uses knowledge graph schema + - **Confidence scoring**: Know how well question was understood + + Enables non-technical users to query knowledge graph. + + ## Process + + 1. Parse natural language question + 2. Identify entities and relationships + 3. Map to GraphQL schema types + 4. Generate query with variables + 5. Return query + confidence score + + ## Using Results + + Generated query can be: + - Executed via objects query service + - Inspected and modified if needed + - Cached for similar questions + + Example workflow: + ``` + 1. User asks: "Who does Alice know?" + 2. NLP Query generates GraphQL + 3. Execute via /api/v1/flow/{flow}/service/objects + 4. Return results to user + ``` + + ## Schema Detection + + Response includes `detected-schemas` array showing: + - Which types were identified + - What entities were matched + - Schema coverage of question + + Helps understand query scope. 
+ + ## Confidence Scores + + - **0.9-1.0**: High confidence, likely correct + - **0.7-0.9**: Good confidence, probably correct + - **0.5-0.7**: Medium confidence, may need review + - **< 0.5**: Low confidence, likely incorrect + + Low scores suggest: + - Ambiguous question + - Missing schema coverage + - Complex query structure + + operationId: nlpQueryService + security: + - bearerAuth: [] + parameters: + - name: flow + in: path + required: true + schema: + type: string + description: Flow instance ID + example: my-flow + requestBody: + required: true + content: + application/json: + schema: + $ref: '../../components/schemas/query/NlpQueryRequest.yaml' + examples: + simpleQuestion: + summary: Simple relationship question + value: + question: Who does Alice know? + max-results: 50 + complexQuestion: + summary: Multi-hop relationship + value: + question: What companies employ people that Alice knows? + max-results: 100 + filterQuestion: + summary: Question with filters + value: + question: Which engineers does Bob collaborate with? + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../../components/schemas/query/NlpQueryResponse.yaml' + examples: + successfulQuery: + summary: Successful query generation + value: + graphql-query: | + query GetConnections($person: ID!) { + person(id: $person) { + knows { name email } + } + } + variables: + person: "https://example.com/person/alice" + detected-schemas: ["Person"] + confidence: 0.92 + complexQuery: + summary: Complex multi-hop query + value: + graphql-query: | + query GetCompanies($person: ID!) 
{ + person(id: $person) { + knows { + worksFor { + name + industry + } + } + } + } + variables: + person: "https://example.com/person/alice" + detected-schemas: ["Person", "Organization"] + confidence: 0.85 + lowConfidence: + summary: Low confidence result + value: + graphql-query: | + query Search { + search(term: "unknown entities") { + results + } + } + variables: {} + detected-schemas: [] + confidence: 0.43 + '401': + $ref: '../../components/responses/Unauthorized.yaml' + '500': + $ref: '../../components/responses/Error.yaml' diff --git a/specs/api/paths/flow/objects.yaml b/specs/api/paths/flow/objects.yaml new file mode 100644 index 00000000..ac94a353 --- /dev/null +++ b/specs/api/paths/flow/objects.yaml @@ -0,0 +1,166 @@ +post: + tags: + - Flow Services + summary: Objects query - GraphQL over knowledge graph + description: | + Query knowledge graph using GraphQL for object-oriented data access. + + ## Objects Query Overview + + GraphQL interface to knowledge graph: + - **Schema-driven**: Predefined types and relationships + - **Flexible queries**: Request exactly what you need + - **Nested data**: Traverse relationships in single query + - **Type-safe**: Strong typing with introspection + + Abstracts RDF triples into familiar object model. + + ## GraphQL Benefits + + Compared to triples query: + - **Developer-friendly**: Objects instead of triples + - **Efficient**: Get related data in one query + - **Typed**: Schema defines available fields + - **Discoverable**: Introspection for tooling + + ## Query Structure + + Standard GraphQL query format: + ```graphql + query OperationName($var: Type!) { + fieldName(arg: $var) { + subField1 + subField2 + nestedObject { + nestedField + } + } + } + ``` + + ## Variables + + Pass variables for parameterized queries: + ```json + { + "query": "query GetPerson($id: ID!) 
{ person(id: $id) { name } }", + "variables": {"id": "https://example.com/person/alice"} + } + ``` + + ## Error Handling + + GraphQL distinguishes: + - **Field errors**: Invalid query, missing fields (in `errors` array) + - **System errors**: Connection issues, timeouts (in `error` object) + + Partial data may be returned with field errors. + + ## Schema Definition + + Schema defines available types via config service. + Use introspection query to discover schema. + + operationId: objectsQueryService + security: + - bearerAuth: [] + parameters: + - name: flow + in: path + required: true + schema: + type: string + description: Flow instance ID + example: my-flow + requestBody: + required: true + content: + application/json: + schema: + $ref: '../../components/schemas/query/ObjectsQueryRequest.yaml' + examples: + simpleQuery: + summary: Simple query + value: + query: | + { + person(id: "https://example.com/person/alice") { + name + email + } + } + user: alice + collection: research + queryWithVariables: + summary: Query with variables + value: + query: | + query GetPerson($id: ID!) 
{ + person(id: $id) { + name + email + knows { + name + } + } + } + variables: + id: "https://example.com/person/alice" + operation-name: GetPerson + nestedQuery: + summary: Nested relationship query + value: + query: | + { + person(id: "https://example.com/person/alice") { + name + knows { + name + worksFor { + name + location + } + } + } + } + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../../components/schemas/query/ObjectsQueryResponse.yaml' + examples: + successfulQuery: + summary: Successful query + value: + data: + person: + name: Alice + email: alice@example.com + knows: + - name: Bob + - name: Carol + extensions: + execution_time_ms: "42" + queryWithFieldErrors: + summary: Query with field errors + value: + data: + person: + name: Alice + email: null + errors: + - message: Cannot query field 'nonexistent' on type 'Person' + path: ["person", "nonexistent"] + systemError: + summary: System error + value: + data: null + error: + type: TIMEOUT_ERROR + message: Query execution timeout after 30s + '401': + $ref: '../../components/responses/Unauthorized.yaml' + '500': + $ref: '../../components/responses/Error.yaml' diff --git a/specs/api/paths/flow/prompt.yaml b/specs/api/paths/flow/prompt.yaml new file mode 100644 index 00000000..84a49f81 --- /dev/null +++ b/specs/api/paths/flow/prompt.yaml @@ -0,0 +1,143 @@ +post: + tags: + - Flow Services + summary: Prompt service - template-based generation + description: | + Execute stored prompt templates with variable substitution. 
+ + ## Prompt Service Overview + + The prompt service enables: + - Reusable prompt templates stored in configuration + - Variable substitution for dynamic prompts + - Consistent prompt engineering across requests + - Text or structured object outputs + + ## Template System + + Prompts are stored via config service (`/api/v1/config`) with: + - **id**: Unique prompt identifier + - **template**: Prompt text with `{variable}` placeholders + - **system**: Optional system prompt + - **output_format**: "text" or "object" + + Example template: + ``` + Summarize the following document in {max_length} words: + + {document} + ``` + + ## Variable Substitution + + Two ways to pass variables: + + 1. **terms** (explicit JSON strings): + ```json + { + "terms": { + "document": "\"Text here...\"", + "max_length": "\"200\"" + } + } + ``` + + 2. **variables** (auto-converted): + ```json + { + "variables": { + "document": "Text here...", + "max_length": 200 + } + } + ``` + + ## Output Types + + - **text**: Plain text response in `text` field + - **object**: Structured JSON in `object` field (as string) + + ## Streaming + + Enable `streaming: true` to receive response incrementally. + + ## Use Cases + + - Document summarization + - Entity extraction + - Classification tasks + - Data transformation + - Any repeatable LLM task with consistent prompting + + operationId: promptService + security: + - bearerAuth: [] + parameters: + - name: flow + in: path + required: true + schema: + type: string + description: Flow instance ID + example: my-flow + requestBody: + required: true + content: + application/json: + schema: + $ref: '../../components/schemas/prompt/PromptRequest.yaml' + examples: + withTerms: + summary: Using terms (JSON strings) + value: + id: summarize-document + terms: + document: '"This document discusses quantum computing, covering qubits, superposition, and entanglement. 
Applications include cryptography and optimization."' + max_length: '"50"' + withVariables: + summary: Using variables (auto-converted) + value: + id: extract-entities + variables: + text: A paper by Einstein on relativity published in 1905. + entity_types: ["person", "year", "topic"] + streaming: + summary: Streaming response + value: + id: generate-report + variables: + data: {revenue: 1000000, growth: 15} + format: executive summary + streaming: true + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../../components/schemas/prompt/PromptResponse.yaml' + examples: + textResponse: + summary: Text output + value: + text: This document provides an overview of quantum computing fundamentals and cryptographic applications. + end-of-stream: false + objectResponse: + summary: Structured output + value: + object: '{"entities": [{"type": "person", "value": "Einstein"}, {"type": "year", "value": "1905"}, {"type": "topic", "value": "relativity"}]}' + end-of-stream: false + streamingChunk: + summary: Streaming chunk + value: + text: This document provides an overview + end-of-stream: false + streamingComplete: + summary: Streaming complete + value: + text: "" + end-of-stream: true + '401': + $ref: '../../components/responses/Unauthorized.yaml' + '500': + $ref: '../../components/responses/Error.yaml' diff --git a/specs/api/paths/flow/structured-diag.yaml b/specs/api/paths/flow/structured-diag.yaml new file mode 100644 index 00000000..9de56152 --- /dev/null +++ b/specs/api/paths/flow/structured-diag.yaml @@ -0,0 +1,172 @@ +post: + tags: + - Flow Services + summary: Structured Diag - analyze structured data formats + description: | + Analyze and understand structured data (CSV, JSON, XML). 
+ + ## Structured Diag Overview + + Helps process unknown structured data: + - **Detect format**: Identify CSV, JSON, or XML + - **Generate schema**: Create descriptor from sample + - **Match schemas**: Find existing schemas that fit data + - **Full diagnosis**: Complete analysis in one call + + Essential for data ingestion pipelines. + + ## Operations + + ### detect-type + Identify data format from sample: + - Input: Data sample + - Output: Format (csv/json/xml) + confidence + - Use when: Format is unknown + + ### generate-descriptor + Create schema descriptor: + - Input: Sample + known type + - Output: Field definitions, types, structure + - Use when: Need to understand data structure + + ### diagnose (recommended) + Combined analysis: + - Input: Data sample + - Output: Format + descriptor + metadata + - Use when: Starting from scratch + + ### schema-selection + Find matching schemas: + - Input: Data sample + - Output: List of schema IDs that match + - Use when: Have existing schemas, need to match data + + ## Data Types + + Supported formats: + - **CSV**: Comma-separated values (or custom delimiter) + - **JSON**: JSON objects or arrays + - **XML**: XML documents + + ## Options + + Format-specific options: + - **CSV**: delimiter, has_header, quote_char + - **JSON**: array_path (for nested arrays) + - **XML**: root_element, record_path + + ## Workflow Example + + 1. Receive unknown data file + 2. Call diagnose operation with sample + 3. Get format + schema descriptor + 4. Use descriptor to process full dataset + 5. 
Load data via document-load or text-load + + operationId: structuredDiagService + security: + - bearerAuth: [] + parameters: + - name: flow + in: path + required: true + schema: + type: string + description: Flow instance ID + example: my-flow + requestBody: + required: true + content: + application/json: + schema: + $ref: '../../components/schemas/diag/StructuredDiagRequest.yaml' + examples: + detectType: + summary: Detect data type + value: + operation: detect-type + sample: | + name,age,email + Alice,30,alice@example.com + Bob,25,bob@example.com + generateDescriptor: + summary: Generate schema descriptor + value: + operation: generate-descriptor + sample: | + name,age,email + Alice,30,alice@example.com + type: csv + schema-name: person-records + options: + delimiter: "," + has_header: "true" + diagnose: + summary: Full diagnosis + value: + operation: diagnose + sample: | + [ + {"name": "Alice", "age": 30}, + {"name": "Bob", "age": 25} + ] + schemaSelection: + summary: Find matching schemas + value: + operation: schema-selection + sample: | + name,email,phone + Alice,alice@example.com,555-1234 + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../../components/schemas/diag/StructuredDiagResponse.yaml' + examples: + detectedType: + summary: Type detection result + value: + operation: detect-type + detected-type: csv + confidence: 0.95 + generatedDescriptor: + summary: Generated descriptor + value: + operation: generate-descriptor + descriptor: + schema_name: person-records + type: csv + fields: + - {name: name, type: string} + - {name: age, type: integer} + - {name: email, type: string} + metadata: + field_count: "3" + has_header: "true" + fullDiagnosis: + summary: Complete diagnosis + value: + operation: diagnose + detected-type: json + confidence: 0.98 + descriptor: + type: json + structure: array_of_objects + fields: + - {name: name, type: string} + - {name: age, type: integer} + metadata: + record_count: 
"2" + schemaMatches: + summary: Schema selection results + value: + operation: schema-selection + schema-matches: + - person-schema-v1 + - contact-schema-v2 + '401': + $ref: '../../components/responses/Unauthorized.yaml' + '500': + $ref: '../../components/responses/Error.yaml' diff --git a/specs/api/paths/flow/structured-query.yaml b/specs/api/paths/flow/structured-query.yaml new file mode 100644 index 00000000..c094c50a --- /dev/null +++ b/specs/api/paths/flow/structured-query.yaml @@ -0,0 +1,134 @@ +post: + tags: + - Flow Services + summary: Structured Query - question to results (all-in-one) + description: | + Ask natural language questions and get results directly. + + ## Structured Query Overview + + Combines two operations in one call: + 1. **NLP Query**: Generate GraphQL from question + 2. **Objects Query**: Execute generated query + 3. **Return Results**: Direct answer data + + Simplest way to query knowledge graph with natural language. + + ## Comparison with Other Services + + ### Structured Query (this service) + - **Input**: Natural language question + - **Output**: Query results (data) + - **Use when**: Want simple, direct answers + + ### NLP Query + Objects Query (separate calls) + - **Step 1**: Convert question → GraphQL + - **Step 2**: Execute GraphQL → results + - **Use when**: Need to inspect/modify query before execution + + ### Triples Query (low-level) + - **Input**: RDF pattern + - **Output**: Matching triples + - **Use when**: Need precise control over graph queries + + ## Response Format + + Returns standard GraphQL response: + - **data**: Query results (null if error) + - **errors**: Field-level errors (array of strings) + - **error**: System-level error (generation or execution failure) + + ## Error Handling + + Three types of errors: + 1. **Query generation failed**: Couldn't understand question + - Error in `error` object + - data = null + 2. 
**Query execution failed**: Generated query had errors + - Errors in `errors` array + - data may be partial + 3. **System error**: Infrastructure issue + - Error in `error` object + + ## Performance + + Convenience vs control trade-off: + - **Faster development**: One call instead of two + - **Less control**: Can't inspect/modify generated query + - **Simpler code**: No need to handle intermediate steps + + operationId: structuredQueryService + security: + - bearerAuth: [] + parameters: + - name: flow + in: path + required: true + schema: + type: string + description: Flow instance ID + example: my-flow + requestBody: + required: true + content: + application/json: + schema: + $ref: '../../components/schemas/query/StructuredQueryRequest.yaml' + examples: + simpleQuestion: + summary: Simple relationship question + value: + question: Who does Alice know? + user: alice + collection: research + complexQuestion: + summary: Complex multi-hop question + value: + question: What companies employ engineers that Bob collaborates with? + user: bob + collection: work + filterQuestion: + summary: Question with implicit filters + value: + question: Which researchers work on quantum computing? 
+ responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../../components/schemas/query/StructuredQueryResponse.yaml' + examples: + successfulQuery: + summary: Successful query with results + value: + data: + person: + name: Alice + knows: + - name: Bob + email: bob@example.com + - name: Carol + email: carol@example.com + errors: [] + partialResults: + summary: Partial results with errors + value: + data: + person: + name: Alice + knows: null + errors: + - Cannot query field 'nonexistent' on type 'Person' + generationFailed: + summary: Query generation failed + value: + data: null + errors: [] + error: + type: QUERY_GENERATION_ERROR + message: Could not understand question structure + '401': + $ref: '../../components/responses/Unauthorized.yaml' + '500': + $ref: '../../components/responses/Error.yaml' diff --git a/specs/api/paths/flow/text-completion.yaml b/specs/api/paths/flow/text-completion.yaml new file mode 100644 index 00000000..7526d0c1 --- /dev/null +++ b/specs/api/paths/flow/text-completion.yaml @@ -0,0 +1,125 @@ +post: + tags: + - Flow Services + summary: Text completion - direct LLM generation + description: | + Direct text completion using LLM without retrieval augmentation. + + ## Text Completion Overview + + Pure LLM generation for: + - General knowledge questions + - Creative writing + - Code generation + - Analysis and reasoning + - Any task not requiring specific document/graph context + + ## System vs Prompt + + - **system**: Sets LLM behavior, role, constraints + - "You are a helpful assistant" + - "You are an expert Python developer" + - "Respond in JSON format" + - **prompt**: The actual user request/question + + ## Streaming + + Enable `streaming: true` to receive tokens as generated: + - Multiple messages with partial `response` + - Final message with `end-of-stream: true` + + Without streaming, returns complete response in single message. 
+ + ## Token Counting + + Response includes token usage: + - `in-token`: Input tokens (system + prompt) + - `out-token`: Generated tokens + - Useful for cost tracking and optimization + + ## When to Use + + Use text-completion when: + - No specific context needed (general knowledge) + - System prompt provides sufficient context + - Want direct control over prompting + + Use document-rag/graph-rag when: + - Need to ground response in specific documents + - Want to leverage knowledge graph relationships + - Require citations or provenance + + operationId: textCompletionService + security: + - bearerAuth: [] + parameters: + - name: flow + in: path + required: true + schema: + type: string + description: Flow instance ID + example: my-flow + requestBody: + required: true + content: + application/json: + schema: + $ref: '../../components/schemas/text-completion/TextCompletionRequest.yaml' + examples: + basicCompletion: + summary: Basic text completion + value: + system: You are a helpful assistant that provides concise answers. + prompt: Explain the concept of recursion in programming. + codeGeneration: + summary: Code generation with streaming + value: + system: You are an expert Python developer. Provide clean, well-documented code. + prompt: Write a function to calculate the Fibonacci sequence using memoization. + streaming: true + jsonResponse: + summary: Structured output request + value: + system: You are a JSON API. Respond only with valid JSON, no other text. + prompt: | + Extract key information from this text and return as JSON with fields: + title, author, year, summary. + + Text: "The Theory of Everything by Stephen Hawking (2006) explores..." 
+ responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../../components/schemas/text-completion/TextCompletionResponse.yaml' + examples: + completeResponse: + summary: Complete non-streaming response + value: + response: | + Recursion is a programming technique where a function calls itself + to solve a problem by breaking it down into smaller, similar subproblems. + Each recursive call works on a simpler version until reaching a base case. + in-token: 45 + out-token: 128 + model: gpt-4 + end-of-stream: false + streamingChunk: + summary: Streaming response chunk + value: + response: "Recursion is a programming technique" + end-of-stream: false + streamingComplete: + summary: Streaming complete with tokens + value: + response: "" + in-token: 45 + out-token: 128 + model: gpt-4 + end-of-stream: true + '401': + $ref: '../../components/responses/Unauthorized.yaml' + '500': + $ref: '../../components/responses/Error.yaml' diff --git a/specs/api/paths/flow/text-load.yaml b/specs/api/paths/flow/text-load.yaml new file mode 100644 index 00000000..5f918a3a --- /dev/null +++ b/specs/api/paths/flow/text-load.yaml @@ -0,0 +1,111 @@ +post: + tags: + - Flow Services + summary: Text Load - load text documents + description: | + Load text documents into processing pipeline for indexing and embedding. + + ## Text Load Overview + + Fire-and-forget document loading: + - **Input**: Text content (base64 encoded) + - **Process**: Chunk, embed, store + - **Output**: None (202 Accepted) + + Asynchronous processing - document queued for background processing. + + ## Processing Pipeline + + Text documents go through: + 1. **Chunking**: Split into overlapping chunks + 2. **Embedding**: Generate vectors for each chunk + 3. **Storage**: Store chunks + embeddings + 4. **Indexing**: Make searchable via document-embeddings query + + Pipeline runs asynchronously after request returns. 
+ + ## Text Format + + Text must be base64 encoded: + ``` + text_content = "This is the document..." + encoded = base64.b64encode(text_content.encode('utf-8')) + ``` + + Default charset is UTF-8, specify `charset` if different. + + ## Metadata + + Optional RDF triples describing document: + - Title, author, date + - Source URL + - Custom properties + - Used for organization and retrieval + + ## Use Cases + + - **Document ingestion**: Add documents to knowledge base + - **Bulk loading**: Process multiple documents + - **Content updates**: Replace existing documents + - **Library integration**: Load from document library + + ## No Response Data + + Returns 202 Accepted immediately: + - Document queued for processing + - No synchronous result + - No processing status + - Check document-embeddings query later to verify indexed + + operationId: textLoadService + security: + - bearerAuth: [] + parameters: + - name: flow + in: path + required: true + schema: + type: string + description: Flow instance ID + example: my-flow + requestBody: + required: true + content: + application/json: + schema: + $ref: '../../components/schemas/loading/TextLoadRequest.yaml' + examples: + simpleLoad: + summary: Load text document + value: + text: VGhpcyBpcyB0aGUgZG9jdW1lbnQgdGV4dC4uLg== + id: doc-123 + user: alice + collection: research + withMetadata: + summary: Load with RDF metadata + value: + text: UXVhbnR1bSBjb21wdXRpbmcgdXNlcyBxdWFudHVtIG1lY2hhbmljcyBwcmluY2lwbGVzLi4u + id: doc-456 + user: alice + collection: research + metadata: + - s: {v: "doc-456", e: false} + p: {v: "http://purl.org/dc/terms/title", e: true} + o: {v: "Introduction to Quantum Computing", e: false} + - s: {v: "doc-456", e: false} + p: {v: "http://purl.org/dc/terms/creator", e: true} + o: {v: "Dr. 
Alice Smith", e: false} + responses: + '202': + description: Document accepted for processing + content: + application/json: + schema: + type: object + properties: {} + example: {} + '401': + $ref: '../../components/responses/Unauthorized.yaml' + '500': + $ref: '../../components/responses/Error.yaml' diff --git a/specs/api/paths/flow/triples.yaml b/specs/api/paths/flow/triples.yaml new file mode 100644 index 00000000..5557ea5a --- /dev/null +++ b/specs/api/paths/flow/triples.yaml @@ -0,0 +1,129 @@ +post: + tags: + - Flow Services + summary: Triples query - pattern-based graph queries + description: | + Query knowledge graph using subject-predicate-object patterns. + + ## Triples Query Overview + + Query RDF triples with flexible pattern matching: + - Specify subject, predicate, and/or object + - Any combination of filters (all optional) + - Returns matching triples up to limit + + ## Pattern Matching + + Pattern syntax supports: + - **All triples**: Omit all filters (returns everything up to limit) + - **Subject match**: Specify `s` only (all triples about that subject) + - **Predicate match**: Specify `p` only (all uses of that property) + - **Object match**: Specify `o` only (all triples with that value) + - **Combinations**: Any combination of s/p/o + + ## RDF Value Format + + Each component (s/p/o) uses RdfValue format: + - **Entity/URI**: `{"v": "https://example.com/entity", "e": true}` + - **Literal**: `{"v": "Some text", "e": false}` + + ## Query Examples + + Find all properties of an entity: + ```json + {"s": {"v": "https://example.com/person/alice", "e": true}} + ``` + + Find all instances of a type: + ```json + { + "p": {"v": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "e": true}, + "o": {"v": "https://example.com/type/Person", "e": true} + } + ``` + + Find specific relationship: + ```json + { + "s": {"v": "https://example.com/person/alice", "e": true}, + "p": {"v": "https://example.com/knows", "e": true} + } + ``` + + ## Performance + + - Default 
limit: 10,000 triples + - Max limit: 100,000 triples + - More specific patterns = faster queries + - Consider limit for large result sets + + operationId: triplesQueryService + security: + - bearerAuth: [] + parameters: + - name: flow + in: path + required: true + schema: + type: string + description: Flow instance ID + example: my-flow + requestBody: + required: true + content: + application/json: + schema: + $ref: '../../components/schemas/query/TriplesQueryRequest.yaml' + examples: + allTriplesAboutEntity: + summary: All triples about an entity + value: + s: + v: https://example.com/person/alice + e: true + user: alice + collection: research + limit: 100 + allInstancesOfType: + summary: Find all instances of a type + value: + p: + v: http://www.w3.org/1999/02/22-rdf-syntax-ns#type + e: true + o: + v: https://example.com/type/Person + e: true + limit: 50 + specificRelationship: + summary: Find specific relationships + value: + p: + v: https://example.com/knows + e: true + user: alice + limit: 200 + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../../components/schemas/query/TriplesQueryResponse.yaml' + examples: + matchingTriples: + summary: Matching triples + value: + response: + - s: {v: "https://example.com/person/alice", e: true} + p: {v: "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", e: true} + o: {v: "https://example.com/type/Person", e: true} + - s: {v: "https://example.com/person/alice", e: true} + p: {v: "http://www.w3.org/2000/01/rdf-schema#label", e: true} + o: {v: "Alice", e: false} + - s: {v: "https://example.com/person/alice", e: true} + p: {v: "https://example.com/knows", e: true} + o: {v: "https://example.com/person/bob", e: true} + '401': + $ref: '../../components/responses/Unauthorized.yaml' + '500': + $ref: '../../components/responses/Error.yaml' diff --git a/specs/api/paths/import-core.yaml b/specs/api/paths/import-core.yaml new file mode 100644 index 00000000..38c99bf0 --- 
/dev/null +++ b/specs/api/paths/import-core.yaml @@ -0,0 +1,106 @@ +post: + tags: + - Import/Export + summary: Import Core - bulk import triples and embeddings + description: | + Import knowledge cores in bulk using streaming MessagePack format. + + ## Import Core Overview + + Bulk data import for knowledge graph: + - **Format**: MessagePack streaming + - **Content**: Triples and/or graph embeddings + - **Target**: Global knowledge storage + - **Use**: Backup restoration, data migration, bulk loading + + ## MessagePack Protocol + + Request body is MessagePack stream with message tuples: + + ### Triple Message + ``` + ("t", { + "m": { // Metadata + "i": "core-id", // Knowledge core ID + "m": [...], // Metadata triples array + "u": "user", // User + "c": "collection" // Collection + }, + "t": [...] // Triples array + }) + ``` + + ### Graph Embeddings Message + ``` + ("ge", { + "m": { // Metadata + "i": "core-id", + "m": [...], + "u": "user", + "c": "collection" + }, + "e": [ // Entities array + { + "e": {"v": "uri", "e": true}, // Entity RdfValue + "v": [0.1, 0.2, ...] // Vectors + } + ] + }) + ``` + + ## Query Parameters + + - **id**: Knowledge core ID + - **user**: User identifier + + ## Streaming + + Multiple messages can be sent in stream. + Each message processed as received. + No response body - returns 202 Accepted. 
+ + ## Use Cases + + - **Backup restoration**: Restore from export + - **Data migration**: Move data between systems + - **Bulk loading**: Initial knowledge base population + - **Replication**: Copy knowledge cores + + operationId: importCore + security: + - bearerAuth: [] + parameters: + - name: id + in: query + required: true + schema: + type: string + description: Knowledge core ID to import + example: core-123 + - name: user + in: query + required: true + schema: + type: string + description: User identifier + example: alice + requestBody: + required: true + content: + application/msgpack: + schema: + type: string + format: binary + description: MessagePack stream of knowledge data + responses: + '202': + description: Import accepted and processing + content: + application/json: + schema: + type: object + properties: {} + '401': + $ref: '../components/responses/Unauthorized.yaml' + '500': + $ref: '../components/responses/Error.yaml' diff --git a/specs/api/paths/knowledge.yaml b/specs/api/paths/knowledge.yaml new file mode 100644 index 00000000..71bba496 --- /dev/null +++ b/specs/api/paths/knowledge.yaml @@ -0,0 +1,196 @@ +post: + tags: + - Knowledge + summary: Knowledge graph core management + description: | + Manage knowledge graph cores - persistent storage of triples and embeddings. + + ## Knowledge Cores + + Knowledge cores are the foundational storage units for: + - **Triples**: RDF triples representing knowledge graph data + - **Graph Embeddings**: Vector embeddings for entities + - **Metadata**: Descriptive information about the knowledge + + Each core has an ID, user, and collection for organization. + + ## Operations + + ### list-kg-cores + List all knowledge cores for a user. Returns array of core IDs. + + ### get-kg-core + Retrieve a knowledge core by ID. Returns triples and/or graph embeddings. + Response is streamed - may receive multiple messages followed by EOS marker. + + ### put-kg-core + Store triples and/or graph embeddings. 
Creates new core or updates existing. + Can store triples only, embeddings only, or both together. + + ### delete-kg-core + Delete a knowledge core by ID. Removes all associated data. + + ### load-kg-core + Load a knowledge core into a running flow's collection. + Makes the data available for querying within that flow instance. + + ### unload-kg-core + Unload a knowledge core from a flow's collection. + Removes data from flow instance but doesn't delete the core. + + ## Streaming Responses + + The `get-kg-core` operation streams data in chunks: + 1. Multiple messages with `triples` or `graph-embeddings` + 2. Final message with `eos: true` to signal completion + + operationId: knowledgeService + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: '../components/schemas/knowledge/KnowledgeRequest.yaml' + examples: + listKnowledgeCores: + summary: List knowledge cores + value: + operation: list-kg-cores + user: alice + getKnowledgeCore: + summary: Get knowledge core + value: + operation: get-kg-core + id: core-123 + putTriplesOnly: + summary: Store triples + value: + operation: put-kg-core + triples: + metadata: + id: core-123 + user: alice + collection: default + metadata: + - s: {v: "https://example.com/core-123", e: true} + p: {v: "https://www.w3.org/1999/02/22-rdf-syntax-ns#type", e: true} + o: {v: "https://trustgraph.ai/e/knowledge-core", e: true} + triples: + - s: {v: "https://example.com/entity1", e: true} + p: {v: "https://www.w3.org/2000/01/rdf-schema#label", e: true} + o: {v: "Entity 1", e: false} + - s: {v: "https://example.com/entity1", e: true} + p: {v: "https://example.com/relatedTo", e: true} + o: {v: "https://example.com/entity2", e: true} + putEmbeddingsOnly: + summary: Store embeddings + value: + operation: put-kg-core + graph-embeddings: + metadata: + id: core-123 + user: alice + collection: default + metadata: [] + entities: + - entity: {v: "https://example.com/entity1", e: true} + vectors: 
[0.1, 0.2, 0.3, 0.4, 0.5] + - entity: {v: "https://example.com/entity2", e: true} + vectors: [0.6, 0.7, 0.8, 0.9, 1.0] + putTriplesAndEmbeddings: + summary: Store triples and embeddings together + value: + operation: put-kg-core + triples: + metadata: + id: core-456 + user: bob + collection: research + metadata: [] + triples: + - s: {v: "https://example.com/doc1", e: true} + p: {v: "http://purl.org/dc/terms/title", e: true} + o: {v: "Research Paper", e: false} + graph-embeddings: + metadata: + id: core-456 + user: bob + collection: research + metadata: [] + entities: + - entity: {v: "https://example.com/doc1", e: true} + vectors: [0.11, 0.22, 0.33] + deleteKnowledgeCore: + summary: Delete knowledge core + value: + operation: delete-kg-core + id: core-123 + user: alice + loadKnowledgeCore: + summary: Load core into flow + value: + operation: load-kg-core + id: core-123 + flow: my-flow + collection: default + unloadKnowledgeCore: + summary: Unload core from flow + value: + operation: unload-kg-core + id: core-123 + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../components/schemas/knowledge/KnowledgeResponse.yaml' + examples: + listKnowledgeCores: + summary: List of knowledge cores + value: + ids: + - core-123 + - core-456 + - core-789 + getKnowledgeCoreTriples: + summary: Knowledge core triples (streaming) + value: + triples: + metadata: + id: core-123 + user: alice + collection: default + metadata: + - s: {v: "https://example.com/core-123", e: true} + p: {v: "https://www.w3.org/1999/02/22-rdf-syntax-ns#type", e: true} + o: {v: "https://trustgraph.ai/e/knowledge-core", e: true} + triples: + - s: {v: "https://example.com/entity1", e: true} + p: {v: "https://www.w3.org/2000/01/rdf-schema#label", e: true} + o: {v: "Entity 1", e: false} + getKnowledgeCoreEmbeddings: + summary: Knowledge core embeddings (streaming) + value: + graph-embeddings: + metadata: + id: core-123 + user: alice + collection: default + 
metadata: [] + entities: + - entity: {v: "https://example.com/entity1", e: true} + vectors: [0.1, 0.2, 0.3, 0.4, 0.5] + endOfStream: + summary: End of stream marker + value: + eos: true + deleteSuccess: + summary: Delete successful (empty response) + value: {} + '401': + $ref: '../components/responses/Unauthorized.yaml' + '500': + $ref: '../components/responses/Error.yaml' diff --git a/specs/api/paths/librarian.yaml b/specs/api/paths/librarian.yaml new file mode 100644 index 00000000..ffbc6d9c --- /dev/null +++ b/specs/api/paths/librarian.yaml @@ -0,0 +1,153 @@ +post: + tags: + - Librarian + summary: Document library management + description: | + Manage document library: add, remove, list documents, and control processing. + + ## Document Library + + The librarian service manages a persistent library of documents that can be: + - Added with metadata for organization + - Queried and filtered by criteria + - Processed through flows on-demand or continuously + - Tracked for processing status + + ## Operations + + ### add-document + Add a document to the library with metadata (URL, title, author, etc.). + Documents can be added by URL or with inline content. + + ### remove-document + Remove a document from the library by document ID or URL. + + ### list-documents + List all documents in the library, optionally filtered by criteria. + + ### start-processing + Start processing library documents through a flow. Documents are queued + for processing and handled asynchronously. + + ### stop-processing + Stop ongoing library document processing. + + ### list-processing + List current processing tasks and their status. 
+ + operationId: librarianService + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: '../components/schemas/librarian/LibrarianRequest.yaml' + examples: + addDocumentByUrl: + summary: Add document by URL + value: + operation: add-document + flow: my-flow + collection: default + document-metadata: + url: https://example.com/document.pdf + title: Example Document + author: John Doe + metadata: + department: Engineering + category: Technical + addDocumentInline: + summary: Add document with inline content + value: + operation: add-document + flow: my-flow + collection: default + content: "This is the document content..." + document-metadata: + title: Inline Document + author: Jane Smith + removeDocument: + summary: Remove document + value: + operation: remove-document + flow: my-flow + collection: default + document-metadata: + url: https://example.com/document.pdf + listDocuments: + summary: List all documents + value: + operation: list-documents + flow: my-flow + collection: default + listDocumentsFiltered: + summary: List documents with criteria + value: + operation: list-documents + flow: my-flow + collection: default + criteria: + - key: author + value: John Doe + operator: eq + - key: department + value: Engineering + operator: eq + startProcessing: + summary: Start processing library documents + value: + operation: start-processing + flow: my-flow + collection: default + stopProcessing: + summary: Stop processing + value: + operation: stop-processing + flow: my-flow + collection: default + listProcessing: + summary: List processing status + value: + operation: list-processing + flow: my-flow + collection: default + responses: + '200': + description: Successful response + content: + application/json: + schema: + $ref: '../components/schemas/librarian/LibrarianResponse.yaml' + examples: + listDocuments: + summary: List of documents + value: + document-metadatas: + - url: https://example.com/doc1.pdf + 
title: Document 1 + author: John Doe + metadata: + department: Engineering + - url: https://example.com/doc2.pdf + title: Document 2 + author: Jane Smith + metadata: + department: Research + listProcessing: + summary: Processing status + value: + processing-metadatas: + - flow: my-flow + collection: default + status: processing + timestamp: "2024-01-15T10:30:00Z" + - flow: my-flow + collection: default + status: completed + timestamp: "2024-01-15T10:25:00Z" + '401': + $ref: '../components/responses/Unauthorized.yaml' + '500': + $ref: '../components/responses/Error.yaml' diff --git a/specs/api/paths/metrics-path.yaml b/specs/api/paths/metrics-path.yaml new file mode 100644 index 00000000..50c8b840 --- /dev/null +++ b/specs/api/paths/metrics-path.yaml @@ -0,0 +1,29 @@ +get: + tags: + - Metrics + summary: Metrics - Prometheus metrics with path + description: | + Proxy to Prometheus metrics with optional path parameter. + + operationId: getMetricsPath + security: + - bearerAuth: [] + parameters: + - name: path + in: path + required: true + schema: + type: string + description: Path to specific metrics endpoint + example: query + responses: + '200': + description: Prometheus metrics + content: + text/plain: + schema: + type: string + '401': + $ref: '../components/responses/Unauthorized.yaml' + '500': + $ref: '../components/responses/Error.yaml' diff --git a/specs/api/paths/metrics.yaml b/specs/api/paths/metrics.yaml new file mode 100644 index 00000000..0fe65438 --- /dev/null +++ b/specs/api/paths/metrics.yaml @@ -0,0 +1,71 @@ +get: + tags: + - Metrics + summary: Metrics - Prometheus metrics endpoint + description: | + Proxy to Prometheus metrics for system monitoring. 
+ + ## Metrics Overview + + Exposes system metrics via Prometheus format: + - **Gateway metrics**: Request rates, latencies, errors + - **Flow metrics**: Processing throughput, queue depths + - **System metrics**: Resource usage, health status + + ## Prometheus Format + + Returns metrics in Prometheus text exposition format: + ``` + # HELP metric_name Description + # TYPE metric_name counter + metric_name{label="value"} 123.45 + ``` + + ## Available Metrics + + Common metrics include: + - Request count and rates + - Response times (histograms) + - Error rates + - Active connections + - Queue depths + - Processing latencies + + ## Integration + + Standard Prometheus scraping: + - Configure Prometheus to scrape `/api/metrics` + - Set appropriate scrape interval + - Use bearer token if authentication enabled + + ## Path Parameter + + The separate `/api/metrics/{path}` endpoint accepts a path parameter for querying specific Prometheus endpoints + or metrics if the backend Prometheus supports it. + + operationId: getMetrics + security: + - bearerAuth: [] + responses: + '200': + description: Prometheus metrics + content: + text/plain: + schema: + type: string + example: | + # HELP http_requests_total Total HTTP requests + # TYPE http_requests_total counter + http_requests_total{method="POST",endpoint="/api/v1/flow/my-flow/service/agent"} 1234 + + # HELP http_request_duration_seconds HTTP request latency + # TYPE http_request_duration_seconds histogram + http_request_duration_seconds_bucket{le="0.1"} 500 + http_request_duration_seconds_bucket{le="0.5"} 950 + http_request_duration_seconds_bucket{le="1.0"} 990 + http_request_duration_seconds_sum 450.5 + http_request_duration_seconds_count 1000 + '401': + $ref: '../components/responses/Unauthorized.yaml' + '500': + $ref: '../components/responses/Error.yaml' diff --git a/specs/api/paths/websocket.yaml b/specs/api/paths/websocket.yaml new file mode 100644 index 00000000..168ee1e4 --- /dev/null +++ b/specs/api/paths/websocket.yaml @@ -0,0 +1,185 @@ +get: + tags: + -
WebSocket + summary: WebSocket - multiplexed service interface + description: | + WebSocket interface providing multiplexed access to all TrustGraph services over a single persistent connection. + + ## Overview + + The WebSocket API provides access to the same services as the REST API but with: + - **Multiplexed**: Multiple concurrent requests over one connection + - **Asynchronous**: Non-blocking request/response with ID matching + - **Efficient**: Reduced overhead compared to HTTP + - **Real-time**: Low latency bidirectional communication + + ## Connection + + Establish WebSocket connection to: + ``` + ws://localhost:8088/api/v1/socket + ``` + + ## Message Protocol + + All messages are JSON objects with the following structure: + + ### Request Message Format + + **Global Service Request** (no flow parameter): + ```json + { + "id": "req-123", + "service": "config", + "request": { + "operation": "list", + "type": "flow" + } + } + ``` + + **Flow-Hosted Service Request** (with flow parameter): + ```json + { + "id": "req-456", + "service": "agent", + "flow": "my-flow", + "request": { + "question": "What is quantum computing?", + "streaming": true + } + } + ``` + + **Request Fields**: + - `id` (string, required): Client-generated unique identifier for this request within the session. Used to match responses to requests. + - `service` (string, required): Service identifier (e.g., "config", "agent", "document-rag"). Same as `{kind}` in REST URLs. + - `flow` (string, optional): Flow ID for flow-hosted services. Omit for global services. + - `request` (object, required): Service-specific request payload. Same structure as REST API request body. 
+ + ### Response Message Format + + **Success Response**: + ```json + { + "id": "req-123", + "response": { + "chunk-type": "answer", + "content": "Quantum computing uses...", + "end-of-stream": false + } + } + ``` + + **Error Response**: + ```json + { + "id": "req-123", + "error": { + "type": "gateway-error", + "message": "Flow not found" + } + } + ``` + + **Response Fields**: + - `id` (string, required): Matches the `id` from the request. Client uses this to correlate responses. + - `response` (object, conditional): Service-specific response payload. Same structure as REST API response. Present on success. + - `error` (object, conditional): Error information with `type` and `message` fields. Present on failure. + + ## Service Routing + + The WebSocket protocol routes to services using message parameters instead of URL paths: + + | REST Endpoint | WebSocket Message | + |--------------|-------------------| + | `POST /api/v1/config` | `{"service": "config"}` | + | `POST /api/v1/flow/{flow}/service/agent` | `{"service": "agent", "flow": "my-flow"}` | + + **Global Services** (no `flow` parameter): + - `config` - Configuration management + - `flow` - Flow lifecycle and blueprints + - `librarian` - Document library management + - `knowledge` - Knowledge graph core management + - `collection-management` - Collection metadata + + **Flow-Hosted Services** (require `flow` parameter): + - AI services: `agent`, `text-completion`, `prompt`, `document-rag`, `graph-rag` + - Embeddings: `embeddings`, `graph-embeddings`, `document-embeddings` + - Query: `triples`, `objects`, `nlp-query`, `structured-query` + - Data loading: `text-load`, `document-load` + - Utilities: `mcp-tool`, `structured-diag` + + ## Request/Response Schemas + + The `request` and `response` fields use **identical schemas** to the REST API for each service. + See individual service documentation for detailed request/response formats. 
+ + ## Multiplexing and Asynchronous Operation + + Multiple requests can be in flight simultaneously: + - Client sends requests with unique `id` values + - Server processes requests concurrently + - Responses arrive asynchronously and may be out of order + - Client matches responses to requests using the `id` field + - No head-of-line blocking + + **Example concurrent requests**: + ```json + {"id": "req-1", "service": "config", "request": {...}} + {"id": "req-2", "service": "agent", "flow": "f1", "request": {...}} + {"id": "req-3", "service": "document-rag", "flow": "f2", "request": {...}} + ``` + + Responses may arrive in any order: `req-2`, `req-1`, `req-3` + + ## Streaming Responses + + Services that support streaming (e.g., agent, RAG) send multiple response messages with the same `id`: + ```json + {"id": "req-1", "response": {"chunk-type": "thought", "content": "...", "end-of-stream": false}} + {"id": "req-1", "response": {"chunk-type": "answer", "content": "...", "end-of-stream": false}} + {"id": "req-1", "response": {"chunk-type": "answer", "content": "...", "end-of-stream": true}} + ``` + + The `end-of-stream` flag (or service-specific completion flag) indicates the final message. 
+ + ## Authentication + + When `GATEWAY_SECRET` is set, include bearer token: + - As query parameter: `ws://localhost:8088/api/v1/socket?token=<token>` + - Or in WebSocket subprotocol header + + ## Benefits Over REST + + - **Lower latency**: No TCP/TLS handshake per request + - **Connection reuse**: Single persistent connection + - **Reduced overhead**: No HTTP headers per message + - **True streaming**: Bidirectional real-time communication + - **Efficient multiplexing**: Concurrent operations without connection pooling + + operationId: websocketConnection + security: + - bearerAuth: [] + parameters: + - name: Upgrade + in: header + required: true + schema: + type: string + enum: [websocket] + description: WebSocket upgrade header + - name: Connection + in: header + required: true + schema: + type: string + enum: [Upgrade] + description: Connection upgrade header + responses: + '101': + description: Switching Protocols - WebSocket connection established + '401': + $ref: '../components/responses/Unauthorized.yaml' + '500': + $ref: '../components/responses/Error.yaml' diff --git a/specs/api/security/bearerAuth.yaml b/specs/api/security/bearerAuth.yaml new file mode 100644 index 00000000..ca776645 --- /dev/null +++ b/specs/api/security/bearerAuth.yaml @@ -0,0 +1,12 @@ +type: http +scheme: bearer +description: | + Bearer token authentication. + + Set via `GATEWAY_SECRET` environment variable on the gateway. + If `GATEWAY_SECRET` is not set, authentication is disabled (development mode).
+ + Example: + ``` + Authorization: Bearer your-secret-token + ``` From 8a1737560388c48d573be29e2558b426c76318a4 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Thu, 15 Jan 2026 11:57:16 +0000 Subject: [PATCH 8/9] Add AsyncAPI spec for websocket (#613) * AsyncAPI for websocket docs * Delete old docs * Update docs/README.md to point to docs site * Add generated API docs --- docs/README.development.md | 82 - docs/README.md | 60 +- docs/README.quickstart-docker-compose.md | 185 - docs/api.html | 3497 +++++++++++++++++ docs/apis/README.md | 109 - docs/apis/api-agent.md | 136 - docs/apis/api-config.md | 261 -- docs/apis/api-core-import-export.md | 324 -- docs/apis/api-document-embeddings.md | 252 -- docs/apis/api-document-load.md | 3 - docs/apis/api-document-rag.md | 96 - docs/apis/api-embeddings.md | 107 - docs/apis/api-entity-contexts.md | 259 -- docs/apis/api-flow.md | 301 -- docs/apis/api-graph-embeddings.md | 156 - docs/apis/api-graph-rag.md | 98 - docs/apis/api-knowledge.md | 310 -- docs/apis/api-librarian.md | 392 -- docs/apis/api-mcp-tool.md | 137 - docs/apis/api-metrics.md | 313 -- docs/apis/api-prompt.md | 141 - docs/apis/api-text-completion.md | 106 - docs/apis/api-text-load.md | 168 - docs/apis/api-triples-query.md | 215 - docs/apis/pulsar.md | 230 -- docs/apis/websocket.md | 141 - docs/cli/README.md | 173 - docs/cli/tg-add-library-document.md | 285 -- docs/cli/tg-delete-flow-blueprint.md | 330 -- docs/cli/tg-delete-kg-core.md | 312 -- docs/cli/tg-delete-mcp-tool.md | 374 -- docs/cli/tg-delete-tool.md | 317 -- docs/cli/tg-dump-msgpack.md | 489 --- docs/cli/tg-get-flow-blueprint.md | 344 -- docs/cli/tg-get-kg-core.md | 365 -- docs/cli/tg-graph-to-turtle.md | 494 --- docs/cli/tg-init-pulsar-manager.md | 452 --- docs/cli/tg-init-trustgraph.md | 523 --- docs/cli/tg-invoke-agent.md | 163 - docs/cli/tg-invoke-document-rag.md | 438 --- docs/cli/tg-invoke-graph-rag.md | 221 -- docs/cli/tg-invoke-llm.md | 267 -- docs/cli/tg-invoke-mcp-tool.md | 448 --- 
docs/cli/tg-invoke-prompt.md | 430 -- docs/cli/tg-load-doc-embeds.md | 568 --- docs/cli/tg-load-kg-core.md | 313 -- docs/cli/tg-load-pdf.md | 480 --- docs/cli/tg-load-sample-documents.md | 567 --- docs/cli/tg-load-text.md | 211 - docs/cli/tg-load-turtle.md | 505 --- docs/cli/tg-put-flow-blueprint.md | 406 -- docs/cli/tg-put-kg-core.md | 241 -- docs/cli/tg-remove-library-document.md | 530 --- docs/cli/tg-save-doc-embeds.md | 609 --- docs/cli/tg-set-mcp-tool.md | 379 -- docs/cli/tg-set-prompt.md | 442 --- docs/cli/tg-set-token-costs.md | 464 --- docs/cli/tg-set-tool.md | 322 -- docs/cli/tg-show-config.md | 170 - docs/cli/tg-show-flow-blueprints.md | 330 -- docs/cli/tg-show-flow-state.md | 518 --- docs/cli/tg-show-flows.md | 207 - docs/cli/tg-show-graph.md | 286 -- docs/cli/tg-show-kg-cores.md | 227 -- docs/cli/tg-show-library-documents.md | 481 --- docs/cli/tg-show-library-processing.md | 572 --- docs/cli/tg-show-processor-state.md | 196 - docs/cli/tg-show-prompts.md | 454 --- docs/cli/tg-show-token-costs.md | 470 --- docs/cli/tg-show-token-rate.md | 246 -- docs/cli/tg-show-tools.md | 283 -- docs/cli/tg-start-flow.md | 189 - docs/cli/tg-start-library-processing.md | 563 --- docs/cli/tg-stop-flow.md | 256 -- docs/cli/tg-stop-library-processing.md | 507 --- docs/cli/tg-unload-kg-core.md | 335 -- docs/websocket.html | 3270 +++++++++++++++ specs/README.md | 216 + specs/api/openapi.yaml | 2 +- specs/build-docs.sh | 38 + specs/websocket/STREAMING.md | 357 ++ specs/websocket/asyncapi.yaml | 87 + specs/websocket/channels/socket.yaml | 33 + .../components/messages/ServiceError.yaml | 27 + .../components/messages/ServiceRequest.yaml | 55 + .../components/messages/ServiceResponse.yaml | 32 + .../messages/requests/AgentRequest.yaml | 28 + .../requests/CollectionManagementRequest.yaml | 21 + .../messages/requests/ConfigRequest.yaml | 29 + .../requests/DocumentEmbeddingsRequest.yaml | 27 + .../requests/DocumentLoadRequest.yaml | 27 + .../messages/requests/DocumentRagRequest.yaml | 
28 + .../messages/requests/EmbeddingsRequest.yaml | 26 + .../messages/requests/FlowRequest.yaml | 27 + .../requests/GraphEmbeddingsRequest.yaml | 27 + .../messages/requests/GraphRagRequest.yaml | 28 + .../messages/requests/KnowledgeRequest.yaml | 31 + .../messages/requests/LibrarianRequest.yaml | 22 + .../messages/requests/McpToolRequest.yaml | 30 + .../messages/requests/NlpQueryRequest.yaml | 27 + .../messages/requests/ObjectsRequest.yaml | 26 + .../messages/requests/PromptRequest.yaml | 29 + .../requests/StructuredDiagRequest.yaml | 26 + .../requests/StructuredQueryRequest.yaml | 30 + .../requests/TextCompletionRequest.yaml | 28 + .../messages/requests/TextLoadRequest.yaml | 27 + .../messages/requests/TriplesRequest.yaml | 29 + .../components/schemas/ErrorEnvelope.yaml | 37 + .../components/schemas/RequestEnvelope.yaml | 56 + .../components/schemas/ResponseEnvelope.yaml | 35 + 110 files changed, 8325 insertions(+), 23324 deletions(-) delete mode 100644 docs/README.development.md delete mode 100644 docs/README.quickstart-docker-compose.md create mode 100644 docs/api.html delete mode 100644 docs/apis/README.md delete mode 100644 docs/apis/api-agent.md delete mode 100644 docs/apis/api-config.md delete mode 100644 docs/apis/api-core-import-export.md delete mode 100644 docs/apis/api-document-embeddings.md delete mode 100644 docs/apis/api-document-load.md delete mode 100644 docs/apis/api-document-rag.md delete mode 100644 docs/apis/api-embeddings.md delete mode 100644 docs/apis/api-entity-contexts.md delete mode 100644 docs/apis/api-flow.md delete mode 100644 docs/apis/api-graph-embeddings.md delete mode 100644 docs/apis/api-graph-rag.md delete mode 100644 docs/apis/api-knowledge.md delete mode 100644 docs/apis/api-librarian.md delete mode 100644 docs/apis/api-mcp-tool.md delete mode 100644 docs/apis/api-metrics.md delete mode 100644 docs/apis/api-prompt.md delete mode 100644 docs/apis/api-text-completion.md delete mode 100644 docs/apis/api-text-load.md delete mode 
100644 docs/apis/api-triples-query.md delete mode 100644 docs/apis/pulsar.md delete mode 100644 docs/apis/websocket.md delete mode 100644 docs/cli/README.md delete mode 100644 docs/cli/tg-add-library-document.md delete mode 100644 docs/cli/tg-delete-flow-blueprint.md delete mode 100644 docs/cli/tg-delete-kg-core.md delete mode 100644 docs/cli/tg-delete-mcp-tool.md delete mode 100644 docs/cli/tg-delete-tool.md delete mode 100644 docs/cli/tg-dump-msgpack.md delete mode 100644 docs/cli/tg-get-flow-blueprint.md delete mode 100644 docs/cli/tg-get-kg-core.md delete mode 100644 docs/cli/tg-graph-to-turtle.md delete mode 100644 docs/cli/tg-init-pulsar-manager.md delete mode 100644 docs/cli/tg-init-trustgraph.md delete mode 100644 docs/cli/tg-invoke-agent.md delete mode 100644 docs/cli/tg-invoke-document-rag.md delete mode 100644 docs/cli/tg-invoke-graph-rag.md delete mode 100644 docs/cli/tg-invoke-llm.md delete mode 100644 docs/cli/tg-invoke-mcp-tool.md delete mode 100644 docs/cli/tg-invoke-prompt.md delete mode 100644 docs/cli/tg-load-doc-embeds.md delete mode 100644 docs/cli/tg-load-kg-core.md delete mode 100644 docs/cli/tg-load-pdf.md delete mode 100644 docs/cli/tg-load-sample-documents.md delete mode 100644 docs/cli/tg-load-text.md delete mode 100644 docs/cli/tg-load-turtle.md delete mode 100644 docs/cli/tg-put-flow-blueprint.md delete mode 100644 docs/cli/tg-put-kg-core.md delete mode 100644 docs/cli/tg-remove-library-document.md delete mode 100644 docs/cli/tg-save-doc-embeds.md delete mode 100644 docs/cli/tg-set-mcp-tool.md delete mode 100644 docs/cli/tg-set-prompt.md delete mode 100644 docs/cli/tg-set-token-costs.md delete mode 100644 docs/cli/tg-set-tool.md delete mode 100644 docs/cli/tg-show-config.md delete mode 100644 docs/cli/tg-show-flow-blueprints.md delete mode 100644 docs/cli/tg-show-flow-state.md delete mode 100644 docs/cli/tg-show-flows.md delete mode 100644 docs/cli/tg-show-graph.md delete mode 100644 docs/cli/tg-show-kg-cores.md delete mode 100644 
docs/cli/tg-show-library-documents.md delete mode 100644 docs/cli/tg-show-library-processing.md delete mode 100644 docs/cli/tg-show-processor-state.md delete mode 100644 docs/cli/tg-show-prompts.md delete mode 100644 docs/cli/tg-show-token-costs.md delete mode 100644 docs/cli/tg-show-token-rate.md delete mode 100644 docs/cli/tg-show-tools.md delete mode 100644 docs/cli/tg-start-flow.md delete mode 100644 docs/cli/tg-start-library-processing.md delete mode 100644 docs/cli/tg-stop-flow.md delete mode 100644 docs/cli/tg-stop-library-processing.md delete mode 100644 docs/cli/tg-unload-kg-core.md create mode 100644 docs/websocket.html create mode 100644 specs/README.md create mode 100755 specs/build-docs.sh create mode 100644 specs/websocket/STREAMING.md create mode 100644 specs/websocket/asyncapi.yaml create mode 100644 specs/websocket/channels/socket.yaml create mode 100644 specs/websocket/components/messages/ServiceError.yaml create mode 100644 specs/websocket/components/messages/ServiceRequest.yaml create mode 100644 specs/websocket/components/messages/ServiceResponse.yaml create mode 100644 specs/websocket/components/messages/requests/AgentRequest.yaml create mode 100644 specs/websocket/components/messages/requests/CollectionManagementRequest.yaml create mode 100644 specs/websocket/components/messages/requests/ConfigRequest.yaml create mode 100644 specs/websocket/components/messages/requests/DocumentEmbeddingsRequest.yaml create mode 100644 specs/websocket/components/messages/requests/DocumentLoadRequest.yaml create mode 100644 specs/websocket/components/messages/requests/DocumentRagRequest.yaml create mode 100644 specs/websocket/components/messages/requests/EmbeddingsRequest.yaml create mode 100644 specs/websocket/components/messages/requests/FlowRequest.yaml create mode 100644 specs/websocket/components/messages/requests/GraphEmbeddingsRequest.yaml create mode 100644 specs/websocket/components/messages/requests/GraphRagRequest.yaml create mode 100644 
specs/websocket/components/messages/requests/KnowledgeRequest.yaml create mode 100644 specs/websocket/components/messages/requests/LibrarianRequest.yaml create mode 100644 specs/websocket/components/messages/requests/McpToolRequest.yaml create mode 100644 specs/websocket/components/messages/requests/NlpQueryRequest.yaml create mode 100644 specs/websocket/components/messages/requests/ObjectsRequest.yaml create mode 100644 specs/websocket/components/messages/requests/PromptRequest.yaml create mode 100644 specs/websocket/components/messages/requests/StructuredDiagRequest.yaml create mode 100644 specs/websocket/components/messages/requests/StructuredQueryRequest.yaml create mode 100644 specs/websocket/components/messages/requests/TextCompletionRequest.yaml create mode 100644 specs/websocket/components/messages/requests/TextLoadRequest.yaml create mode 100644 specs/websocket/components/messages/requests/TriplesRequest.yaml create mode 100644 specs/websocket/components/schemas/ErrorEnvelope.yaml create mode 100644 specs/websocket/components/schemas/RequestEnvelope.yaml create mode 100644 specs/websocket/components/schemas/ResponseEnvelope.yaml diff --git a/docs/README.development.md b/docs/README.development.md deleted file mode 100644 index 82e41e86..00000000 --- a/docs/README.development.md +++ /dev/null @@ -1,82 +0,0 @@ - -# Contributing - -## Generally - -Branching is good discipline to get into with multiple people working -on the same repo for different reasons. - -To create a branch... - -- `git checkout -b etl` # to create the branch and check it out -- `git push` # to push the branch head to the upstream repo. You get an error and a command to run. You don't have to do this straight away, but I like to get the BS admin out the way. At this stage your branch HEAD points to the head of main. - -## Adding a new module - -So, to add a new module... - -- It needs a name. Say `kg-mymodule` but you can call it what you like. 
-- It also needs a place in the Python package hierarchy, because it's - basically going to be its own loadable module. We have a `trustgraph.kg` - module it can be a child of. So, you need a directory - `trustgraph/kg/mymodule` -- You need three files: - - `__init__.py` which defines the module entry point. - - Then, `__main__.py` means the module is executable. - - Finally a module to contain the code, let's call it `extract.py`. - The name doesn't matter but it has to match what's in `__init__.py` and - `__main__.py`. -- The easiest way to get start is maybe make a copy of an existing module. -- `cp -r trustgraph/kg/extract_relationships trustgraph/kg/mymodule/` -- Finally you need a script entry point, in `scripts`. Copy - `scripts/kg-extract-relationships` to `scripts/kg-mymodule` -- In that `kg-mymodule` file, change the import line to import your module, - `trustgraph.kg.mymodule`. - -## Development testing - -To run your module, you don't need to have it running in a container. -It can connect to Pulsar. - -The plumbing for your new module pretty needs to be right. Look at the -input_queue, output_queue and subscriber settings near the top of your -new module code. - -So, before changing the code any more, if you copied an existing module, -check the plumbing works with your renamed module. - -To run standalone, it is recommended to take an existing docker-compose -file, run everything you need except the module you're developing. - -Then when you launch with docker compose, you'll get everything running -except your module. - -To run your module, you need to set up the Python environment as you did -in the quickstart e.g. run `. env/bin/activate` and `export PYTHONPATH=.` - -You're not running kg-mymodule in a container, so it can't use docker -internal DNS to get to the containers, but the docker compose file -exposes everything to the host anyway. You should be able to access Pulsar -on localhost port 6650, for instance. 
- -You should be able to run your module on the host and point at Pulsar thus: - -```bash -scripts/kg-mymodule -p pulsar://localhost:6650 -``` - -You could try loading data, and check some stuff ends up in the graph. If you get that far you're ready to hack the contents of extract.py to -do what you want. - -## Structure of the code - -The Processor class, `run` method is where all the fun takes place. - -``` - while True: - msg = self.consumer.receive() -``` - -That bit :point_up: is a loop which is executed every time a new message -arrives. - diff --git a/docs/README.md b/docs/README.md index f760d55c..35d70763 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,59 +1,21 @@ -# TrustGraph Documentation Index +# TrustGraph Documentation -Welcome to the TrustGraph documentation. This directory contains comprehensive guides for using TrustGraph's APIs and command-line tools. +Welcome to TrustGraph! For comprehensive documentation, please visit: -## Documentation Overview +## 📖 [https://docs.trustgraph.ai](https://docs.trustgraph.ai) -### 📚 [API Documentation](apis/README.md) -Complete reference for TrustGraph's APIs, including REST, WebSocket, Pulsar, and Python SDK interfaces. Learn how to integrate TrustGraph services into your applications. +The main documentation site includes: -### 🖥️ [CLI Documentation](cli/README.md) -Comprehensive guide to TrustGraph's command-line interface. Includes detailed documentation for all CLI commands, from system administration to knowledge graph management. - -### 🚀 [Quick Start Guide](README.quickstart-docker-compose.md) -Step-by-step guide to get TrustGraph running using Docker Compose. Perfect for first-time users who want to quickly deploy and test TrustGraph. 
+- **[Overview](https://docs.trustgraph.ai/overview)** - Introduction to TrustGraph concepts and architecture +- **[Guides](https://docs.trustgraph.ai/guides)** - Step-by-step tutorials and how-to guides +- **[Deployment](https://docs.trustgraph.ai/deployment)** - Deployment options and configuration +- **[Reference](https://docs.trustgraph.ai/reference)** - API specifications and CLI documentation ## Getting Started -If you're new to TrustGraph, we recommend starting with the -[Compose - Quick Start Guide](README.quickstart-docker-compose.md) -to get a working system up and running quickly. +**New to TrustGraph?** Start with the [Overview](https://docs.trustgraph.ai/overview) to understand the system. -For developers integrating TrustGraph into applications, check out the -[API Documentation](apis/README.md) to understand the available interfaces. +**Ready to deploy?** Check out the [Deployment Guide](https://docs.trustgraph.ai/deployment). -For system administrators and power users, the -[CLI Documentation](cli/README.md) provides detailed information about all -command-line tools. +**Integrating with code?** See the [API Reference](https://docs.trustgraph.ai/reference) for REST, WebSocket, and SDK documentation. 
-## Ways to deploy - -If you haven't deployed TrustGraph before, the 'compose' deployment -mentioned above is going to be the least commitment of setting things up: -See [Quick Start Guide](README.quickstart-docker-compose.md) - -Other deployment mechanisms include: -- [Scaleway Kubernetes deployment using Pulumi](https://github.com/trustgraph-ai/pulumi-trustgraph-scaleway) -- [Intel Gaudi and GPU](https://github.com/trustgraph-ai/trustgraph-tiber-cloud) - tested on Intel Tiber cloud -- [Azure Kubernetes deployment using Pulumi](https://github.com/trustgraph-ai/pulumi-trustgraph-aks) -- [AWS EC2 single instance deployment using Pulumi](https://github.com/trustgraph-ai/pulumi-trustgraph-ec2) -- [GCP GKE cloud deployment using Pulumi](https://github.com/trustgraph-ai/pulumi-trustgraph-gke) -- [RKE Kubernetes on AWS deployment using Pulumi](https://github.com/trustgraph-ai/pulumi-trustgraph-aws-rke) -- It should be possible to deploy on AWS EKS, but we haven't been able to - script anything reliable so far. - -## Support - -For questions, issues, or contributions: - -- **GitHub Issues**: Report bugs and feature requests -- **Documentation**: This documentation covers most use cases -- **Community**: Join discussions and share experiences - -## Related Resources - -- [TrustGraph GitHub Repository](https://github.com/trustgraph-ai/trustgraph) -- [Docker Hub Images](https://hub.docker.com/u/trustgraph) -- [Example Notebooks](https://github.com/trustgraph-ai/example-notebooks) - - shows some example use of various APIs. - diff --git a/docs/README.quickstart-docker-compose.md b/docs/README.quickstart-docker-compose.md deleted file mode 100644 index cf8a042f..00000000 --- a/docs/README.quickstart-docker-compose.md +++ /dev/null @@ -1,185 +0,0 @@ - -# Getting Started - -## Preparation - -> [!TIP] -> Before launching `TrustGraph`, be sure to have the `Docker Engine` or `Podman Machine` installed and running on the host machine. 
-> -> - [Install the Docker Engine](https://docs.docker.com/engine/install/) -> - [Install the Podman Machine](http://podman.io/) - -> [!NOTE] -> `TrustGraph` has been tested on `Linux` and `MacOS` with `Docker` and `Podman`. `Windows` deployments have not been tested. - -> [!TIP] -> If using `Podman`, the only change will be to substitute `podman` instead of `docker` in all commands. - -## Create the configuration - -This guide talks you through the Compose file launch, which is the easiest -way to lauch on a standalone machine, or a single cloud instance. -See [README](README.md) for links to other deployment mechanisms. - -To create the deployment configuration, go to the -[deployment portal](https://config-ui.demo.trustgraph.ai/) and follow the -instructions. -- Select Docker Compose or Podman Compose as the deployment - mechanism. -- Use Cassandra for the graph store, it's easiest and most tested. -- Use Qdrant for the vector store, it's easiest and most tested. -- Chunker: Recursive, chunk size of 1000, 50 overlap should be fine. -- Pick your favourite LLM model: - - If you have enough horsepower in a local GPU, LMStudio is an easy - starting point for a local model deployment. Ollama is fairly easy. - - VertexAI on Google is relatively straightforward for a cloud - model-as-a-service LLM, and you can get some free credits. -- Max output tokens as per the model, 2048 is safe. -- Customisation, check LLM Prompt Manager and Agent Tools. -- Finish deployment, Generate and download the deployment bundle. - Read the extra deploy steps on that page. - -## Preparing TrustGraph - -Below is a step-by-step guide to deploy `TrustGraph`, extract knowledge from a PDF, build the vector and graph stores, and finally generate responses with Graph RAG. - -### Install requirements - -``` -python3 -m venv env -. 
env/bin/activate -pip install trustgraph-cli -``` -## Running TrustGraph - -``` -docker-compose -f docker-compose.yaml up -d -``` - -After running the chosen `Docker Compose` file, all `TrustGraph` services will launch and be ready to run `Naive Extraction` jobs and provide `RAG` responses using the extracted knowledge. - -### Verify TrustGraph Containers - -On first running a `Docker Compose` file, it may take a while (depending on your network connection) to pull all the necessary components. Once all of the components have been pulled. - -A quick check that TrustGraph processors have started: - -``` -tg-show-processor-state -``` - -Processors start quickly, but can take a while (~60 seconds) for -Pulsar and Cassandra to start. - -If you have any concerns, -check that the TrustGraph containers are running: - -``` -docker ps -``` - -Any containers that have exited unexpectedly can be found by checking the `STATUS` field using the following: - -``` -docker ps -a -``` - -> [!TIP] -> Before proceeding, allow the system to stabilize. A safe warm up period is `120 seconds`. If services seem to be "stuck", it could be because services did not have time to initialize correctly and are trying to restart. Waiting `120 seconds` before launching any scripts should provide much more reliable operation. - -### Everything running - -An easy way to check all the main start is complete: - -``` -tg-show-flows -``` - -You should see a default flow. If you see an error, leave it and try again. - -### Load some sample documents - -``` -tg-load-sample-documents -``` - -### Workbench - -A UI is launched on port 8888, see if you can see it at -[http://localhost:8888/](http://localhost:8888/) - -Verify things are working: -- Go to the prompts page see that you can see some prompts -- Go to the library page, and check you can see the sample documents you - just loaded. - -### Load a document - -- On the library page, select a document. 
Beyond State Vigilance is a - smallish doc to work with. -- Select the doc by clicking on it. -- Select Submit at the bottom of the screen on the action bar. -- Select a processing flow, use the default. -- Click submit. - -### Look in Grafana - -A Grafana is launched on port 3000, see if you can see it at -[http://localhost:3000/](http://localhost:3000/) - -- Login as admin, password admin. -- Skip the password change screen / change the password. -- Verify things are working by selecting the TrustGraph dashboard -- After a short while, you should see the backlog rise to a few hundred - document chunks. - -Once some chunks are loaded, you can start to work with the document. - -### Graph Parsing - -To check that the knowledge graph is successfully parsing data: - -``` -tg-show-graph -``` - -The output should be a set of semantic triples in [N-Triples](https://www.w3.org/TR/rdf12-n-triples/) format. - -``` -http://trustgraph.ai/e/enterprise http://trustgraph.ai/e/was-carried to altitude and released for a gliding approach and landing at the Mojave Desert test center. -http://trustgraph.ai/e/enterprise http://www.w3.org/2000/01/rdf-schema#label Enterprise. -http://trustgraph.ai/e/enterprise http://www.w3.org/2004/02/skos/core#definition A prototype space shuttle orbiter used for atmospheric flight testing. -``` - -### Work with the document - -Back on the workbench, click on the 'Vector search' tab, and -search for something e.g. state. You should see some search results. -Click on results to start exploring the knowledge graph. - -Click on Graph view on an explored page to visualize the graph. - -### Queries over the document - -On workbench, click Graph RAG and enter a question e.g. -What is this document about? - -### Shutting Down TrustGraph - -When shutting down `TrustGraph`, it's best to shut down all Docker containers and volumes. 
Run the `docker compose down` command that corresponds to your model and graph store deployment: - -``` -docker compose -f document-compose.yaml down -v -t 0 -``` - -> [!TIP] -> To confirm all Docker containers have been shut down, check that the following list is empty: -> ``` -> docker ps -> ``` -> -> To confirm all Docker volumes have been removed, check that the following list is empty: -> ``` -> docker volume ls -> ``` - diff --git a/docs/api.html b/docs/api.html new file mode 100644 index 00000000..201771ec --- /dev/null +++ b/docs/api.html @@ -0,0 +1,3497 @@ + + + + + + TrustGraph API Gateway + + + + + + + + + +

TrustGraph API Gateway (1.8)

Download OpenAPI specification:

REST API for TrustGraph - an AI-powered knowledge graph and RAG system.

+

Overview

The API provides access to:

+
    +
  • Global Services: Configuration, flow management, knowledge storage, library management
  • +
  • Flow-Hosted Services: AI services like RAG, text completion, embeddings (require running flow)
  • +
  • Import/Export: Bulk data operations for triples, embeddings, entity contexts
  • +
  • WebSocket: Multiplexed interface for all services
  • +
+

Service Types

Global Services

+

Fixed endpoints accessible via /api/v1/{kind}:

+
    +
  • config - Configuration management
  • +
  • flow - Flow lifecycle and blueprints
  • +
  • librarian - Document library management
  • +
  • knowledge - Knowledge graph core management
  • +
  • collection-management - Collection metadata
  • +
+

Flow-Hosted Services

+

Require running flow instance, accessed via /api/v1/flow/{flow}/service/{kind}:

+
    +
  • AI services: agent, text-completion, prompt, RAG (document/graph)
  • +
  • Embeddings: embeddings, graph-embeddings, document-embeddings
  • +
  • Query: triples, objects, nlp-query, structured-query
  • +
  • Data loading: text-load, document-load
  • +
  • Utilities: mcp-tool, structured-diag
  • +
+

Authentication

Bearer token authentication when the GATEWAY_SECRET environment variable is set. +Include the token in the Authorization header:

+
Authorization: Bearer <token>
+
+

If GATEWAY_SECRET is not set, API runs without authentication (development mode).

+

Field Naming

All JSON fields use kebab-case: flow-id, blueprint-name, doc-limit, etc.

+

Error Responses

All endpoints may return errors in this format:

+
{
+  "error": {
+    "type": "gateway-error",
+    "message": "Timeout"
+  }
+}
+
+

Config

Configuration management (global service)

+

Configuration service

Manage TrustGraph configuration including flows, prompts, token costs, parameter types, and more.

+

Operations

+

config

+

Get the complete system configuration including all flows, prompts, token costs, etc.

+

list

+

List all configuration items of a specific type (e.g., all flows, all prompts).

+

get

+

Retrieve specific configuration items by type and key.

+

put

+

Create or update configuration values.

+

delete

+

Delete configuration items.

+

Configuration Types

+
    +
  • flow - Flow instance definitions
  • +
  • flow-blueprint - Flow blueprint definitions (stored separately from flow instances)
  • +
  • prompt - Prompt templates
  • +
  • token-cost - Model token pricing
  • +
  • parameter-type - Parameter type definitions
  • +
  • interface-description - Interface descriptions
  • +
  • Custom types as needed
  • +
+

Important Distinction

+

The config service manages stored configuration. +The flow service (/api/v1/flow) manages running flow instances.

+
    +
  • Use config service to store/retrieve flow definitions
  • +
  • Use flow service to start/stop/manage running flows
  • +
+
Authorizations:
bearerAuth
Request Body schema: application/json
required
operation
required
string
Enum: "config" "list" "get" "put" "delete"

Operation to perform:

+
    +
  • config: Get complete configuration
  • +
  • list: List all items of a specific type
  • +
  • get: Get specific configuration items
  • +
  • put: Set/update configuration values
  • +
  • delete: Delete configuration items
  • +
+
type
string

Configuration type (required for list, get, put, delete operations). +Common types: flow, prompt, token-cost, parameter-type, interface-description

+
Array of objects

Keys to retrieve (for get operation) or delete (for delete operation)

+
Array of objects

Values to set/update (for put operation)

+

Responses

Request samples

Content type
application/json
Example
{
  • "operation": "config"
}

Response samples

Content type
application/json
Example
{
  • "version": 42,
  • "config": {
    }
}

Flow

Flow lifecycle and blueprint management (global service)

+

Flow lifecycle and blueprint management

Manage flow instances and blueprints.

+

Important Distinction

+

The flow service manages running flow instances. +The config service (/api/v1/config) manages stored configuration.

+
    +
  • Use flow service to start/stop/manage running flows
  • +
  • Use config service to store/retrieve flow definitions
  • +
+

Flow Instance Operations

+

start-flow

+

Start a new flow instance from a blueprint. The blueprint must exist (either built-in or created via put-blueprint).

+

Parameters are resolved from:

+
    +
  1. User-provided values (--param)
  2. +
  3. Default values from parameter type definitions
  4. +
  5. Controlled-by relationships
  6. +
+

stop-flow

+

Stop a running flow instance. This terminates all processors and releases resources.

+

list-flows

+

List all currently running flow instances.

+

get-flow

+

Get details of a running flow including its configuration, parameters, and interface queue names.

+

Blueprint Operations

+

list-blueprints

+

List all available flow blueprints (built-in and custom).

+

get-blueprint

+

Retrieve a blueprint definition showing its structure, parameters, processors, and interfaces.

+

put-blueprint

+

Create or update a flow blueprint definition.

+

Blueprints define:

+
    +
  • Class processors: Shared across all instances of this blueprint
  • +
  • Flow processors: Unique to each flow instance
  • +
  • Interfaces: Entry points for external systems
  • +
  • Parameters: Configurable values for customization
  • +
+

delete-blueprint

+

Delete a custom blueprint definition. Built-in blueprints cannot be deleted.

+
Authorizations:
bearerAuth
Request Body schema: application/json
required
operation
required
string
Enum: "start-flow" "stop-flow" "list-flows" "get-flow" "list-blueprints" "get-blueprint" "put-blueprint" "delete-blueprint"

Flow operation:

+
    +
  • start-flow: Start a new flow instance from a blueprint
  • +
  • stop-flow: Stop a running flow instance
  • +
  • list-flows: List all running flow instances
  • +
  • get-flow: Get details of a running flow
  • +
  • list-blueprints: List available flow blueprints
  • +
  • get-blueprint: Get blueprint definition
  • +
  • put-blueprint: Create/update blueprint definition
  • +
  • delete-blueprint: Delete blueprint definition
  • +
+
flow-id
string

Flow instance ID (required for start-flow, stop-flow, get-flow)

+
blueprint-name
string

Flow blueprint name (required for start-flow, get-blueprint, put-blueprint, delete-blueprint)

+
object

Flow blueprint definition (required for put-blueprint)

+
description
string

Flow description (optional for start-flow)

+
object

Flow parameters (for start-flow). +All values are stored as strings, regardless of input type.

+

Responses

Request samples

Content type
application/json
Example
{
  • "operation": "start-flow",
  • "flow-id": "my-flow",
  • "blueprint-name": "document-rag",
  • "description": "My document processing flow",
  • "parameters": {
    }
}

Response samples

Content type
application/json
Example
{
  • "flow-id": "my-flow"
}

Librarian

Document library management (global service)

+

Document library management

Manage document library: add, remove, list documents, and control processing.

+

Document Library

+

The librarian service manages a persistent library of documents that can be:

+
    +
  • Added with metadata for organization
  • +
  • Queried and filtered by criteria
  • +
  • Processed through flows on-demand or continuously
  • +
  • Tracked for processing status
  • +
+

Operations

+

add-document

+

Add a document to the library with metadata (URL, title, author, etc.). +Documents can be added by URL or with inline content.

+

remove-document

+

Remove a document from the library by document ID or URL.

+

list-documents

+

List all documents in the library, optionally filtered by criteria.

+

start-processing

+

Start processing library documents through a flow. Documents are queued +for processing and handled asynchronously.

+

stop-processing

+

Stop ongoing library document processing.

+

list-processing

+

List current processing tasks and their status.

+
Authorizations:
bearerAuth
Request Body schema: application/json
required
operation
required
string
Enum: "add-document" "remove-document" "list-documents" "start-processing" "stop-processing" "list-processing"

Library operation:

+
    +
  • add-document: Add document to library
  • +
  • remove-document: Remove document from library
  • +
  • list-documents: List documents in library
  • +
  • start-processing: Start processing library documents
  • +
  • stop-processing: Stop library processing
  • +
  • list-processing: List processing status
  • +
+
flow
string

Flow ID

+
collection
string
Default: "default"

Collection identifier

+
user
string
Default: "trustgraph"

User identifier

+
document-id
string

Document identifier

+
processing-id
string

Processing task identifier

+
object (DocumentMetadata)

Document metadata for library management

+
object (ProcessingMetadata)

Processing metadata for library document processing

+
content
string

Document content (for add-document with inline content)

+
Array of objects

Search criteria for filtering documents

+

Responses

Request samples

Content type
application/json
Example
{
  • "operation": "add-document",
  • "flow": "my-flow",
  • "collection": "default",
  • "document-metadata": {}
}

Response samples

Content type
application/json
Example
{}

Knowledge

Knowledge graph core management (global service)

+

Knowledge graph core management

Manage knowledge graph cores - persistent storage of triples and embeddings.

+

Knowledge Cores

+

Knowledge cores are the foundational storage units for:

+
    +
  • Triples: RDF triples representing knowledge graph data
  • +
  • Graph Embeddings: Vector embeddings for entities
  • +
  • Metadata: Descriptive information about the knowledge
  • +
+

Each core has an ID, user, and collection for organization.

+

Operations

+

list-kg-cores

+

List all knowledge cores for a user. Returns an array of core IDs.

+

get-kg-core

+

Retrieve a knowledge core by ID. Returns triples and/or graph embeddings. +Response is streamed - may receive multiple messages followed by EOS marker.

+

put-kg-core

+

Store triples and/or graph embeddings. Creates new core or updates existing. +Can store triples only, embeddings only, or both together.

+

delete-kg-core

+

Delete a knowledge core by ID. Removes all associated data.

+

load-kg-core

+

Load a knowledge core into a running flow's collection. +Makes the data available for querying within that flow instance.

+

unload-kg-core

+

Unload a knowledge core from a flow's collection. +Removes data from the flow instance but doesn't delete the core.

+

Streaming Responses

+

The get-kg-core operation streams data in chunks:

+
    +
  1. Multiple messages with triples or graph-embeddings
  2. +
  3. Final message with eos: true to signal completion
  4. +
+
Authorizations:
bearerAuth
Request Body schema: application/json
required
operation
required
string
Enum: "list-kg-cores" "get-kg-core" "put-kg-core" "delete-kg-core" "load-kg-core" "unload-kg-core"

Knowledge core operation:

+
    +
  • list-kg-cores: List knowledge cores for user
  • +
  • get-kg-core: Get knowledge core by ID
  • +
  • put-kg-core: Store triples and/or embeddings
  • +
  • delete-kg-core: Delete knowledge core by ID
  • +
  • load-kg-core: Load knowledge core into flow
  • +
  • unload-kg-core: Unload knowledge core from flow
  • +
+
user
string
Default: "trustgraph"

User identifier (for list-kg-cores, put-kg-core, delete-kg-core)

+
id
string

Knowledge core ID (for get, put, delete, load, unload)

+
flow
string

Flow ID (for load-kg-core)

+
collection
string
Default: "default"

Collection identifier (for load-kg-core)

+
object

Triples to store (for put-kg-core)

+
object

Graph embeddings to store (for put-kg-core)

+

Responses

Request samples

Content type
application/json
Example
{
  • "operation": "list-kg-cores",
  • "user": "alice"
}

Response samples

Content type
application/json
Example
{
  • "ids": [
    ]
}

Collection

Collection metadata management (global service)

+

Collection metadata management

Manage collection metadata for organizing documents and knowledge.

+

Collections

+

Collections are organizational units for grouping:

+
    +
  • Documents in the librarian
  • +
  • Knowledge cores
  • +
  • User data
  • +
+

Each collection has:

+
    +
  • user: Owner identifier
  • +
  • collection: Unique collection ID
  • +
  • name: Human-readable display name
  • +
  • description: Purpose and contents
  • +
  • tags: Labels for filtering and organization
  • +
+

Operations

+

list-collections

+

List all collections for a user. Optionally filter by tags and limit results. +Returns array of collection metadata.

+

update-collection

+

Create or update collection metadata. If collection doesn't exist, it's created. +If it exists, metadata is updated. Allows setting name, description, and tags.

+

delete-collection

+

Delete a collection by user and collection ID. This removes the metadata but +typically does not delete the associated data (documents, knowledge cores).

+
Authorizations:
bearerAuth
Request Body schema: application/json
required
operation
required
string
Enum: "list-collections" "update-collection" "delete-collection"

Collection operation:

+
    +
  • list-collections: List collections for user
  • +
  • update-collection: Create or update collection metadata
  • +
  • delete-collection: Delete collection
  • +
+
user
string
Default: "trustgraph"

User identifier

+
collection
string

Collection identifier (for update, delete)

+
timestamp
string <date-time>

ISO timestamp

+
name
string

Human-readable collection name (for update)

+
description
string

Collection description (for update)

+
tags
Array of strings

Collection tags for organization (for update)

+
tag-filter
Array of strings

Filter collections by tags (for list)

+
limit
integer
Default: 0

Maximum number of results (for list)

+

Responses

Request samples

Content type
application/json
Example
{
  • "operation": "list-collections",
  • "user": "alice"
}

Response samples

Content type
application/json
Example
{
  • "timestamp": "2024-01-15T10:30:00Z",
  • "collections": [
    ]
}

Flow Services

Services hosted within flow instances

+

Agent service - conversational AI with reasoning

AI agent that can understand questions, reason about them, and take actions.

+

Agent Overview

+

The agent service provides a conversational AI that:

+
    +
  • Understands natural language questions
  • +
  • Reasons about problems using thoughts
  • +
  • Takes actions to gather information
  • +
  • Provides coherent answers
  • +
+

Request Format

+

Send a question with optional:

+
    +
  • state: Continue from previous conversation
  • +
  • history: Previous agent steps for context
  • +
  • group: Collaborative agent identifiers
  • +
  • streaming: Enable streaming responses
  • +
+

Response Modes

+

Streaming Mode (streaming: true)

+

Responses arrive as chunks with chunk-type:

+
    +
  • thought: Agent's reasoning process
  • +
  • action: Action being taken
  • +
  • observation: Result from action
  • +
  • answer: Final response to user
  • +
  • error: Error occurred
  • +
+

Each chunk may have multiple messages. Check flags:

+
    +
  • end-of-message: Current chunk type complete
  • +
  • end-of-dialog: Entire conversation complete
  • +
+

Legacy Mode (streaming: false)

+

Single response with:

+
    +
  • answer: Complete answer
  • +
  • thought: Reasoning (if any)
  • +
  • observation: Observations (if any)
  • +
+

Multi-turn Conversations

+

Include history array with previous steps to maintain context. +Each step has: thought, action, arguments, observation.

+
Authorizations:
bearerAuth
path Parameters
flow
required
string
Example: my-flow

Flow instance ID

+
Request Body schema: application/json
required
question
required
string

User question or prompt for the agent

+
state
string

Agent state for continuation (optional, for multi-turn)

+
group
Array of strings

Group identifiers for collaborative agents (optional)

+
Array of objects

Conversation history (optional, list of previous agent steps)

+
user
string
Default: "trustgraph"

User identifier for multi-tenancy

+
streaming
boolean
Default: false

Enable streaming response delivery

+

Responses

Request samples

Content type
application/json
Example
{
  • "question": "What is the capital of France?",
  • "user": "alice"
}

Response samples

Content type
application/json
Example
{
  • "chunk-type": "thought",
  • "content": "I need to search for information about quantum computing",
  • "end-of-message": false,
  • "end-of-dialog": false
}

Document RAG - retrieve and generate from documents

Retrieval-Augmented Generation over document embeddings.

+

Document RAG Overview

+

Document RAG combines:

+
    +
  1. Retrieval: Search document embeddings using semantic similarity
  2. +
  3. Generation: Use LLM to synthesize answer from retrieved documents
  4. +
+

This provides grounded answers based on your document corpus.

+

Query Process

+
    +
  1. Convert query to embedding
  2. +
  3. Search document embeddings for most similar chunks
  4. +
  5. Retrieve top N document chunks (configurable via doc-limit)
  6. +
  7. Pass query + retrieved context to LLM
  8. +
  9. Generate answer grounded in documents
  10. +
+

Streaming

+

Enable streaming: true to receive the answer as it's generated:

+
    +
  • Multiple messages with response content
  • +
  • Final message with end-of-stream: true
  • +
+

Without streaming, returns the complete answer in a single response.

+

Parameters

+
    +
  • doc-limit: Controls retrieval depth (1-100, default 20)
      +
    • Higher = more context but slower
    • +
    • Lower = faster but may miss relevant info
    • +
    +
  • +
  • collection: Target specific document collection
  • +
  • user: Multi-tenant isolation
  • +
+
Authorizations:
bearerAuth
path Parameters
flow
required
string
Example: my-flow

Flow instance ID

+
Request Body schema: application/json
required
query
required
string

User query or question

+
user
string
Default: "trustgraph"

User identifier for multi-tenancy

+
collection
string
Default: "default"

Collection to search within

+
doc-limit
integer [ 1 .. 100 ]
Default: 20

Maximum number of documents to retrieve

+
streaming
boolean
Default: false

Enable streaming response delivery

+

Responses

Request samples

Content type
application/json
Example
{
  • "query": "What are the key findings in the research papers?",
  • "user": "alice",
  • "collection": "research"
}

Response samples

Content type
application/json
Example
{
  • "response": "The research papers present three key findings:\n1. Quantum entanglement exhibits non-local correlations\n2. Bell's inequality is violated in experimental tests\n3. Applications in quantum cryptography are promising\n",
  • "end-of-stream": false
}

Graph RAG - retrieve and generate from knowledge graph

Retrieval-Augmented Generation over knowledge graph.

+

Graph RAG Overview

+

Graph RAG combines:

+
    +
  1. Retrieval: Find relevant entities and subgraph from knowledge graph
  2. +
  3. Generation: Use LLM to reason over graph structure and generate answer
  4. +
+

This provides graph-aware answers that leverage relationships and structure.

+

Query Process

+
    +
  1. Identify relevant entities from query (using embeddings)
  2. +
  3. Retrieve connected subgraph around entities
  4. +
  5. Optionally traverse paths up to max-path-length hops
  6. +
  7. Limit subgraph size to stay within context window
  8. +
  9. Pass query + graph structure to LLM
  10. +
  11. Generate answer incorporating graph relationships
  12. +
+

Streaming

+

Enable streaming: true to receive the answer as it's generated:

+
    +
  • Multiple messages with response content
  • +
  • Final message with end-of-stream: true
  • +
+

Without streaming, returns the complete answer in a single response.

+

Parameters

+

Control retrieval scope with multiple knobs:

+
    +
  • entity-limit: How many starting entities to find (1-200, default 50)
  • +
  • triple-limit: Triples per entity (1-100, default 30)
  • +
  • max-subgraph-size: Total subgraph cap (10-5000, default 1000)
  • +
  • max-path-length: Graph traversal depth (1-5, default 2)
  • +
+

Higher limits = more context but:

+
    +
  • Slower retrieval
  • +
  • Larger context for LLM
  • +
  • May hit context window limits
  • +
+

Use Cases

+

Best for queries requiring:

+
    +
  • Relationship understanding ("How are X and Y connected?")
  • +
  • Multi-hop reasoning ("What's the path from A to B?")
  • +
  • Structural analysis ("What are the main entities related to X?")
  • +
+
Authorizations:
bearerAuth
path Parameters
flow
required
string
Example: my-flow

Flow instance ID

+
Request Body schema: application/json
required
query
required
string

User query or question

+
user
string
Default: "trustgraph"

User identifier for multi-tenancy

+
collection
string
Default: "default"

Collection to search within

+
entity-limit
integer [ 1 .. 200 ]
Default: 50

Maximum number of entities to retrieve

+
triple-limit
integer [ 1 .. 100 ]
Default: 30

Maximum number of triples to retrieve per entity

+
max-subgraph-size
integer [ 10 .. 5000 ]
Default: 1000

Maximum total subgraph size (triples)

+
max-path-length
integer [ 1 .. 5 ]
Default: 2

Maximum path length for graph traversal

+
streaming
boolean
Default: false

Enable streaming response delivery

+

Responses

Request samples

Content type
application/json
Example
{
  • "query": "What connections exist between quantum physics and computer science?",
  • "user": "alice",
  • "collection": "research"
}

Response samples

Content type
application/json
Example
{
  • "response": "Quantum physics and computer science intersect primarily through quantum computing.\nThe knowledge graph shows connections through:\n- Quantum algorithms (Shor's algorithm, Grover's algorithm)\n- Quantum information theory\n- Computational complexity theory\n",
  • "end-of-stream": false
}

Text completion - direct LLM generation

Direct text completion using LLM without retrieval augmentation.

+

Text Completion Overview

+

Pure LLM generation for:

+
    +
  • General knowledge questions
  • +
  • Creative writing
  • +
  • Code generation
  • +
  • Analysis and reasoning
  • +
  • Any task not requiring specific document/graph context
  • +
+

System vs Prompt

+
    +
  • system: Sets LLM behavior, role, constraints
      +
    • "You are a helpful assistant"
    • +
    • "You are an expert Python developer"
    • +
    • "Respond in JSON format"
    • +
    +
  • +
  • prompt: The actual user request/question
  • +
+

Streaming

+

Enable streaming: true to receive tokens as generated:

+
    +
  • Multiple messages with partial response
  • +
  • Final message with end-of-stream: true
  • +
+

Without streaming, returns the complete response in a single message.

+

Token Counting

+

Response includes token usage:

+
    +
  • in-token: Input tokens (system + prompt)
  • +
  • out-token: Generated tokens
  • +
  • Useful for cost tracking and optimization
  • +
+

When to Use

+

Use text-completion when:

+
    +
  • No specific context needed (general knowledge)
  • +
  • System prompt provides sufficient context
  • +
  • Want direct control over prompting
  • +
+

Use document-rag/graph-rag when:

+
    +
  • Need to ground response in specific documents
  • +
  • Want to leverage knowledge graph relationships
  • +
  • Require citations or provenance
  • +
+
Authorizations:
bearerAuth
path Parameters
flow
required
string
Example: my-flow

Flow instance ID

+
Request Body schema: application/json
required
system
required
string

System prompt that sets behavior and context for the LLM

+
prompt
required
string

User prompt or question

+
streaming
boolean
Default: false

Enable streaming response delivery

+

Responses

Request samples

Content type
application/json
Example
{
  • "system": "You are a helpful assistant that provides concise answers.",
  • "prompt": "Explain the concept of recursion in programming."
}

Response samples

Content type
application/json
Example
{
  • "response": "Recursion is a programming technique where a function calls itself\nto solve a problem by breaking it down into smaller, similar subproblems.\nEach recursive call works on a simpler version until reaching a base case.\n",
  • "in-token": 45,
  • "out-token": 128,
  • "model": "gpt-4",
  • "end-of-stream": false
}

Prompt service - template-based generation

Execute stored prompt templates with variable substitution.

+

Prompt Service Overview

+

The prompt service enables:

+
    +
  • Reusable prompt templates stored in configuration
  • +
  • Variable substitution for dynamic prompts
  • +
  • Consistent prompt engineering across requests
  • +
  • Text or structured object outputs
  • +
+

Template System

+

Prompts are stored via config service (/api/v1/config) with:

+
    +
  • id: Unique prompt identifier
  • +
  • template: Prompt text with {variable} placeholders
  • +
  • system: Optional system prompt
  • +
  • output_format: "text" or "object"
  • +
+

Example template:

+
Summarize the following document in {max_length} words:
+
+{document}
+
+

Variable Substitution

+

Two ways to pass variables:

+
    +
  1. terms (explicit JSON strings):

    +
    {
    +  "terms": {
    +    "document": "\"Text here...\"",
    +    "max_length": "\"200\""
    +  }
    +}
    +
    +
  2. +
  3. variables (auto-converted):

    +
    {
    +  "variables": {
    +    "document": "Text here...",
    +    "max_length": 200
    +  }
    +}
    +
    +
  4. +
+

Output Types

+
    +
  • text: Plain text response in text field
  • +
  • object: Structured JSON in object field (as string)
  • +
+

Streaming

+

Enable streaming: true to receive response incrementally.

+

Use Cases

+
    +
  • Document summarization
  • +
  • Entity extraction
  • +
  • Classification tasks
  • +
  • Data transformation
  • +
  • Any repeatable LLM task with consistent prompting
  • +
+
Authorizations:
bearerAuth
path Parameters
flow
required
string
Example: my-flow

Flow instance ID

+
Request Body schema: application/json
required
id
required
string

Prompt template ID (stored in config)

+
terms
object

Template variables as key-value pairs (values are JSON strings)

+
variables
object

Alternative to terms - variables as native JSON values (auto-converted)

+
streaming
boolean
Default: false

Enable streaming response delivery

+

Responses

Request samples

Content type
application/json
Example
{
  • "id": "summarize-document",
  • "terms": {
    }
}

Response samples

Content type
application/json
Example
{
  • "text": "This document provides an overview of quantum computing fundamentals and cryptographic applications.",
  • "end-of-stream": false
}

Embeddings - text to vector conversion

Convert text to embedding vectors for semantic similarity search.

+

Embeddings Overview

+

Embeddings transform text into dense vector representations that:

+
    +
  • Capture semantic meaning
  • +
  • Enable similarity comparisons via cosine distance
  • +
  • Support semantic search and retrieval
  • +
  • Power RAG systems
  • +
+

Use Cases

+
    +
  • Document indexing: Convert documents to vectors for storage
  • +
  • Query encoding: Convert search queries for similarity matching
  • +
  • Semantic similarity: Find related texts via vector distance
  • +
  • Clustering: Group similar content
  • +
  • Classification: Use as features for ML models
  • +
+

Vector Dimensions

+

Dimension count depends on embedding model:

+
    +
  • text-embedding-ada-002: 1536 dimensions
  • +
  • text-embedding-3-small: 1536 dimensions
  • +
  • text-embedding-3-large: 3072 dimensions
  • +
  • Custom models: Varies
  • +
+

Single Request

+

Unlike batch embedding APIs, this endpoint processes one text at a time. For bulk operations, use document-load or text-load services.

+
Authorizations:
bearerAuth
path Parameters
flow
required
string
Example: my-flow

Flow instance ID

+
Request Body schema: application/json
required
text
required
string

Text to convert to embedding vector

+

Responses

Request samples

Content type
application/json
Example
{
  • "text": "Machine learning"
}

Response samples

Content type
application/json
{
  • "vectors": [
    ]
}

MCP Tool - execute Model Context Protocol tools

Execute MCP (Model Context Protocol) tools for agent capabilities.

+

MCP Tool Overview

+

MCP tools provide agent capabilities through standardized protocol:

+
    +
  • Search tools: Web search, document search
  • +
  • Data tools: Database queries, API calls
  • +
  • Action tools: File operations, system commands
  • +
  • Integration tools: Third-party service connectors
  • +
+

Tools extend agent capabilities beyond pure LLM generation.

+

Tool Execution

+

Tools are:

+
    +
  1. Registered via MCP protocol
  2. +
  3. Discovered by agent
  4. +
  5. Called with structured parameters
  6. +
  7. Return text or structured results
  8. +
+

Request Format

+
    +
  • name: Tool identifier (e.g., "search", "calculator", "weather")
  • +
  • parameters: Tool-specific arguments as JSON object
  • +
+

Response Format

+

Tools can return:

+
    +
  • text: Plain text result (simple tools)
  • +
  • object: Structured JSON result (complex tools)
  • +
+

Tool Registration

+

Tools are registered via MCP server configuration:

+
    +
  • Define tool schema (name, parameters, description)
  • +
  • Implement tool handler
  • +
  • Register with MCP server
  • +
  • Agent discovers and uses tool
  • +
+

Use Cases

+
    +
  • Web search: Find external information
  • +
  • Calculator: Perform calculations
  • +
  • Database query: Retrieve structured data
  • +
  • API integration: Call external services
  • +
  • File operations: Read/write files
  • +
  • Code execution: Run scripts
  • +
+
Authorizations:
bearerAuth
path Parameters
flow
required
string
Example: my-flow

Flow instance ID

+
Request Body schema: application/json
required
name
required
string

Tool name to execute

+
parameters
object

Tool parameters (JSON object, auto-converted to string internally)

+

Responses

Request samples

Content type
application/json
Example
{
  • "name": "search",
  • "parameters": {
    }
}

Response samples

Content type
application/json
Example
{
  • "text": "The result is 309"
}

Triples query - pattern-based graph queries

Query knowledge graph using subject-predicate-object patterns.

+

Triples Query Overview

+

Query RDF triples with flexible pattern matching:

+
    +
  • Specify subject, predicate, and/or object
  • +
  • Any combination of filters (all optional)
  • +
  • Returns matching triples up to limit
  • +
+

Pattern Matching

+

Pattern syntax supports:

+
    +
  • All triples: Omit all filters (returns everything up to limit)
  • +
  • Subject match: Specify s only (all triples about that subject)
  • +
  • Predicate match: Specify p only (all uses of that property)
  • +
  • Object match: Specify o only (all triples with that value)
  • +
  • Combinations: Any combination of s/p/o
  • +
+

RDF Value Format

+

Each component (s/p/o) uses RdfValue format:

+
    +
  • Entity/URI: {"v": "https://example.com/entity", "e": true}
  • +
  • Literal: {"v": "Some text", "e": false}
  • +
+

Query Examples

+

Find all properties of an entity:

+
{"s": {"v": "https://example.com/person/alice", "e": true}}
+
+

Find all instances of a type:

+
{
+  "p": {"v": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "e": true},
+  "o": {"v": "https://example.com/type/Person", "e": true}
+}
+
+

Find specific relationship:

+
{
+  "s": {"v": "https://example.com/person/alice", "e": true},
+  "p": {"v": "https://example.com/knows", "e": true}
+}
+
+

Performance

+
    +
  • Default limit: 10,000 triples
  • +
  • Max limit: 100,000 triples
  • +
  • More specific patterns = faster queries
  • +
  • Consider limit for large result sets
  • +
+
Authorizations:
bearerAuth
path Parameters
flow
required
string
Example: my-flow

Flow instance ID

+
Request Body schema: application/json
required
s
object (RdfValue)

Subject filter (optional)

+
p
object (RdfValue)

Predicate filter (optional)

+
o
object (RdfValue)

Object filter (optional)

+
limit
integer [ 1 .. 100000 ]
Default: 10000

Maximum number of triples to return

+
user
string
Default: "trustgraph"

User identifier

+
collection
string
Default: "default"

Collection to query

+

Responses

Request samples

Content type
application/json
Example
{}

Response samples

Content type
application/json
{}

Objects query - GraphQL over knowledge graph

Query knowledge graph using GraphQL for object-oriented data access.

+

Objects Query Overview

+

GraphQL interface to knowledge graph:

+
    +
  • Schema-driven: Predefined types and relationships
  • +
  • Flexible queries: Request exactly what you need
  • +
  • Nested data: Traverse relationships in single query
  • +
  • Type-safe: Strong typing with introspection
  • +
+

Abstracts RDF triples into familiar object model.

+

GraphQL Benefits

+

Compared to triples query:

+
    +
  • Developer-friendly: Objects instead of triples
  • +
  • Efficient: Get related data in one query
  • +
  • Typed: Schema defines available fields
  • +
  • Discoverable: Introspection for tooling
  • +
+

Query Structure

+

Standard GraphQL query format:

+
query OperationName($var: Type!) {
+  fieldName(arg: $var) {
+    subField1
+    subField2
+    nestedObject {
+      nestedField
+    }
+  }
+}
+
+

Variables

+

Pass variables for parameterized queries:

+
{
+  "query": "query GetPerson($id: ID!) { person(id: $id) { name } }",
+  "variables": {"id": "https://example.com/person/alice"}
+}
+
+

Error Handling

+

GraphQL distinguishes:

+
    +
  • Field errors: Invalid query, missing fields (in errors array)
  • +
  • System errors: Connection issues, timeouts (in error object)
  • +
+

Partial data may be returned with field errors.

+

Schema Definition

+

Schema defines available types via config service. Use introspection query to discover schema.

+
Authorizations:
bearerAuth
path Parameters
flow
required
string
Example: my-flow

Flow instance ID

+
Request Body schema: application/json
required
query
required
string

GraphQL query string

+
variables
object

GraphQL query variables

+
operation-name
string

Operation name (for multi-operation documents)

+
user
string
Default: "trustgraph"

User identifier

+
collection
string
Default: "default"

Collection to query

+

Responses

Request samples

Content type
application/json
Example
{
  • "query": "{\n person(id: \"https://example.com/person/alice\") {\n name\n email\n }\n}\n",
  • "user": "alice",
  • "collection": "research"
}

Response samples

Content type
application/json
Example
{
  • "data": {
    },
  • "extensions": {
    }
}

NLP Query - natural language to structured query

Convert natural language questions to structured GraphQL queries.

+

NLP Query Overview

+

Transforms user questions into executable GraphQL:

+
    +
  • Natural input: Ask questions in plain English
  • +
  • Structured output: Get GraphQL query + variables
  • +
  • Schema-aware: Uses knowledge graph schema
  • +
  • Confidence scoring: Know how well question was understood
  • +
+

Enables non-technical users to query knowledge graph.

+

Process

+
    +
  1. Parse natural language question
  2. +
  3. Identify entities and relationships
  4. +
  5. Map to GraphQL schema types
  6. +
  7. Generate query with variables
  8. +
  9. Return query + confidence score
  10. +
+

Using Results

+

Generated query can be:

+
    +
  • Executed via objects query service
  • +
  • Inspected and modified if needed
  • +
  • Cached for similar questions
  • +
+

Example workflow:

+
1. User asks: "Who does Alice know?"
+2. NLP Query generates GraphQL
+3. Execute via /api/v1/flow/{flow}/service/objects
+4. Return results to user
+
+

Schema Detection

+

Response includes detected-schemas array showing:

+
    +
  • Which types were identified
  • +
  • What entities were matched
  • +
  • Schema coverage of question
  • +
+

Helps understand query scope.

+

Confidence Scores

+
    +
  • 0.9-1.0: High confidence, likely correct
  • +
  • 0.7-0.9: Good confidence, probably correct
  • +
  • 0.5-0.7: Medium confidence, may need review
  • +
  • < 0.5: Low confidence, likely incorrect
  • +
+

Low scores suggest:

+
    +
  • Ambiguous question
  • +
  • Missing schema coverage
  • +
  • Complex query structure
  • +
+
Authorizations:
bearerAuth
path Parameters
flow
required
string
Example: my-flow

Flow instance ID

+
Request Body schema: application/json
required
question
required
string

Natural language question

+
max-results
integer [ 1 .. 10000 ]
Default: 100

Maximum results to return when query is executed

+

Responses

Request samples

Content type
application/json
Example
{
  • "question": "Who does Alice know?",
  • "max-results": 50
}

Response samples

Content type
application/json
Example
{
  • "graphql-query": "query GetConnections($person: ID!) {\n person(id: $person) {\n knows { name email }\n }\n}\n",
  • "variables": {},
  • "detected-schemas": [
    ],
  • "confidence": 0.92
}

Structured Query - question to results (all-in-one)

Ask natural language questions and get results directly.

+

Structured Query Overview

+

Combines two operations in one call:

+
    +
  1. NLP Query: Generate GraphQL from question
  2. +
  3. Objects Query: Execute generated query
  4. +
  5. Return Results: Direct answer data
  6. +
+

Simplest way to query knowledge graph with natural language.

+

Comparison with Other Services

+

Structured Query (this service)

+
    +
  • Input: Natural language question
  • +
  • Output: Query results (data)
  • +
  • Use when: Want simple, direct answers
  • +
+

NLP Query + Objects Query (separate calls)

+
    +
  • Step 1: Convert question → GraphQL
  • +
  • Step 2: Execute GraphQL → results
  • +
  • Use when: Need to inspect/modify query before execution
  • +
+

Triples Query (low-level)

+
    +
  • Input: RDF pattern
  • +
  • Output: Matching triples
  • +
  • Use when: Need precise control over graph queries
  • +
+

Response Format

+

Returns standard GraphQL response:

+
    +
  • data: Query results (null if error)
  • +
  • errors: Field-level errors (array of strings)
  • +
  • error: System-level error (generation or execution failure)
  • +
+

Error Handling

+

Three types of errors:

+
    +
  1. Query generation failed: Couldn't understand question
      +
    • Error in error object
    • +
    • data = null
    • +
    +
  2. +
  3. Query execution failed: Generated query had errors
      +
    • Errors in errors array
    • +
    • data may be partial
    • +
    +
  4. +
  5. System error: Infrastructure issue
      +
    • Error in error object
    • +
    +
  6. +
+

Performance

+

Convenience vs control trade-off:

+
    +
  • Faster development: One call instead of two
  • +
  • Less control: Can't inspect/modify generated query
  • +
  • Simpler code: No need to handle intermediate steps
  • +
+
Authorizations:
bearerAuth
path Parameters
flow
required
string
Example: my-flow

Flow instance ID

+
Request Body schema: application/json
required
question
required
string

Natural language question

+
user
string
Default: "trustgraph"

User identifier

+
collection
string
Default: "default"

Collection to query

+

Responses

Request samples

Content type
application/json
Example
{
  • "question": "Who does Alice know?",
  • "user": "alice",
  • "collection": "research"
}

Response samples

Content type
application/json
Example
{
  • "data": {
    },
  • "errors": [ ]
}

Structured Diag - analyze structured data formats

Analyze and understand structured data (CSV, JSON, XML).

+

Structured Diag Overview

+

Helps process unknown structured data:

+
    +
  • Detect format: Identify CSV, JSON, or XML
  • +
  • Generate schema: Create descriptor from sample
  • +
  • Match schemas: Find existing schemas that fit data
  • +
  • Full diagnosis: Complete analysis in one call
  • +
+

Essential for data ingestion pipelines.

+

Operations

+

detect-type

+

Identify data format from sample:

+
    +
  • Input: Data sample
  • +
  • Output: Format (csv/json/xml) + confidence
  • +
  • Use when: Format is unknown
  • +
+

generate-descriptor

+

Create schema descriptor:

+
    +
  • Input: Sample + known type
  • +
  • Output: Field definitions, types, structure
  • +
  • Use when: Need to understand data structure
  • +
diagnose

+

Combined analysis:

+
    +
  • Input: Data sample
  • +
  • Output: Format + descriptor + metadata
  • +
  • Use when: Starting from scratch
  • +
+

schema-selection

+

Find matching schemas:

+
    +
  • Input: Data sample
  • +
  • Output: List of schema IDs that match
  • +
  • Use when: Have existing schemas, need to match data
  • +
+

Data Types

+

Supported formats:

+
    +
  • CSV: Comma-separated values (or custom delimiter)
  • +
  • JSON: JSON objects or arrays
  • +
  • XML: XML documents
  • +
+

Options

+

Format-specific options:

+
    +
  • CSV: delimiter, has_header, quote_char
  • +
  • JSON: array_path (for nested arrays)
  • +
  • XML: root_element, record_path
  • +
+

Workflow Example

+
    +
  1. Receive unknown data file
  2. +
  3. Call diagnose operation with sample
  4. +
  5. Get format + schema descriptor
  6. +
  7. Use descriptor to process full dataset
  8. +
  9. Load data via document-load or text-load
  10. +
+
Authorizations:
bearerAuth
path Parameters
flow
required
string
Example: my-flow

Flow instance ID

+
Request Body schema: application/json
required
operation
required
string
Enum: "detect-type" "generate-descriptor" "diagnose" "schema-selection"

Diagnosis operation:

+
    +
  • detect-type: Identify data format (CSV, JSON, XML)
  • +
  • generate-descriptor: Create schema descriptor for data
  • +
  • diagnose: Full analysis (detect + generate descriptor)
  • +
  • schema-selection: Find matching schemas for data
  • +
+
sample
required
string

Data sample to analyze (text content)

+
type
string
Enum: "csv" "json" "xml"

Data type (required for generate-descriptor)

+
schema-name
string

Target schema name for descriptor generation (optional)

+
options
object

Format-specific options (e.g., CSV delimiter)

+

Responses

Request samples

Content type
application/json
Example
{
  • "operation": "detect-type",
  • "sample": "name,age,email\nAlice,30,alice@example.com\nBob,25,bob@example.com\n"
}

Response samples

Content type
application/json
Example
{
  • "operation": "detect-type",
  • "detected-type": "csv",
  • "confidence": 0.95
}

Graph Embeddings Query - find similar entities

Query graph embeddings to find similar entities by vector similarity.

+

Graph Embeddings Query Overview

+

Find entities semantically similar to a query vector:

+
    +
  • Input: Query embedding vector
  • +
  • Search: Compare against stored entity embeddings
  • +
  • Output: Most similar entities (RDF URIs)
  • +
+

Core component of graph RAG retrieval.

+

Use Cases

+
    +
  • Entity discovery: Find related entities
  • +
  • Concept expansion: Discover similar concepts
  • +
  • Graph exploration: Navigate by semantic similarity
  • +
  • RAG retrieval: Get entities for context
  • +
+

Process

+
    +
  1. Obtain query embedding (via embeddings service)
  2. +
  3. Query stored entity embeddings
  4. +
  5. Calculate cosine similarity
  6. +
  7. Return top N most similar entities
  8. +
  9. Use entities to retrieve triples/subgraph
  10. +
+

Similarity Scoring

+

Uses cosine similarity between vectors:

+
    +
  • Results ordered by similarity (most similar first)
  • +
  • No explicit similarity scores returned
  • +
  • Limit controls result count
  • +
+

Entity Format

+

Returns RDF values (entities):

+
    +
  • URI entities: {v: "https://...", e: true}
  • +
  • These are references to knowledge graph entities
  • +
  • Use with triples query to get entity details
  • +
+
Authorizations:
bearerAuth
path Parameters
flow
required
string
Example: my-flow

Flow instance ID

+
Request Body schema: application/json
required
vectors
required
Array of numbers

Query embedding vector

+
limit
integer [ 1 .. 1000 ]
Default: 10

Maximum number of entities to return

+
user
string
Default: "trustgraph"

User identifier

+
collection
string
Default: "default"

Collection to search

+

Responses

Request samples

Content type
application/json
Example
{
  • "vectors": [
    ],
  • "limit": 10,
  • "user": "alice",
  • "collection": "research"
}

Response samples

Content type
application/json

Document Embeddings Query - find similar text chunks

Query document embeddings to find similar text chunks by vector similarity.

+

Document Embeddings Query Overview

+

Find document chunks semantically similar to a query vector:

+
    +
  • Input: Query embedding vector
  • +
  • Search: Compare against stored chunk embeddings
  • +
  • Output: Most similar text chunks
  • +
+

Core component of document RAG retrieval.

+

Use Cases

+
    +
  • Document retrieval: Find relevant passages
  • +
  • Semantic search: Search by meaning not keywords
  • +
  • Context gathering: Get text for RAG
  • +
  • Similar content: Discover related documents
  • +
+

Process

+
    +
  1. Obtain query embedding (via embeddings service)
  2. +
  3. Query stored document chunk embeddings
  4. +
  5. Calculate cosine similarity
  6. +
  7. Return top N most similar chunks
  8. +
  9. Use chunks as context for generation
  10. +
+

Chunking

+

Documents are split into chunks during indexing:

+
    +
  • Typical size: 200-1000 tokens
  • +
  • Overlap between chunks for continuity
  • +
  • Each chunk has own embedding
  • +
+

Queries return individual chunks, not full documents.

+

Similarity Scoring

+

Uses cosine similarity:

+
    +
  • Results ordered by similarity
  • +
  • No explicit scores in response
  • +
  • Limit controls result count
  • +
+

Output Format

+

Returns text chunks as strings:

+
    +
  • Raw chunk text
  • +
  • No metadata (source, position, etc.)
  • +
  • Use for LLM context directly
  • +
+
Authorizations:
bearerAuth
path Parameters
flow
required
string
Example: my-flow

Flow instance ID

+
Request Body schema: application/json
required
vectors
required
Array of numbers

Query embedding vector

+
limit
integer [ 1 .. 1000 ]
Default: 10

Maximum number of document chunks to return

+
user
string
Default: "trustgraph"

User identifier

+
collection
string
Default: "default"

Collection to search

+

Responses

Request samples

Content type
application/json
Example
{
  • "vectors": [
    ],
  • "limit": 10,
  • "user": "alice",
  • "collection": "research"
}

Response samples

Content type
application/json
{
  • "chunks": [
    ]
}

Text Load - load text documents

Load text documents into processing pipeline for indexing and embedding.

+

Text Load Overview

+

Fire-and-forget document loading:

+
    +
  • Input: Text content (base64 encoded)
  • +
  • Process: Chunk, embed, store
  • +
  • Output: None (202 Accepted)
  • +
+

Asynchronous processing - document queued for background processing.

+

Processing Pipeline

+

Text documents go through:

+
    +
  1. Chunking: Split into overlapping chunks
  2. +
  3. Embedding: Generate vectors for each chunk
  4. +
  5. Storage: Store chunks + embeddings
  6. +
  7. Indexing: Make searchable via document-embeddings query
  8. +
+

Pipeline runs asynchronously after request returns.

+

Text Format

+

Text must be base64 encoded:

+
text_content = "This is the document..."
+encoded = base64.b64encode(text_content.encode('utf-8'))
+
+

Default charset is UTF-8, specify charset if different.

+

Metadata

+

Optional RDF triples describing document:

+
    +
  • Title, author, date
  • +
  • Source URL
  • +
  • Custom properties
  • +
  • Used for organization and retrieval
  • +
+

Use Cases

+
    +
  • Document ingestion: Add documents to knowledge base
  • +
  • Bulk loading: Process multiple documents
  • +
  • Content updates: Replace existing documents
  • +
  • Library integration: Load from document library
  • +
+

No Response Data

+

Returns 202 Accepted immediately:

+
    +
  • Document queued for processing
  • +
  • No synchronous result
  • +
  • No processing status
  • +
  • Check document-embeddings query later to verify indexed
  • +
+
Authorizations:
bearerAuth
path Parameters
flow
required
string
Example: my-flow

Flow instance ID

+
Request Body schema: application/json
required
text
required
string <byte>

Text content (base64 encoded)

+
id
string

Document identifier

+
user
string
Default: "trustgraph"

User identifier

+
collection
string
Default: "default"

Collection for document

+
charset
string
Default: "utf-8"

Text character encoding

+
metadata
Array of objects (Triple)

Document metadata as RDF triples

+

Responses

Request samples

Content type
application/json
Example
{
  • "text": "VGhpcyBpcyB0aGUgZG9jdW1lbnQgdGV4dC4uLg==",
  • "id": "doc-123",
  • "user": "alice",
  • "collection": "research"
}

Response samples

Content type
application/json
{ }

Document Load - load binary documents (PDF, etc.)

Load binary documents (PDF, Word, etc.) into processing pipeline.

+

Document Load Overview

+

Fire-and-forget binary document loading:

+
    +
  • Input: Document data (base64 encoded)
  • +
  • Process: Extract text, chunk, embed, store
  • +
  • Output: None (202 Accepted)
  • +
+

Asynchronous processing for PDF and other binary formats.

+

Processing Pipeline

+

Documents go through:

+
    +
  1. Text extraction: PDF→text, DOCX→text, etc.
  2. +
  3. Chunking: Split into overlapping chunks
  4. +
  5. Embedding: Generate vectors for each chunk
  6. +
  7. Storage: Store chunks + embeddings
  8. +
  9. Indexing: Make searchable
  10. +
+

Pipeline runs asynchronously.

+

Supported Formats

+
    +
  • PDF: Portable Document Format
  • +
  • DOCX: Microsoft Word
  • +
  • HTML: Web pages
  • +
  • Other formats via extractors
  • +
+

Format detected from content, not extension.

+

Binary Encoding

+

Documents must be base64 encoded:

+
with open('document.pdf', 'rb') as f:
+    doc_bytes = f.read()
+encoded = base64.b64encode(doc_bytes).decode('utf-8')
+
+

Metadata

+

Optional RDF triples:

+
    +
  • Document properties
  • +
  • Source information
  • +
  • Custom attributes
  • +
+

Use Cases

+
    +
  • PDF ingestion: Process research papers
  • +
  • Document libraries: Index document collections
  • +
  • Content migration: Import from other systems
  • +
  • Automated processing: Batch document loading
  • +
+

No Response Data

+

Returns 202 Accepted immediately:

+
    +
  • Document queued
  • +
  • Processing happens asynchronously
  • +
  • No status tracking
  • +
  • Query later to verify indexed
  • +
+
Authorizations:
bearerAuth
path Parameters
flow
required
string
Example: my-flow

Flow instance ID

+
Request Body schema: application/json
required
data
required
string <byte>

Document data (base64 encoded)

+
id
string

Document identifier

+
user
string
Default: "trustgraph"

User identifier

+
collection
string
Default: "default"

Collection for document

+
metadata
Array of objects (Triple)

Document metadata as RDF triples

+

Responses

Request samples

Content type
application/json
Example
{
  • "data": "JVBERi0xLjQKJeLjz9MKMSAwIG9iago8PC9UeXBlL0NhdGFsb2cvUGFnZXMgMiAwIFI+PmVuZG9iagoyIDAgb2JqCjw8L1R5cGUvUGFnZXMvS2lkc1szIDAgUl0vQ291bnQgMT4+ZW5kb2JqCg==",
  • "id": "doc-789",
  • "user": "alice",
  • "collection": "research"
}

Response samples

Content type
application/json
{ }

Import/Export

Bulk data import and export

+

Import Core - bulk import triples and embeddings

Import knowledge cores in bulk using streaming MessagePack format.

+

Import Core Overview

+

Bulk data import for knowledge graph:

+
    +
  • Format: MessagePack streaming
  • +
  • Content: Triples and/or graph embeddings
  • +
  • Target: Global knowledge storage
  • +
  • Use: Backup restoration, data migration, bulk loading
  • +
+

MessagePack Protocol

+

Request body is MessagePack stream with message tuples:

+

Triple Message

+
("t", {
+  "m": {              // Metadata
+    "i": "core-id",   // Knowledge core ID
+    "m": [...],       // Metadata triples array
+    "u": "user",      // User
+    "c": "collection" // Collection
+  },
+  "t": [...]          // Triples array
+})
+
+

Graph Embeddings Message

+
("ge", {
+  "m": {              // Metadata
+    "i": "core-id",
+    "m": [...],
+    "u": "user",
+    "c": "collection"
+  },
+  "e": [              // Entities array
+    {
+      "e": {"v": "uri", "e": true},  // Entity RdfValue
+      "v": [0.1, 0.2, ...]             // Vectors
+    }
+  ]
+})
+
+

Query Parameters

+
    +
  • id: Knowledge core ID
  • +
  • user: User identifier
  • +
+

Streaming

+

Multiple messages can be sent in stream. Each message processed as received. No response body - returns 202 Accepted.

+

Use Cases

+
    +
  • Backup restoration: Restore from export
  • +
  • Data migration: Move data between systems
  • +
  • Bulk loading: Initial knowledge base population
  • +
  • Replication: Copy knowledge cores
  • +
+
Authorizations:
bearerAuth
query Parameters
id
required
string
Example: id=core-123

Knowledge core ID to import

+
user
required
string
Example: user=alice

User identifier

+
Request Body schema: application/msgpack
required
string <binary>

MessagePack stream of knowledge data

+

Responses

Response samples

Content type
application/json
{ }

Export Core - bulk export triples and embeddings

Export knowledge cores in bulk using streaming MessagePack format.

+

Export Core Overview

+

Bulk data export for knowledge graph:

+
    +
  • Format: MessagePack streaming
  • +
  • Content: Triples and graph embeddings
  • +
  • Source: Global knowledge storage
  • +
  • Use: Backups, data migration, archival
  • +
+

MessagePack Protocol

+

Response body is MessagePack stream with message tuples:

+

Triple Message

+
("t", {
+  "m": {              // Metadata
+    "i": "core-id",   // Knowledge core ID
+    "m": [...],       // Metadata triples array
+    "u": "user",      // User
+    "c": "collection" // Collection
+  },
+  "t": [...]          // Triples array
+})
+
+

Graph Embeddings Message

+
("ge", {
+  "m": {              // Metadata
+    "i": "core-id",
+    "m": [...],
+    "u": "user",
+    "c": "collection"
+  },
+  "e": [              // Entities array
+    {
+      "e": {"v": "uri", "e": true},  // Entity RdfValue
+      "v": [0.1, 0.2, ...]             // Vectors
+    }
+  ]
+})
+
+

End of Stream Message

+
("eos", {})
+
+

Query Parameters

+
    +
  • id: Knowledge core ID to export
  • +
  • user: User identifier
  • +
+

Streaming

+

Data streamed incrementally:

+
    +
  • Triples sent first
  • +
  • Graph embeddings sent next
  • +
  • EOS marker signals completion
  • +
+

Client should process messages as received.

+

Use Cases

+
    +
  • Backups: Export for disaster recovery
  • +
  • Data migration: Move to another system
  • +
  • Archival: Long-term storage
  • +
  • Replication: Copy knowledge cores
  • +
  • Analysis: External processing
  • +
+
Authorizations:
bearerAuth
query Parameters
id
required
string
Example: id=core-123

Knowledge core ID to export

+
user
required
string
Example: user=alice

User identifier

+

Responses

Response samples

Content type
application/json
{
  • "error": "Unauthorized"
}

WebSocket

WebSocket interfaces

+

WebSocket - multiplexed service interface

WebSocket interface providing multiplexed access to all TrustGraph services over a single persistent connection.

+

Overview

+

The WebSocket API provides access to the same services as the REST API but with:

+
    +
  • Multiplexed: Multiple concurrent requests over one connection
  • +
  • Asynchronous: Non-blocking request/response with ID matching
  • +
  • Efficient: Reduced overhead compared to HTTP
  • +
  • Real-time: Low latency bidirectional communication
  • +
+

Connection

+

Establish WebSocket connection to:

+
ws://localhost:8088/api/v1/socket

Message Protocol

+

All messages are JSON objects with the following structure:

+

Request Message Format

+

Global Service Request (no flow parameter):

+
{
  "id": "req-123",
  "service": "config",
  "request": {
    "operation": "list",
    "type": "flow"
  }
}

Flow-Hosted Service Request (with flow parameter):

+
{
  "id": "req-456",
  "service": "agent",
  "flow": "my-flow",
  "request": {
    "question": "What is quantum computing?",
    "streaming": true
  }
}

Request Fields:

+
    +
  • id (string, required): Client-generated unique identifier for this request within the session. Used to match responses to requests.
  • +
  • service (string, required): Service identifier (e.g., "config", "agent", "document-rag"). Same as {kind} in REST URLs.
  • +
  • flow (string, optional): Flow ID for flow-hosted services. Omit for global services.
  • +
  • request (object, required): Service-specific request payload. Same structure as REST API request body.
  • +
+

Response Message Format

+

Success Response:

+
{
  "id": "req-123",
  "response": {
    "chunk-type": "answer",
    "content": "Quantum computing uses...",
    "end-of-stream": false
  }
}

Error Response:

+
{
  "id": "req-123",
  "error": {
    "type": "gateway-error",
    "message": "Flow not found"
  }
}

Response Fields:

+
    +
  • id (string, required): Matches the id from the request. Client uses this to correlate responses.
  • +
  • response (object, conditional): Service-specific response payload. Same structure as REST API response. Present on success.
  • +
  • error (object, conditional): Error information with type and message fields. Present on failure.
  • +
+

Service Routing

+

The WebSocket protocol routes to services using message parameters instead of URL paths:

REST Endpoint                            | WebSocket Message
POST /api/v1/config                      | {"service": "config"}
POST /api/v1/flow/{flow}/service/agent   | {"service": "agent", "flow": "my-flow"}

Global Services (no flow parameter):

+
    +
  • config - Configuration management
  • +
  • flow - Flow lifecycle and blueprints
  • +
  • librarian - Document library management
  • +
  • knowledge - Knowledge graph core management
  • +
  • collection-management - Collection metadata
  • +
+

Flow-Hosted Services (require flow parameter):

+
  • AI services: agent, text-completion, prompt, document-rag, graph-rag
  • Embeddings: embeddings, graph-embeddings, document-embeddings
  • Query: triples, objects, nlp-query, structured-query
  • Data loading: text-load, document-load
  • Utilities: mcp-tool, structured-diag

Request/Response Schemas

+

The request and response fields use identical schemas to the REST API for each service. See individual service documentation for detailed request/response formats.

+

Multiplexing and Asynchronous Operation

+

Multiple requests can be in flight simultaneously:

+
  • Client sends requests with unique id values
  • Server processes requests concurrently
  • Responses arrive asynchronously and may be out of order
  • Client matches responses to requests using the id field
  • No head-of-line blocking

Example concurrent requests:

+
{"id": "req-1", "service": "config", "request": {...}}
{"id": "req-2", "service": "agent", "flow": "f1", "request": {...}}
{"id": "req-3", "service": "document-rag", "flow": "f2", "request": {...}}

Responses may arrive in any order: req-2, req-1, req-3

+

Streaming Responses

+

Services that support streaming (e.g., agent, RAG) send multiple response messages with the same id:

+
{"id": "req-1", "response": {"chunk-type": "thought", "content": "...", "end-of-stream": false}}
{"id": "req-1", "response": {"chunk-type": "answer", "content": "...", "end-of-stream": false}}
{"id": "req-1", "response": {"chunk-type": "answer", "content": "...", "end-of-stream": true}}

The end-of-stream flag (or service-specific completion flag) indicates the final message.

+

Authentication

+

When GATEWAY_SECRET is set, include bearer token:

+
    +
  • As query parameter: ws://localhost:8088/api/v1/socket?token=<token>
  • +
  • Or in WebSocket subprotocol header
  • +
+

Benefits Over REST

+
    +
  • Lower latency: No TCP/TLS handshake per request
  • +
  • Connection reuse: Single persistent connection
  • +
  • Reduced overhead: No HTTP headers per message
  • +
  • True streaming: Bidirectional real-time communication
  • +
  • Efficient multiplexing: Concurrent operations without connection pooling
  • +
+
Authorizations:
bearerAuth
header Parameters
Upgrade
required
string
Value: "websocket"

WebSocket upgrade header

+
Connection
required
string
Value: "Upgrade"

Connection upgrade header

+

Responses

Response samples

Content type
application/json
{
  • "error": "Unauthorized"
}

Metrics

System metrics and monitoring

+

Metrics - Prometheus metrics endpoint

Proxy to Prometheus metrics for system monitoring.

+

Metrics Overview

+

Exposes system metrics via Prometheus format:

+
    +
  • Gateway metrics: Request rates, latencies, errors
  • +
  • Flow metrics: Processing throughput, queue depths
  • +
  • System metrics: Resource usage, health status
  • +
+

Prometheus Format

+

Returns metrics in Prometheus text exposition format:

+
# HELP metric_name Description
# TYPE metric_name counter
metric_name{label="value"} 123.45

Available Metrics

+

Common metrics include:

+
    +
  • Request count and rates
  • +
  • Response times (histograms)
  • +
  • Error rates
  • +
  • Active connections
  • +
  • Queue depths
  • +
  • Processing latencies
  • +
+

Integration

+

Standard Prometheus scraping:

+
    +
  • Configure Prometheus to scrape /api/metrics
  • +
  • Set appropriate scrape interval
  • +
  • Use bearer token if authentication enabled
  • +
+

Path Parameter

+

The {path} parameter allows querying specific Prometheus endpoints or metrics if the backend Prometheus supports it.

+
Authorizations:
bearerAuth

Responses

Response samples

Content type
application/json
{
  • "error": "Unauthorized"
}

Metrics - Prometheus metrics with path

Proxy to Prometheus metrics with optional path parameter.

+
Authorizations:
bearerAuth
path Parameters
path
required
string
Example: query

Path to specific metrics endpoint

+

Responses

Response samples

Content type
application/json
{
  • "error": "Unauthorized"
}
+ + + + diff --git a/docs/apis/README.md b/docs/apis/README.md deleted file mode 100644 index bf62f00f..00000000 --- a/docs/apis/README.md +++ /dev/null @@ -1,109 +0,0 @@ - -# TrustGraph APIs - -## Overview - -If you want to interact with TrustGraph through APIs, there are 4 -forms of API which may be of interest to you. All four mechanisms -invoke the same underlying TrustGraph functionality but are made -available for integration in different ways: - -### Pulsar APIs - -Apache Pulsar is a pub/sub system used to deliver messages between TrustGraph -components. Using Pulsar, you can communicate with TrustGraph components. - -Pros: - - Provides complete access to all TrustGraph functionality - - Simple integration with metrics and observability - -Cons: - - Integration is non-trivial, requires a special-purpose Pulsar client - library - - The Pulsar interfaces are likely something that you would not want to - expose outside of the processing cluster in a production or well-secured - deployment - -### REST APIs - -A component, `api-gateway`, provides a bridge between Pulsar internals and -the REST API which allows many services to be invoked using REST APIs. - -Pros: - - Uses standard REST approach can be easily integrated into many kinds - of technology - - Can be easily protected with authentication and TLS for production-grade - or secure deployments - -Cons: - - For a complex application, a long series of REST invocations has - latency and performance overheads - HTTP has limits on the number - of concurrent service invocations - - Lower coverage of functionality - service interfaces need to be added to - `api-gateway` to permit REST invocation - -### Websocket API - -The `api-gateway` component also provides access to services through a -websocket API. 
- -Pros: - - Usable through a standard websocket library - - Can be easily protected with authentication and TLS for production-grade - or secure deployments - - Supports concurrent service invocations - -Cons: - - Websocket service invocation is a little more complex to develop than - using a basic REST API, particular if you want to cover all of the error - scenarios well - -### Python SDK API - -The `trustgraph-base` package provides a Python SDK that wraps the underlying -service invocations in a convenient Python API. - -Pros: - - Native Python integration with type hints and documentation - - Simplified service invocation without manual message handling - - Built-in error handling and response parsing - - Convenient for Python-based applications and scripts - -Cons: - - Python-specific, not available for other programming languages - - Requires Python environment and trustgraph-base package installation - - Less control over low-level message handling - -## Flow-hosted APIs - -There are two types of APIs: Flow-hosted which need a flow to be running -to operate. Non-flow-hosted which are core to the system, and can -be seen as 'global' - they are not dependent on a flow to be running. - -Knowledge, Librarian, Config and Flow APIs fall into the latter -category. 
- -## See also - -- [TrustGraph websocket overview](websocket.md) -- [TrustGraph Pulsar overview](pulsar.md) -- API details - - [Text completion](api-text-completion.md) - - [Prompt completion](api-prompt.md) - - [Graph RAG](api-graph-rag.md) - - [Document RAG](api-document-rag.md) - - [Agent](api-agent.md) - - [Embeddings](api-embeddings.md) - - [Graph embeddings](api-graph-embeddings.md) - - [Document embeddings](api-document-embeddings.md) - - [Entity contexts](api-entity-contexts.md) - - [Triples query](api-triples-query.md) - - [Document load](api-document-load.md) - - [Text load](api-text-load.md) - - [Config](api-config.md) - - [Flow](api-flow.md) - - [Librarian](api-librarian.md) - - [Knowledge](api-knowledge.md) - - [Metrics](api-metrics.md) - - [Core import/export](api-core-import-export.md) - diff --git a/docs/apis/api-agent.md b/docs/apis/api-agent.md deleted file mode 100644 index fab7b32b..00000000 --- a/docs/apis/api-agent.md +++ /dev/null @@ -1,136 +0,0 @@ - -# TrustGraph Agent API - -The REST service provides incomplete functionality: The agent service -is able to provide multi-part responses containing 'thought' and -'observation' messages as the agent manager iterates over resolution of the -question. These responses are provided in the websocket, but not the REST -API. - -## Request/response - -### Request - -The request contains the following fields: -- `question`: A string, the question which the agent API must resolve -- `plan`: Optional, not used -- `state`: Optional, not used - -### Response - -The response contains the following fields: -- `thought`: Optional, a string, provides an interim agent thought -- `observation`: Optional, a string, provides an interim agent thought -- `answer`: Optional, a string, provides the final answer - -## REST service - -The REST service accepts a request object containing the question field. -The response is a JSON object containing the `answer` field. Interim -responses are not provided. - -e.g. 
- -Request: -``` -{ - "question": "What does NASA stand for?" -} -``` - -Response: - -``` -{ - "answer": "National Aeronautics and Space Administration" -} -``` - -## Websocket - -Agent requests have a `request` object containing the `question` field. -Responses have a `response` object containing `thought`, `observation` -and `answer` fields in multi-part responses. The final `answer` response -has `complete` set to `true`. - -e.g. - -Request: - -``` -{ - "id": "blrqotfefnmnh7de-20", - "service": "agent", - "flow": "default", - "request": { - "question": "What does NASA stand for?" - } -} -``` - -Responses: - -``` -{ - "id": "blrqotfefnmnh7de-20", - "response": { - "thought": "I need to query a knowledge base" - }, - "complete": false -} -``` - -``` -{ - "id": "blrqotfefnmnh7de-20", - "response": { - "observation": "National Aeronautics and Space Administration." - }, - "complete": false -} -``` - -``` -{ - "id": "blrqotfefnmnh7de-20", - "response": { - "thought": "I now know the final answer" - }, - "complete": false -} -``` - -``` -{ - "id": "blrqotfefnmnh7de-20", - "response": { - "answer": "National Aeronautics and Space Administration" - }, - "complete": true -} -``` - -## Pulsar - -The Pulsar schema for the Agent API is defined in Python code here: - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/schema/agent.py - -Default request queue: -`non-persistent://tg/request/agent` - -Default response queue: -`non-persistent://tg/response/agent` - -Request schema: -`trustgraph.schema.AgentRequest` - -Response schema: -`trustgraph.schema.AgentResponse` - -## Pulsar Python client - -The client class is -`trustgraph.clients.AgentClient` - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/clients/agent_client.py diff --git a/docs/apis/api-config.md b/docs/apis/api-config.md deleted file mode 100644 index d9cf7d23..00000000 --- a/docs/apis/api-config.md +++ /dev/null @@ -1,261 +0,0 @@ -# TrustGraph 
Config API - -This API provides centralized configuration management for TrustGraph components. -Configuration data is organized hierarchically by type and key, with support for -persistent storage and push notifications. - -## Request/response - -### Request - -The request contains the following fields: -- `operation`: The operation to perform (`get`, `list`, `getvalues`, `put`, `delete`, `config`) -- `keys`: Array of ConfigKey objects (for `get`, `delete` operations) -- `type`: Configuration type (for `list`, `getvalues` operations) -- `values`: Array of ConfigValue objects (for `put` operation) - -### Response - -The response contains the following fields: -- `version`: Version number for tracking changes -- `values`: Array of ConfigValue objects returned by operations -- `directory`: Array of key names returned by `list` operation -- `config`: Full configuration map returned by `config` operation -- `error`: Error information if operation fails - -## Operations - -### PUT - Store Configuration Values - -Request: -```json -{ - "operation": "put", - "values": [ - { - "type": "test", - "key": "key1", - "value": "value1" - } - ] -} -``` - -Response: -```json -{ - "version": 123 -} -``` - -### GET - Retrieve Configuration Values - -Request: -```json -{ - "operation": "get", - "keys": [ - { - "type": "test", - "key": "key1" - } - ] -} -``` - -Response: -```json -{ - "version": 123, - "values": [ - { - "type": "test", - "key": "key1", - "value": "value1" - } - ] -} -``` - -### LIST - List Keys by Type - -Request: -```json -{ - "operation": "list", - "type": "test" -} -``` - -Response: -```json -{ - "version": 123, - "directory": ["key1", "key2", "key3"] -} -``` - -### GETVALUES - Get All Values by Type - -Request: -```json -{ - "operation": "getvalues", - "type": "test" -} -``` - -Response: -```json -{ - "version": 123, - "values": [ - { - "type": "test", - "key": "key1", - "value": "value1" - }, - { - "type": "test", - "key": "key2", - "value": "value2" - } - ] -} 
-``` - -### CONFIG - Get Entire Configuration - -Request: -```json -{ - "operation": "config" -} -``` - -Response: -```json -{ - "version": 123, - "config": { - "test": { - "key1": "value1", - "key2": "value2" - } - } -} -``` - -### DELETE - Remove Configuration Values - -Request: -```json -{ - "operation": "delete", - "keys": [ - { - "type": "test", - "key": "key1" - } - ] -} -``` - -Response: -```json -{ - "version": 124 -} -``` - -## REST service - -The REST service is available at `/api/v1/config` and accepts the above request formats. - -## Websocket - -Requests have a `request` object containing the operation fields. -Responses have a `response` object containing the response fields. - -Request: -```json -{ - "id": "unique-request-id", - "service": "config", - "request": { - "operation": "get", - "keys": [ - { - "type": "test", - "key": "key1" - } - ] - } -} -``` - -Response: -```json -{ - "id": "unique-request-id", - "response": { - "version": 123, - "values": [ - { - "type": "test", - "key": "key1", - "value": "value1" - } - ] - }, - "complete": true -} -``` - -## Pulsar - -The Pulsar schema for the Config API is defined in Python code here: - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/schema/config.py - -Default request queue: -`non-persistent://tg/request/config` - -Default response queue: -`non-persistent://tg/response/config` - -Request schema: -`trustgraph.schema.ConfigRequest` - -Response schema: -`trustgraph.schema.ConfigResponse` - -## Python SDK - -The Python SDK provides convenient access to the Config API: - -```python -from trustgraph.api.config import ConfigClient - -client = ConfigClient() - -# Put a value -await client.put("test", "key1", "value1") - -# Get a value -value = await client.get("test", "key1") - -# List keys -keys = await client.list("test") - -# Get all values for a type -values = await client.get_values("test") -``` - -## Features - -- **Hierarchical Organization**: Configuration 
organized by type and key -- **Versioning**: Each operation returns a version number for change tracking -- **Persistent Storage**: Data stored in Cassandra for persistence -- **Push Notifications**: Configuration changes pushed to subscribers -- **Multiple Access Methods**: Available via Pulsar, REST, WebSocket, and Python SDK \ No newline at end of file diff --git a/docs/apis/api-core-import-export.md b/docs/apis/api-core-import-export.md deleted file mode 100644 index f1530447..00000000 --- a/docs/apis/api-core-import-export.md +++ /dev/null @@ -1,324 +0,0 @@ -# TrustGraph Core Import/Export API - -This API provides bulk import and export capabilities for TrustGraph knowledge cores. -It handles efficient transfer of both RDF triples and graph embeddings using MessagePack -binary format for high-performance data exchange. - -## Overview - -The Core Import/Export API enables: -- **Bulk Import**: Import large knowledge cores from binary streams -- **Bulk Export**: Export knowledge cores as binary streams -- **Efficient Format**: Uses MessagePack for compact, fast serialization -- **Dual Data Types**: Handles both RDF triples and graph embeddings -- **Streaming**: Supports streaming for large datasets - -## Import Endpoint - -**Endpoint:** `POST /api/v1/import-core` - -**Query Parameters:** -- `id`: Knowledge core identifier -- `user`: User identifier - -**Content-Type:** `application/octet-stream` - -**Request Body:** MessagePack-encoded binary stream - -### Import Process - -1. **Stream Processing**: Reads binary data in 128KB chunks -2. **MessagePack Decoding**: Unpacks binary data into structured messages -3. **Knowledge Storage**: Stores data via Knowledge API -4. 
**Response**: Returns success/error status - -### Import Data Format - -The import stream contains MessagePack-encoded tuples with type indicators: - -#### Triples Data -```python -("t", { - "m": { # metadata - "i": "core-id", - "m": [], # metadata triples - "u": "user", - "c": "collection" - }, - "t": [ # triples array - { - "s": {"value": "subject", "is_uri": true}, - "p": {"value": "predicate", "is_uri": true}, - "o": {"value": "object", "is_uri": false} - } - ] -}) -``` - -#### Graph Embeddings Data -```python -("ge", { - "m": { # metadata - "i": "core-id", - "m": [], # metadata triples - "u": "user", - "c": "collection" - }, - "e": [ # entities array - { - "e": {"value": "entity", "is_uri": true}, - "v": [[0.1, 0.2, 0.3]] # vectors - } - ] -}) -``` - -## Export Endpoint - -**Endpoint:** `GET /api/v1/export-core` - -**Query Parameters:** -- `id`: Knowledge core identifier -- `user`: User identifier - -**Content-Type:** `application/octet-stream` - -**Response Body:** MessagePack-encoded binary stream - -### Export Process - -1. **Knowledge Retrieval**: Fetches data via Knowledge API -2. **MessagePack Encoding**: Encodes data into binary format -3. **Streaming Response**: Sends data as binary stream -4. 
**Type Identification**: Uses type prefixes for data classification - -## Usage Examples - -### Import Knowledge Core - -```bash -# Import from file -curl -X POST \ - -H "Authorization: Bearer your-token" \ - -H "Content-Type: application/octet-stream" \ - --data-binary @knowledge-core.msgpack \ - "http://api-gateway:8080/api/v1/import-core?id=core-123&user=alice" -``` - -### Export Knowledge Core - -```bash -# Export to file -curl -H "Authorization: Bearer your-token" \ - "http://api-gateway:8080/api/v1/export-core?id=core-123&user=alice" \ - -o knowledge-core.msgpack -``` - -## Python Integration - -### Import Example - -```python -import msgpack -import requests - -def import_knowledge_core(core_id, user, triples_data, embeddings_data, token): - # Prepare data - messages = [] - - # Add triples - if triples_data: - messages.append(("t", { - "m": { - "i": core_id, - "m": [], - "u": user, - "c": "default" - }, - "t": triples_data - })) - - # Add embeddings - if embeddings_data: - messages.append(("ge", { - "m": { - "i": core_id, - "m": [], - "u": user, - "c": "default" - }, - "e": embeddings_data - })) - - # Pack data - binary_data = b''.join(msgpack.packb(msg) for msg in messages) - - # Upload - response = requests.post( - f"http://api-gateway:8080/api/v1/import-core?id={core_id}&user={user}", - headers={ - "Authorization": f"Bearer {token}", - "Content-Type": "application/octet-stream" - }, - data=binary_data - ) - - return response.status_code == 200 - -# Usage -triples = [ - { - "s": {"value": "Person1", "is_uri": True}, - "p": {"value": "hasName", "is_uri": True}, - "o": {"value": "John Doe", "is_uri": False} - } -] - -embeddings = [ - { - "e": {"value": "Person1", "is_uri": True}, - "v": [[0.1, 0.2, 0.3, 0.4]] - } -] - -success = import_knowledge_core("core-123", "alice", triples, embeddings, "your-token") -``` - -### Export Example - -```python -import msgpack -import requests - -def export_knowledge_core(core_id, user, token): - response = requests.get( - 
f"http://api-gateway:8080/api/v1/export-core?id={core_id}&user={user}", - headers={"Authorization": f"Bearer {token}"} - ) - - if response.status_code != 200: - return None - - # Decode MessagePack stream - data = response.content - unpacker = msgpack.Unpacker() - unpacker.feed(data) - - triples = [] - embeddings = [] - - for unpacked in unpacker: - msg_type, msg_data = unpacked - - if msg_type == "t": - triples.extend(msg_data["t"]) - elif msg_type == "ge": - embeddings.extend(msg_data["e"]) - - return { - "triples": triples, - "embeddings": embeddings - } - -# Usage -data = export_knowledge_core("core-123", "alice", "your-token") -if data: - print(f"Exported {len(data['triples'])} triples") - print(f"Exported {len(data['embeddings'])} embeddings") -``` - -## Data Format Specification - -### MessagePack Tuples - -Each message is a tuple: `(type_indicator, data_object)` - -**Type Indicators:** -- `"t"`: RDF triples data -- `"ge"`: Graph embeddings data - -### Metadata Structure - -```python -{ - "i": "core-identifier", # ID - "m": [...], # Metadata triples array - "u": "user-identifier", # User - "c": "collection-name" # Collection -} -``` - -### Triple Structure - -```python -{ - "s": {"value": "subject", "is_uri": boolean}, - "p": {"value": "predicate", "is_uri": boolean}, - "o": {"value": "object", "is_uri": boolean} -} -``` - -### Entity Embedding Structure - -```python -{ - "e": {"value": "entity", "is_uri": boolean}, - "v": [[float, float, ...]] # Array of vectors -} -``` - -## Performance Characteristics - -### Import Performance -- **Streaming**: Processes data in 128KB chunks -- **Memory Efficient**: Incremental unpacking -- **Concurrent**: Multiple imports can run simultaneously -- **Error Handling**: Robust error recovery - -### Export Performance -- **Direct Streaming**: Data streamed directly from knowledge store -- **Efficient Encoding**: MessagePack for minimal overhead -- **Large Dataset Support**: Handles cores of any size - -## Error Handling - 
-### Import Errors -- **Format Errors**: Invalid MessagePack data -- **Type Errors**: Unknown type indicators -- **Storage Errors**: Knowledge API failures -- **Authentication**: Invalid user credentials - -### Export Errors -- **Not Found**: Core ID doesn't exist -- **Access Denied**: User lacks permissions -- **System Errors**: Knowledge API failures - -### Error Responses -- **HTTP 400**: Bad request (invalid parameters) -- **HTTP 401**: Unauthorized access -- **HTTP 404**: Core not found -- **HTTP 500**: Internal server error - -## Use Cases - -### Data Migration -- **System Upgrades**: Export/import during system migrations -- **Environment Sync**: Copy cores between environments -- **Backup/Restore**: Full knowledge core backup operations - -### Batch Processing -- **Bulk Loading**: Load large knowledge datasets efficiently -- **Data Integration**: Merge knowledge from multiple sources -- **ETL Pipelines**: Extract-Transform-Load operations - -### Performance Optimization -- **Faster Than REST**: Binary format reduces transfer time -- **Atomic Operations**: Complete import/export as single operation -- **Resource Efficient**: Minimal memory footprint during transfer - -## Security Considerations - -- **Authentication Required**: Bearer token authentication -- **User Isolation**: Access restricted to user's own cores -- **Data Validation**: Input validation on import -- **Audit Logging**: Operations logged for security auditing \ No newline at end of file diff --git a/docs/apis/api-document-embeddings.md b/docs/apis/api-document-embeddings.md deleted file mode 100644 index 749567b5..00000000 --- a/docs/apis/api-document-embeddings.md +++ /dev/null @@ -1,252 +0,0 @@ -# TrustGraph Document Embeddings API - -This API provides import, export, and query capabilities for document embeddings. It handles -document chunks with their vector embeddings and metadata, supporting both real-time WebSocket -operations and request/response patterns. 
- -## Schema Overview - -### DocumentEmbeddings Structure -- `metadata`: Document metadata (ID, user, collection, RDF triples) -- `chunks`: Array of document chunks with embeddings - -### ChunkEmbeddings Structure -- `chunk`: Text chunk as bytes -- `vectors`: Array of vector embeddings (Array of Array of Double) - -### DocumentEmbeddingsRequest Structure -- `vectors`: Query vector embeddings -- `limit`: Maximum number of results -- `user`: User identifier -- `collection`: Collection identifier - -### DocumentEmbeddingsResponse Structure -- `error`: Error information if operation fails -- `documents`: Array of matching documents as bytes - -## Import/Export Operations - -### Import - WebSocket Endpoint - -**Endpoint:** `/api/v1/flow/{flow}/import/document-embeddings` - -**Method:** WebSocket connection - -**Request Format:** -```json -{ - "metadata": { - "id": "doc-123", - "user": "alice", - "collection": "research", - "metadata": [ - { - "s": {"v": "doc-123", "e": true}, - "p": {"v": "dc:title", "e": true}, - "o": {"v": "Research Paper", "e": false} - } - ] - }, - "chunks": [ - { - "chunk": "This is the first chunk of the document...", - "vectors": [ - [0.1, 0.2, 0.3, 0.4], - [0.5, 0.6, 0.7, 0.8] - ] - }, - { - "chunk": "This is the second chunk...", - "vectors": [ - [0.9, 0.8, 0.7, 0.6], - [0.5, 0.4, 0.3, 0.2] - ] - } - ] -} -``` - -**Response:** Import operations are fire-and-forget with no response payload. - -### Export - WebSocket Endpoint - -**Endpoint:** `/api/v1/flow/{flow}/export/document-embeddings` - -**Method:** WebSocket connection - -The export endpoint streams document embeddings data in real-time. 
Each message contains: - -```json -{ - "metadata": { - "id": "doc-123", - "user": "alice", - "collection": "research", - "metadata": [ - { - "s": {"v": "doc-123", "e": true}, - "p": {"v": "dc:title", "e": true}, - "o": {"v": "Research Paper", "e": false} - } - ] - }, - "chunks": [ - { - "chunk": "Decoded text content of chunk", - "vectors": [[0.1, 0.2, 0.3, 0.4]] - } - ] -} -``` - -## Query Operations - -### Query Document Embeddings - -**Purpose:** Find documents similar to provided vector embeddings - -**Request:** -```json -{ - "vectors": [ - [0.1, 0.2, 0.3, 0.4, 0.5], - [0.6, 0.7, 0.8, 0.9, 1.0] - ], - "limit": 10, - "user": "alice", - "collection": "research" -} -``` - -**Response:** -```json -{ - "documents": [ - "base64-encoded-document-1", - "base64-encoded-document-2" - ] -} -``` - -## WebSocket Usage Examples - -### Importing Document Embeddings - -```javascript -// Connect to import endpoint -const ws = new WebSocket('ws://api-gateway:8080/api/v1/flow/my-flow/import/document-embeddings'); - -// Send document embeddings -ws.send(JSON.stringify({ - metadata: { - id: "doc-123", - user: "alice", - collection: "research" - }, - chunks: [ - { - chunk: "Document content chunk 1", - vectors: [[0.1, 0.2, 0.3]] - } - ] -})); -``` - -### Exporting Document Embeddings - -```javascript -// Connect to export endpoint -const ws = new WebSocket('ws://api-gateway:8080/api/v1/flow/my-flow/export/document-embeddings'); - -// Listen for exported data -ws.onmessage = (event) => { - const documentEmbeddings = JSON.parse(event.data); - console.log('Received document:', documentEmbeddings.metadata.id); - console.log('Chunks:', documentEmbeddings.chunks.length); -}; -``` - -## Data Format Details - -### Metadata Format -Each metadata triple contains: -- `s`: Subject (object with `v` for value and `e` for is_entity boolean) -- `p`: Predicate (object with `v` for value and `e` for is_entity boolean) -- `o`: Object (object with `v` for value and `e` for is_entity boolean) - -### 
Vector Format -- Vectors are arrays of floating-point numbers -- Each chunk can have multiple vectors (different embedding models) -- Vectors should be consistently dimensioned within a collection - -### Text Encoding -- Chunk text is handled as UTF-8 encoded bytes internally -- WebSocket API accepts/returns plain text strings -- Base64 encoding used for binary data in query responses - -## Python SDK - -```python -from trustgraph.clients.document_embeddings_client import DocumentEmbeddingsClient - -# Create client -client = DocumentEmbeddingsClient() - -# Query similar documents -request = { - "vectors": [[0.1, 0.2, 0.3, 0.4]], - "limit": 5, - "user": "alice", - "collection": "research" -} - -response = await client.query(request) -documents = response.documents -``` - -## Integration with TrustGraph - -### Storage Integration -- Document embeddings are stored in vector databases -- Metadata is cross-referenced with knowledge graph -- Supports multi-tenant isolation by user and collection - -### Processing Pipeline -1. **Document Ingestion**: Text documents loaded via text-load API -2. **Chunking**: Documents split into manageable chunks -3. **Embedding Generation**: Vector embeddings created for each chunk -4. **Storage**: Embeddings stored via import API -5. 
**Retrieval**: Similar documents found via query API - -### Use Cases -- **Semantic Search**: Find documents similar to query embeddings -- **RAG Systems**: Retrieve relevant document chunks for question answering -- **Document Clustering**: Group similar documents using embeddings -- **Content Recommendations**: Suggest related documents to users -- **Knowledge Discovery**: Find connections between document collections - -## Error Handling - -Common error scenarios: -- Invalid vector dimensions -- Missing required metadata fields -- User/collection access restrictions -- WebSocket connection failures -- Malformed JSON data - -Errors are returned in the response `error` field: -```json -{ - "error": { - "type": "ValidationError", - "message": "Invalid vector dimensions" - } -} -``` - -## Performance Considerations - -- **Batch Processing**: Import multiple documents in single WebSocket session -- **Vector Dimensions**: Consistent embedding dimensions improve performance -- **Collection Sizing**: Limit collections to reasonable sizes for query performance -- **Real-time vs Batch**: Choose appropriate method based on use case requirements \ No newline at end of file diff --git a/docs/apis/api-document-load.md b/docs/apis/api-document-load.md deleted file mode 100644 index dfc6a87a..00000000 --- a/docs/apis/api-document-load.md +++ /dev/null @@ -1,3 +0,0 @@ - -Coming soon - diff --git a/docs/apis/api-document-rag.md b/docs/apis/api-document-rag.md deleted file mode 100644 index 1d923437..00000000 --- a/docs/apis/api-document-rag.md +++ /dev/null @@ -1,96 +0,0 @@ -# TrustGraph Document RAG API - -This presents a prompt to the Document RAG service and retrieves the answer. -This makes use of a number of the other APIs behind the scenes: -Embeddings, Document Embeddings, Prompt, TextCompletion, Triples Query. 
- -## Request/response - -### Request - -The request contains the following fields: -- `query`: The question to answer - -### Response - -The response contains the following fields: -- `response`: LLM response - -## REST service - -The REST service accepts a request object containing the `query` field. -The response is a JSON object containing the `response` field. - -e.g. - -Request: -``` -{ - "query": "What does NASA stand for?" -} -``` - -Response: - -``` -{ - "response": "National Aeronautics and Space Administration" -} -``` - -## Websocket - -Requests have a `request` object containing the `query` field. -Responses have a `response` object containing `response` field. - -e.g. - -Request: - -``` -{ - "id": "blrqotfefnmnh7de-14", - "service": "document-rag", - "flow": "default", - "request": { - "query": "What does NASA stand for?" - } -} -``` - -Response: - -``` -{ - "id": "blrqotfefnmnh7de-14", - "response": { - "response": "National Aeronautics and Space Administration" - }, - "complete": true -} -``` - -## Pulsar - -The Pulsar schema for the Document RAG API is defined in Python code here: - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/schema/retrieval.py - -Default request queue: -`non-persistent://tg/request/document-rag` - -Default response queue: -`non-persistent://tg/response/document-rag` - -Request schema: -`trustgraph.schema.DocumentRagQuery` - -Response schema: -`trustgraph.schema.DocumentRagResponse` - -## Pulsar Python client - -The client class is -`trustgraph.clients.DocumentRagClient` - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/clients/document_rag_client.py \ No newline at end of file diff --git a/docs/apis/api-embeddings.md b/docs/apis/api-embeddings.md deleted file mode 100644 index 7eda096d..00000000 --- a/docs/apis/api-embeddings.md +++ /dev/null @@ -1,107 +0,0 @@ - -# TrustGraph Embeddings API - -## Request/response - -### Request - -The request contains the 
following fields: -- `text`: A string, the text to apply the embedding to - -### Response - -The response contains the following fields: -- `vectors`: Embeddings response, an array of arrays. An embedding is - an array of floating-point numbers. As multiple embeddings may be - returned, an array of embeddings is returned, hence an array - of arrays. - -## REST service - -The REST service accepts a request object containing the question field. -The response is a JSON object containing the `answer` field. - -e.g. - -Request: -``` -{ - "text": "What does NASA stand for?" -} -``` - -Response: - -``` -{ - "vectors": [ 0.231341245, ... ] -} -``` - -## Websocket - -Embeddings requests have a `request` object containing the `text` field. -Responses have a `response` object containing `vectors` field. - -e.g. - -Request: - -``` -{ - "id": "qgzw1287vfjc8wsk-2", - "service": "embeddings", - "flow": "default", - "request": { - "text": "What is a cat?" - } -} -``` - -Responses: - -``` - - -{ - "id": "qgzw1287vfjc8wsk-2", - "response": { - "vectors": [ - [ - 0.04013510048389435, - 0.07536131888628006, - ... 
- -0.023531345650553703, - 0.03591292351484299 - ] - ] - }, - "complete": true -} -``` - -## Pulsar - -The Pulsar schema for the Embeddings API is defined in Python code here: - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/schema/models.py - -Default request queue: -`non-persistent://tg/request/embeddings` - -Default response queue: -`non-persistent://tg/response/embeddings` - -Request schema: -`trustgraph.schema.EmbeddingsRequest` - -Response schema: -`trustgraph.schema.EmbeddingsResponse` - -## Pulsar Python client - -The client class is -`trustgraph.clients.EmbeddingsClient` - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/clients/embeddings_client.py - diff --git a/docs/apis/api-entity-contexts.md b/docs/apis/api-entity-contexts.md deleted file mode 100644 index bbbcce81..00000000 --- a/docs/apis/api-entity-contexts.md +++ /dev/null @@ -1,259 +0,0 @@ -# TrustGraph Entity Contexts API - -This API provides import and export capabilities for entity contexts data. Entity contexts -associate entities with their textual context information, commonly used for entity -descriptions, definitions, or explanatory text in knowledge graphs. 
- -## Schema Overview - -### EntityContext Structure -- `entity`: Entity identifier (Value object with value, is_uri, type) -- `context`: Textual context or description string - -### EntityContexts Structure -- `metadata`: Metadata including ID, user, collection, and RDF triples -- `entities`: Array of EntityContext objects - -### Value Structure -- `value`: The entity value as string -- `is_uri`: Boolean indicating if the value is a URI -- `type`: Data type of the value (optional) - -## Import/Export Operations - -### Import - WebSocket Endpoint - -**Endpoint:** `/api/v1/flow/{flow}/import/entity-contexts` - -**Method:** WebSocket connection - -**Request Format:** -```json -{ - "metadata": { - "id": "context-batch-123", - "user": "alice", - "collection": "research", - "metadata": [ - { - "s": {"value": "source-doc", "is_uri": true}, - "p": {"value": "dc:title", "is_uri": true}, - "o": {"value": "Research Paper", "is_uri": false} - } - ] - }, - "entities": [ - { - "entity": { - "v": "https://example.com/Person/JohnDoe", - "e": true - }, - "context": "John Doe is a researcher at MIT specializing in artificial intelligence and machine learning." - }, - { - "entity": { - "v": "https://example.com/Organization/MIT", - "e": true - }, - "context": "Massachusetts Institute of Technology (MIT) is a private research university in Cambridge, Massachusetts." - }, - { - "entity": { - "v": "machine learning", - "e": false - }, - "context": "Machine learning is a method of data analysis that automates analytical model building using algorithms." - } - ] -} -``` - -**Response:** Import operations are fire-and-forget with no response payload. - -### Export - WebSocket Endpoint - -**Endpoint:** `/api/v1/flow/{flow}/export/entity-contexts` - -**Method:** WebSocket connection - -The export endpoint streams entity contexts data in real-time. 
Each message contains: - -```json -{ - "metadata": { - "id": "context-batch-123", - "user": "alice", - "collection": "research", - "metadata": [ - { - "s": {"value": "source-doc", "is_uri": true}, - "p": {"value": "dc:title", "is_uri": true}, - "o": {"value": "Research Paper", "is_uri": false} - } - ] - }, - "entities": [ - { - "entity": { - "v": "https://example.com/Person/JohnDoe", - "e": true - }, - "context": "John Doe is a researcher at MIT specializing in artificial intelligence." - } - ] -} -``` - -## WebSocket Usage Examples - -### Importing Entity Contexts - -```javascript -// Connect to import endpoint -const ws = new WebSocket('ws://api-gateway:8080/api/v1/flow/my-flow/import/entity-contexts'); - -// Send entity contexts -ws.send(JSON.stringify({ - metadata: { - id: "context-batch-1", - user: "alice", - collection: "research" - }, - entities: [ - { - entity: { - v: "Albert Einstein", - e: false - }, - context: "Albert Einstein was a German-born theoretical physicist widely acknowledged to be one of the greatest physicists of all time." 
- } - ] -})); -``` - -### Exporting Entity Contexts - -```javascript -// Connect to export endpoint -const ws = new WebSocket('ws://api-gateway:8080/api/v1/flow/my-flow/export/entity-contexts'); - -// Listen for exported data -ws.onmessage = (event) => { - const entityContexts = JSON.parse(event.data); - console.log('Received contexts for', entityContexts.entities.length, 'entities'); - - entityContexts.entities.forEach(item => { - console.log('Entity:', item.entity.v); - console.log('Context:', item.context); - }); -}; -``` - -## Data Format Details - -### Entity Format -The `entity` field uses the Value structure: -- `v`: The entity value (name, URI, identifier) -- `e`: Boolean indicating if it's a URI entity (true) or literal (false) -- `type`: Optional data type specification - -### Context Format -- Plain text string providing description or context -- Can include definitions, explanations, or background information -- Supports multi-sentence descriptions and detailed context - -### Metadata Format -Each metadata triple contains: -- `s`: Subject (object with `value` and `is_uri` fields) -- `p`: Predicate (object with `value` and `is_uri` fields) -- `o`: Object (object with `value` and `is_uri` fields) - -## Integration with TrustGraph - -### Storage Integration -- Entity contexts are stored in graph databases -- Links entities to their descriptive text -- Supports multi-tenant isolation by user and collection - -### Processing Pipeline -1. **Text Analysis**: Extract entities from documents -2. **Context Extraction**: Identify descriptive text for entities -3. **Entity Linking**: Associate entities with their contexts -4. **Import**: Store entity-context pairs via import API -5. 
**Knowledge Enhancement**: Use contexts for better entity understanding - -### Use Cases -- **Entity Disambiguation**: Provide context to distinguish similar entities -- **Knowledge Base Enhancement**: Add descriptive information to entities -- **Question Answering**: Use entity contexts to provide detailed answers -- **Entity Summarization**: Generate summaries based on collected contexts -- **Knowledge Graph Visualization**: Display rich entity information - -## Authentication - -Both import and export endpoints support authentication: -- API token authentication via Authorization header -- Flow-based access control -- User and collection isolation - -## Error Handling - -Common error scenarios: -- Invalid JSON format -- Missing required metadata fields -- User/collection access restrictions -- WebSocket connection failures -- Invalid entity value formats - -Errors are typically handled at the WebSocket connection level with connection termination or error messages. - -## Performance Considerations - -- **Batch Processing**: Import multiple entity contexts in single messages -- **Context Length**: Balance detailed context with performance -- **Flow Capacity**: Ensure target flow can handle entity context volume -- **Real-time vs Batch**: Choose appropriate method based on use case - -## Python Integration - -While no direct Python SDK is mentioned in the codebase, integration can be achieved through: - -```python -import websocket -import json - -# Connect to import endpoint -def import_entity_contexts(flow_id, contexts_data): - ws_url = f"ws://api-gateway:8080/api/v1/flow/{flow_id}/import/entity-contexts" - ws = websocket.create_connection(ws_url) - - # Send data - ws.send(json.dumps(contexts_data)) - ws.close() - -# Usage example -contexts = { - "metadata": { - "id": "batch-1", - "user": "alice", - "collection": "research" - }, - "entities": [ - { - "entity": {"v": "Neural Networks", "e": False}, - "context": "Neural networks are computing systems inspired by 
biological neural networks." - } - ] -} - -import_entity_contexts("my-flow", contexts) -``` - -## Features - -- **Real-time Streaming**: WebSocket-based import/export for live data flow -- **Batch Operations**: Process multiple entity contexts efficiently -- **Rich Metadata**: Full metadata support with RDF triples -- **Entity Types**: Support for both URI entities and literal values -- **Flow Integration**: Direct integration with TrustGraph processing flows -- **Multi-tenant Support**: User and collection-based data isolation \ No newline at end of file diff --git a/docs/apis/api-flow.md b/docs/apis/api-flow.md deleted file mode 100644 index fbd3d660..00000000 --- a/docs/apis/api-flow.md +++ /dev/null @@ -1,301 +0,0 @@ -# TrustGraph Flow API - -This API provides workflow management for TrustGraph components. It manages flow blueprintes -(workflow templates) and flow instances (active running workflows) that orchestrate -complex data processing pipelines. - -## Request/response - -### Request - -The request contains the following fields: -- `operation`: The operation to perform (see operations below) -- `blueprint_name`: Flow blueprint name (for class operations and start-flow) -- `class_definition`: Flow blueprint definition JSON (for put-class) -- `description`: Flow description (for start-flow) -- `flow_id`: Flow instance ID (for flow instance operations) - -### Response - -The response contains the following fields: -- `blueprint_names`: Array of flow blueprint names (returned by list-classes) -- `flow_ids`: Array of active flow IDs (returned by list-flows) -- `class_definition`: Flow blueprint definition JSON (returned by get-class) -- `flow`: Flow instance JSON (returned by get-flow) -- `description`: Flow description (returned by get-flow) -- `error`: Error information if operation fails - -## Operations - -### Flow Blueprint Operations - -#### LIST-CLASSES - List All Flow Blueprintes - -Request: -```json -{ - "operation": "list-classes" -} -``` - 
-Response: -```json -{ - "blueprint_names": ["pdf-processor", "text-analyzer", "knowledge-extractor"] -} -``` - -#### GET-CLASS - Get Flow Blueprint Definition - -Request: -```json -{ - "operation": "get-class", - "blueprint_name": "pdf-processor" -} -``` - -Response: -```json -{ - "class_definition": "{\"interfaces\": {\"text-completion\": {\"request\": \"persistent://tg/request/text-completion\", \"response\": \"persistent://tg/response/text-completion\"}}, \"description\": \"PDF processing workflow\"}" -} -``` - -#### PUT-CLASS - Create/Update Flow Blueprint - -Request: -```json -{ - "operation": "put-class", - "blueprint_name": "pdf-processor", - "class_definition": "{\"interfaces\": {\"text-completion\": {\"request\": \"persistent://tg/request/text-completion\", \"response\": \"persistent://tg/response/text-completion\"}}, \"description\": \"PDF processing workflow\"}" -} -``` - -Response: -```json -{} -``` - -#### DELETE-CLASS - Remove Flow Blueprint - -Request: -```json -{ - "operation": "delete-class", - "blueprint_name": "pdf-processor" -} -``` - -Response: -```json -{} -``` - -### Flow Instance Operations - -#### LIST-FLOWS - List Active Flow Instances - -Request: -```json -{ - "operation": "list-flows" -} -``` - -Response: -```json -{ - "flow_ids": ["flow-123", "flow-456", "flow-789"] -} -``` - -#### GET-FLOW - Get Flow Instance - -Request: -```json -{ - "operation": "get-flow", - "flow_id": "flow-123" -} -``` - -Response: -```json -{ - "flow": "{\"interfaces\": {\"text-completion\": {\"request\": \"persistent://tg/request/text-completion-flow-123\", \"response\": \"persistent://tg/response/text-completion-flow-123\"}}}", - "description": "PDF processing workflow instance" -} -``` - -#### START-FLOW - Start Flow Instance - -Request: -```json -{ - "operation": "start-flow", - "blueprint_name": "pdf-processor", - "flow_id": "flow-123", - "description": "Processing document batch 1" -} -``` - -Response: -```json -{} -``` - -#### STOP-FLOW - Stop Flow 
Instance - -Request: -```json -{ - "operation": "stop-flow", - "flow_id": "flow-123" -} -``` - -Response: -```json -{} -``` - -## REST service - -The REST service is available at `/api/v1/flow` and accepts the above request formats. - -## Websocket - -Requests have a `request` object containing the operation fields. -Responses have a `response` object containing the response fields. - -Request: -```json -{ - "id": "unique-request-id", - "service": "flow", - "request": { - "operation": "list-classes" - } -} -``` - -Response: -```json -{ - "id": "unique-request-id", - "response": { - "blueprint_names": ["pdf-processor", "text-analyzer"] - }, - "complete": true -} -``` - -## Pulsar - -The Pulsar schema for the Flow API is defined in Python code here: - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/schema/flows.py - -Default request queue: -`non-persistent://tg/request/flow` - -Default response queue: -`non-persistent://tg/response/flow` - -Request schema: -`trustgraph.schema.FlowRequest` - -Response schema: -`trustgraph.schema.FlowResponse` - -## Flow Service Methods - -Flow instances provide access to various TrustGraph services through flow-specific endpoints: - -### MCP Tool Service - Invoke MCP Tools - -The `mcp_tool` method allows invoking MCP (Model Control Protocol) tools within a flow context. 
- -Request: -```json -{ - "name": "file-reader", - "parameters": { - "path": "/path/to/file.txt" - } -} -``` - -Response: -```json -{ - "object": {"content": "file contents here", "size": 1024} -} -``` - -Or for text responses: -```json -{ - "text": "plain text response" -} -``` - -### Other Service Methods - -Flow instances also provide access to: -- `text_completion` - LLM text completion -- `agent` - Agent question answering -- `graph_rag` - Graph-based RAG queries -- `document_rag` - Document-based RAG queries -- `embeddings` - Text embeddings -- `prompt` - Prompt template processing -- `triples_query` - Knowledge graph queries -- `load_document` - Document loading -- `load_text` - Text loading - -## Python SDK - -The Python SDK provides convenient access to the Flow API: - -```python -from trustgraph.api.flow import FlowClient - -client = FlowClient() - -# List all flow blueprintes -classes = await client.list_blueprints() - -# Get a flow blueprint definition -definition = await client.get_blueprint("pdf-processor") - -# Start a flow instance -await client.start_flow("pdf-processor", "flow-123", "Processing batch 1") - -# List active flows -flows = await client.list_flows() - -# Stop a flow instance -await client.stop_flow("flow-123") - -# Use flow instance services -flow = client.id("flow-123") -result = await flow.mcp_tool("file-reader", {"path": "/path/to/file.txt"}) -``` - -## Features - -- **Flow Blueprintes**: Templates that define workflow structure and interfaces -- **Flow Instances**: Active running workflows based on flow blueprintes -- **Dynamic Management**: Flows can be started/stopped dynamically -- **Template Processing**: Uses template replacement for customizing flow instances -- **Integration**: Works with TrustGraph ecosystem for data processing pipelines -- **Persistent Storage**: Flow definitions and instances stored for reliability - -## Use Cases - -- **Document Processing**: Orchestrating PDF processing through chunking, extraction, and 
storage -- **Knowledge Extraction**: Managing workflows for relationship and definition extraction -- **Data Pipelines**: Coordinating complex multi-step data processing workflows -- **Resource Management**: Dynamically scaling processing flows based on demand \ No newline at end of file diff --git a/docs/apis/api-graph-embeddings.md b/docs/apis/api-graph-embeddings.md deleted file mode 100644 index 368d4678..00000000 --- a/docs/apis/api-graph-embeddings.md +++ /dev/null @@ -1,156 +0,0 @@ - -# TrustGraph Graph Embeddings API - -The purpose of this API is to search for knowledge graph entities -by embeddings. The request is a list of embeddings, the response is -a list of knowledge graph entities. The search is performed using a -vector store. - -## Request/response - -### Request - -The request contains the following fields: -- `vectors`: An array of embeddings. Each embedding is itself an array - of numbers. -- `limit`: Optional: a limit on the number of graph entities to return. - -### Response - -The response contains the following fields: -- `entities`: An array of graph entities. The entity type is described here: - -TrustGraph uses the same schema for knowledge graph elements: -- `value`: the entity URI or literal value depending on whether this is - graph entity or literal value. -- `is_uri`: A boolean value which is true if this is a graph entity i.e. - `value` is a URI, not a literal value. - -## REST service - -The REST service accepts a request object containing the `vectors` field. -The response is a JSON object containing the `entities` field. - -To reduce the size of the JSON, the graph entities are encoded as an -object with `value` and `is_uri` mapped to `v` and `e` respectively. - -e.g. - -Request: -``` -{ - "vectors": [ - [ - 0.04013510048389435, - 0.07536131888628006, - ... 
- -0.10790473222732544, - 0.03591292351484299 - ] - ], - "limit": 15 -} -``` - -Response: - -``` -{ - "entities": [ - { - "v": "http://trustgraph.ai/e/space-station-modules", - "e": true - }, - { - "v": "http://trustgraph.ai/e/rocket-propellants", - "e": true - }, - ] -} -``` - -## Websocket - -The websocket service accepts a request object containing the `vectors` field. -The response is a JSON object containing the `entities` field. - -To reduce the size of the JSON, the graph entities are encoded as an -object with `value` and `is_uri` mapped to `v` and `e` respectively. - -e.g. - -Request: - -``` -{ - "id": "qgzw1287vfjc8wsk-3", - "service": "graph-embeddings-query", - "flow": "default", - "request": { - "vectors": [ - [ - 0.04013510048389435, - 0.07536131888628006, - ... - -0.10790473222732544, - 0.03591292351484299 - ] - ], - "limit": 15 - } -} -``` - -Response: - -``` -{ - "id": "qgzw1287vfjc8wsk-3", - "response": { - "entities": [ - { - "v": "http://trustgraph.ai/e/space-station-modules", - "e": true - }, - { - "v": "http://trustgraph.ai/e/rocket-propellants", - "e": true - }, - ] - }, - "complete": true -} -``` - -## Pulsar - -The Pulsar schema for the Graph Embeddings API is defined in Python code here: - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/schema/graph.py - -Default request queue: -`non-persistent://tg/request/graph-embeddings` - -Default response queue: -`non-persistent://tg/response/graph-embeddings` - -Request schema: -`trustgraph.schema.GraphEmbeddingsRequest` - -Response schema: -`trustgraph.schema.GraphEmbeddingsResponse` - -## Pulsar Python client - -The client class is -`trustgraph.clients.GraphEmbeddingsClient` - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/clients/graph_embeddings.py - - - - - - - - diff --git a/docs/apis/api-graph-rag.md b/docs/apis/api-graph-rag.md deleted file mode 100644 index b32c4682..00000000 --- a/docs/apis/api-graph-rag.md +++ 
/dev/null @@ -1,98 +0,0 @@ - -# TrustGraph Graph RAG API - -This presents a prompt to the Graph RAG service and retrieves the answer. -This makes use of a number of the other APIs behind the scenes: -Embeddings, Graph Embeddings, Prompt, TextCompletion, Triples Query. - -## Request/response - -### Request - -The request contains the following fields: -- `query`: The question to answer - -### Response - -The response contains the following fields: -- `response`: LLM response - -## REST service - -The REST service accepts a request object containing the `query` field. -The response is a JSON object containing the `response` field. - -e.g. - -Request: -``` -{ - "query": "What does NASA stand for?" -} -``` - -Response: - -``` -{ - "response": "National Aeronautics and Space Administration" -} -``` - -## Websocket - -Requests have a `request` object containing the `query` field. -Responses have a `response` object containing `response` field. - -e.g. - -Request: - -``` -{ - "id": "blrqotfefnmnh7de-14", - "service": "graph-rag", - "flow": "default", - "request": { - "query": "What does NASA stand for?" 
- } -} -``` - -Response: - -``` -{ - "id": "blrqotfefnmnh7de-14", - "response": { - "response": "National Aeronautics and Space Administration" - }, - "complete": true -} -``` - -## Pulsar - -The Pulsar schema for the Graph RAG API is defined in Python code here: - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/schema/retrieval.py - -Default request queue: -`non-persistent://tg/request/graph-rag` - -Default response queue: -`non-persistent://tg/response/graph-rag` - -Request schema: -`trustgraph.schema.GraphRagRequest` - -Response schema: -`trustgraph.schema.GraphRagResponse` - -## Pulsar Python client - -The client class is -`trustgraph.clients.GraphRagClient` - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/clients/graph_rag_client.py - diff --git a/docs/apis/api-knowledge.md b/docs/apis/api-knowledge.md deleted file mode 100644 index fd053784..00000000 --- a/docs/apis/api-knowledge.md +++ /dev/null @@ -1,310 +0,0 @@ -# TrustGraph Knowledge API - -This API provides knowledge graph management for TrustGraph. It handles storage, retrieval, -and flow integration of knowledge cores containing RDF triples and graph embeddings with -multi-tenant support. 
- -## Request/response - -### Request - -The request contains the following fields: -- `operation`: The operation to perform (see operations below) -- `user`: User identifier (for user-specific operations) -- `id`: Knowledge core identifier -- `flow`: Flow identifier (for load operations) -- `collection`: Collection identifier (for load operations) -- `triples`: RDF triples data (for put operations) -- `graph_embeddings`: Graph embeddings data (for put operations) - -### Response - -The response contains the following fields: -- `error`: Error information if operation fails -- `ids`: Array of knowledge core IDs (returned by list operation) -- `eos`: End of stream indicator for streaming responses -- `triples`: RDF triples data (returned by get operation) -- `graph_embeddings`: Graph embeddings data (returned by get operation) - -## Operations - -### PUT-KG-CORE - Store Knowledge Core - -Request: -```json -{ - "operation": "put-kg-core", - "user": "alice", - "id": "core-123", - "triples": { - "metadata": { - "id": "core-123", - "user": "alice", - "collection": "research" - }, - "triples": [ - { - "s": {"value": "Person1", "is_uri": true}, - "p": {"value": "hasName", "is_uri": true}, - "o": {"value": "John Doe", "is_uri": false} - }, - { - "s": {"value": "Person1", "is_uri": true}, - "p": {"value": "worksAt", "is_uri": true}, - "o": {"value": "Company1", "is_uri": true} - } - ] - }, - "graph_embeddings": { - "metadata": { - "id": "core-123", - "user": "alice", - "collection": "research" - }, - "entities": [ - { - "entity": {"value": "Person1", "is_uri": true}, - "vectors": [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]] - } - ] - } -} -``` - -Response: -```json -{} -``` - -### GET-KG-CORE - Retrieve Knowledge Core - -Request: -```json -{ - "operation": "get-kg-core", - "id": "core-123" -} -``` - -Response: -```json -{ - "triples": { - "metadata": { - "id": "core-123", - "user": "alice", - "collection": "research" - }, - "triples": [ - { - "s": {"value": "Person1", "is_uri": 
true}, - "p": {"value": "hasName", "is_uri": true}, - "o": {"value": "John Doe", "is_uri": false} - } - ] - }, - "graph_embeddings": { - "metadata": { - "id": "core-123", - "user": "alice", - "collection": "research" - }, - "entities": [ - { - "entity": {"value": "Person1", "is_uri": true}, - "vectors": [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]] - } - ] - } -} -``` - -### LIST-KG-CORES - List Knowledge Cores - -Request: -```json -{ - "operation": "list-kg-cores", - "user": "alice" -} -``` - -Response: -```json -{ - "ids": ["core-123", "core-456", "core-789"] -} -``` - -### DELETE-KG-CORE - Delete Knowledge Core - -Request: -```json -{ - "operation": "delete-kg-core", - "user": "alice", - "id": "core-123" -} -``` - -Response: -```json -{} -``` - -### LOAD-KG-CORE - Load Knowledge Core into Flow - -Request: -```json -{ - "operation": "load-kg-core", - "id": "core-123", - "flow": "qa-flow", - "collection": "research" -} -``` - -Response: -```json -{} -``` - -### UNLOAD-KG-CORE - Unload Knowledge Core from Flow - -Request: -```json -{ - "operation": "unload-kg-core", - "id": "core-123" -} -``` - -Response: -```json -{} -``` - -## Data Structures - -### Triple Structure -Each RDF triple contains: -- `s`: Subject (Value object) -- `p`: Predicate (Value object) -- `o`: Object (Value object) - -### Value Structure -- `value`: The actual value as string -- `is_uri`: Boolean indicating if value is a URI -- `type`: Data type of the value (optional) - -### Triples Structure -- `metadata`: Metadata including ID, user, collection -- `triples`: Array of Triple objects - -### Graph Embeddings Structure -- `metadata`: Metadata including ID, user, collection -- `entities`: Array of EntityEmbeddings objects - -### Entity Embeddings Structure -- `entity`: The entity being embedded (Value object) -- `vectors`: Array of vector embeddings (Array of Array of Double) - -## REST service - -The REST service is available at `/api/v1/knowledge` and accepts the above request formats. 
- -## Websocket - -Requests have a `request` object containing the operation fields. -Responses have a `response` object containing the response fields. - -Request: -```json -{ - "id": "unique-request-id", - "service": "knowledge", - "request": { - "operation": "list-kg-cores", - "user": "alice" - } -} -``` - -Response: -```json -{ - "id": "unique-request-id", - "response": { - "ids": ["core-123", "core-456"] - }, - "complete": true -} -``` - -## Pulsar - -The Pulsar schema for the Knowledge API is defined in Python code here: - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/schema/knowledge.py - -Default request queue: -`non-persistent://tg/request/knowledge` - -Default response queue: -`non-persistent://tg/response/knowledge` - -Request schema: -`trustgraph.schema.KnowledgeRequest` - -Response schema: -`trustgraph.schema.KnowledgeResponse` - -## Python SDK - -The Python SDK provides convenient access to the Knowledge API: - -```python -from trustgraph.api.knowledge import KnowledgeClient - -client = KnowledgeClient() - -# List knowledge cores -cores = await client.list_kg_cores("alice") - -# Get a knowledge core -core = await client.get_kg_core("core-123") - -# Store a knowledge core -await client.put_kg_core( - user="alice", - id="core-123", - triples=triples_data, - graph_embeddings=embeddings_data -) - -# Load core into flow -await client.load_kg_core("core-123", "qa-flow", "research") - -# Delete a knowledge core -await client.delete_kg_core("alice", "core-123") -``` - -## Features - -- **Knowledge Core Management**: Store, retrieve, list, and delete knowledge cores -- **Dual Data Types**: Support for both RDF triples and graph embeddings -- **Flow Integration**: Load knowledge cores into processing flows -- **Multi-tenant Support**: User-specific knowledge cores with isolation -- **Streaming Support**: Efficient transfer of large knowledge cores -- **Collection Organization**: Group knowledge cores by collection -- 
**Semantic Reasoning**: RDF triples enable symbolic reasoning -- **Vector Similarity**: Graph embeddings enable neural approaches - -## Use Cases - -- **Knowledge Base Construction**: Build semantic knowledge graphs from documents -- **Question Answering**: Load knowledge cores for graph-based QA systems -- **Semantic Search**: Use embeddings for similarity-based knowledge retrieval -- **Multi-domain Knowledge**: Organize knowledge by user and collection -- **Hybrid Reasoning**: Combine symbolic (triples) and neural (embeddings) approaches -- **Knowledge Transfer**: Export and import knowledge cores between systems \ No newline at end of file diff --git a/docs/apis/api-librarian.md b/docs/apis/api-librarian.md deleted file mode 100644 index 43db4258..00000000 --- a/docs/apis/api-librarian.md +++ /dev/null @@ -1,392 +0,0 @@ -# TrustGraph Librarian API - -This API provides document library management for TrustGraph. It handles document storage, -metadata management, and processing orchestration using hybrid storage (S3-compatible object -storage for content, Cassandra for metadata) with multi-user support. 
- -## Request/response - -### Request - -The request contains the following fields: -- `operation`: The operation to perform (see operations below) -- `document_id`: Document identifier (for document operations) -- `document_metadata`: Document metadata object (for add/update operations) - - `id`: Document identifier (required) - - `time`: Unix timestamp in seconds as a float (required for add operations) - - `kind`: MIME type of document (required, e.g., "text/plain", "application/pdf") - - `title`: Document title (optional) - - `comments`: Document comments (optional) - - `user`: Document owner (required) - - `tags`: Array of tags (optional) - - `metadata`: Array of RDF triples (optional) - each triple has: - - `s`: Subject with `v` (value) and `e` (is_uri boolean) - - `p`: Predicate with `v` (value) and `e` (is_uri boolean) - - `o`: Object with `v` (value) and `e` (is_uri boolean) -- `content`: Document content as base64-encoded bytes (for add operations) -- `processing_id`: Processing job identifier (for processing operations) -- `processing_metadata`: Processing metadata object (for add-processing) -- `user`: User identifier (required for most operations) -- `collection`: Collection filter (optional for list operations) -- `criteria`: Query criteria array (for filtering operations) - -### Response - -The response contains the following fields: -- `error`: Error information if operation fails -- `document_metadata`: Single document metadata (for get operations) -- `content`: Document content as base64-encoded bytes (for get-content) -- `document_metadatas`: Array of document metadata (for list operations) -- `processing_metadatas`: Array of processing metadata (for list-processing) - -## Document Operations - -### ADD-DOCUMENT - Add Document to Library - -Request: -```json -{ - "operation": "add-document", - "document_metadata": { - "id": "doc-123", - "time": 1640995200.0, - "kind": "application/pdf", - "title": "Research Paper", - "comments": "Important 
research findings", - "user": "alice", - "tags": ["research", "ai", "machine-learning"], - "metadata": [ - { - "s": { - "v": "http://example.com/doc-123", - "e": true - }, - "p": { - "v": "http://purl.org/dc/elements/1.1/creator", - "e": true - }, - "o": { - "v": "Dr. Smith", - "e": false - } - } - ] - }, - "content": "JVBERi0xLjQKMSAwIG9iago8PAovVHlwZSAvQ2F0YWxvZwovUGFnZXMgMiAwIFIKPj4KZW5kb2JqCg==" -} -``` - -Response: -```json -{} -``` - -### GET-DOCUMENT-METADATA - Get Document Metadata - -Request: -```json -{ - "operation": "get-document-metadata", - "document_id": "doc-123", - "user": "alice" -} -``` - -Response: -```json -{ - "document_metadata": { - "id": "doc-123", - "time": 1640995200.0, - "kind": "application/pdf", - "title": "Research Paper", - "comments": "Important research findings", - "user": "alice", - "tags": ["research", "ai", "machine-learning"], - "metadata": [ - { - "s": { - "v": "http://example.com/doc-123", - "e": true - }, - "p": { - "v": "http://purl.org/dc/elements/1.1/creator", - "e": true - }, - "o": { - "v": "Dr. 
Smith", - "e": false - } - } - ] - } -} -``` - -### GET-DOCUMENT-CONTENT - Get Document Content - -Request: -```json -{ - "operation": "get-document-content", - "document_id": "doc-123", - "user": "alice" -} -``` - -Response: -```json -{ - "content": "JVBERi0xLjQKMSAwIG9iago8PAovVHlwZSAvQ2F0YWxvZwovUGFnZXMgMiAwIFIKPj4KZW5kb2JqCg==" -} -``` - -### LIST-DOCUMENTS - List User's Documents - -Request: -```json -{ - "operation": "list-documents", - "user": "alice", - "collection": "research" -} -``` - -Response: -```json -{ - "document_metadatas": [ - { - "id": "doc-123", - "time": 1640995200.0, - "kind": "application/pdf", - "title": "Research Paper", - "comments": "Important research findings", - "user": "alice", - "tags": ["research", "ai"] - }, - { - "id": "doc-124", - "time": 1640995300.0, - "kind": "text/plain", - "title": "Meeting Notes", - "comments": "Team meeting discussion", - "user": "alice", - "tags": ["meeting", "notes"] - } - ] -} -``` - -### UPDATE-DOCUMENT - Update Document Metadata - -Request: -```json -{ - "operation": "update-document", - "document_metadata": { - "id": "doc-123", - "time": 1640995500.0, - "title": "Updated Research Paper", - "comments": "Updated findings and conclusions", - "user": "alice", - "tags": ["research", "ai", "machine-learning", "updated"], - "metadata": [] - } -} -``` - -Response: -```json -{} -``` - -### REMOVE-DOCUMENT - Remove Document - -Request: -```json -{ - "operation": "remove-document", - "document_id": "doc-123", - "user": "alice" -} -``` - -Response: -```json -{} -``` - -## Processing Operations - -### ADD-PROCESSING - Start Document Processing - -Request: -```json -{ - "operation": "add-processing", - "processing_metadata": { - "id": "proc-456", - "document_id": "doc-123", - "time": 1640995400.0, - "flow": "pdf-extraction", - "user": "alice", - "collection": "research", - "tags": ["extraction", "nlp"] - } -} -``` - -Response: -```json -{} -``` - -### LIST-PROCESSING - List Processing Jobs - -Request: -```json -{ 
- "operation": "list-processing", - "user": "alice", - "collection": "research" -} -``` - -Response: -```json -{ - "processing_metadatas": [ - { - "id": "proc-456", - "document_id": "doc-123", - "time": 1640995400.0, - "flow": "pdf-extraction", - "user": "alice", - "collection": "research", - "tags": ["extraction", "nlp"] - } - ] -} -``` - -### REMOVE-PROCESSING - Stop Processing Job - -Request: -```json -{ - "operation": "remove-processing", - "processing_id": "proc-456", - "user": "alice" -} -``` - -Response: -```json -{} -``` - -## REST service - -The REST service is available at `/api/v1/librarian` and accepts the above request formats. - -## Websocket - -Requests have a `request` object containing the operation fields. -Responses have a `response` object containing the response fields. - -Request: -```json -{ - "id": "unique-request-id", - "service": "librarian", - "request": { - "operation": "list-documents", - "user": "alice" - } -} -``` - -Response: -```json -{ - "id": "unique-request-id", - "response": { - "document_metadatas": [...] 
- }, - "complete": true -} -``` - -## Pulsar - -The Pulsar schema for the Librarian API is defined in Python code here: - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/schema/library.py - -Default request queue: -`non-persistent://tg/request/librarian` - -Default response queue: -`non-persistent://tg/response/librarian` - -Request schema: -`trustgraph.schema.LibrarianRequest` - -Response schema: -`trustgraph.schema.LibrarianResponse` - -## Python SDK - -The Python SDK provides convenient access to the Librarian API: - -```python -from trustgraph.api.library import LibrarianClient - -client = LibrarianClient() - -# Add a document -with open("document.pdf", "rb") as f: - content = f.read() - -await client.add_document( - doc_id="doc-123", - title="Research Paper", - content=content, - user="alice", - tags=["research", "ai"] -) - -# Get document metadata -metadata = await client.get_document_metadata("doc-123", "alice") - -# List documents -documents = await client.list_documents("alice", collection="research") - -# Start processing -await client.add_processing( - processing_id="proc-456", - document_id="doc-123", - flow="pdf-extraction", - user="alice" -) -``` - -## Features - -- **Hybrid Storage**: S3-compatible object storage (MinIO, Ceph RGW, AWS S3, etc.) for content, Cassandra for metadata -- **Multi-user Support**: User-based document ownership and access control -- **Rich Metadata**: RDF-style metadata triples and tagging system -- **Processing Integration**: Automatic triggering of document processing workflows -- **Content Types**: Support for multiple document formats (PDF, text, etc.) -- **Collection Management**: Optional document grouping by collection -- **Metadata Search**: Query documents by metadata criteria -- **Flexible Storage Backend**: Works with any S3-compatible storage (MinIO, Ceph RADOS Gateway, AWS S3, Cloudflare R2, etc.) 
- -## Use Cases - -- **Document Management**: Store and organize documents with rich metadata -- **Knowledge Extraction**: Process documents to extract structured knowledge -- **Research Libraries**: Manage collections of research papers and documents -- **Content Processing**: Orchestrate document processing workflows -- **Multi-tenant Systems**: Support multiple users with isolated document libraries \ No newline at end of file diff --git a/docs/apis/api-mcp-tool.md b/docs/apis/api-mcp-tool.md deleted file mode 100644 index 452f4e90..00000000 --- a/docs/apis/api-mcp-tool.md +++ /dev/null @@ -1,137 +0,0 @@ -# TrustGraph MCP Tool API - -This is a higher-level interface to the MCP (Model Control Protocol) tool service. The input -specifies an MCP tool by name and parameters to pass to the tool. - -## Request/response - -### Request - -The request contains the following fields: -- `name`: The MCP tool name -- `parameters`: A set of key/values describing the tool parameters - -### Response - -The response contains either of these fields: -- `text`: A plain text response -- `object`: A structured object response - -## REST service - -The REST service accepts `name` and `parameters` fields, with parameters -encoded as a JSON object. - -e.g. - -In this example, the MCP tool takes parameters and returns a -structured response in the `object` field. - -Request: -``` -{ - "name": "file-reader", - "parameters": { - "path": "/path/to/file.txt" - } -} -``` - -Response: - -``` -{ - "object": {"content": "file contents here", "size": 1024} -} -``` - -## Websocket - -Requests have `name` and `parameters` fields. - -e.g. 
- -Request: - -``` -{ - "id": "akshfkiehfkseffh-142", - "service": "mcp-tool", - "flow": "default", - "request": { - "name": "file-reader", - "parameters": { - "path": "/path/to/file.txt" - } - } -} -``` - -Responses: - -``` -{ - "id": "akshfkiehfkseffh-142", - "response": { - "object": {"content": "file contents here", "size": 1024} - }, - "complete": true -} -``` - -e.g. - -An example which returns plain text - -Request: - -``` -{ - "id": "akshfkiehfkseffh-141", - "service": "mcp-tool", - "request": { - "name": "calculator", - "parameters": { - "expression": "2 + 2" - } - } -} -``` - -Response: - -``` -{ - "id": "akshfkiehfkseffh-141", - "response": { - "text": "4" - }, - "complete": true -} -``` - - -## Pulsar - -The Pulsar schema for the MCP Tool API is defined in Python code here: - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/schema/mcp_tool.py - -Default request queue: -`non-persistent://tg/request/mcp-tool` - -Default response queue: -`non-persistent://tg/response/mcp-tool` - -Request schema: -`trustgraph.schema.McpToolRequest` - -Response schema: -`trustgraph.schema.McpToolResponse` - -## Pulsar Python client - -The client class is -`trustgraph.clients.McpToolClient` - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/clients/mcp_tool_client.py diff --git a/docs/apis/api-metrics.md b/docs/apis/api-metrics.md deleted file mode 100644 index 4c194451..00000000 --- a/docs/apis/api-metrics.md +++ /dev/null @@ -1,313 +0,0 @@ -# TrustGraph Metrics API - -This API provides access to TrustGraph system metrics through a Prometheus proxy endpoint. -It allows authenticated access to monitoring and observability data from the TrustGraph -system components. 
- -## Overview - -The Metrics API is implemented as a proxy to a Prometheus metrics server, providing: -- System performance metrics -- Service health information -- Resource utilization data -- Request/response statistics -- Error rates and latency metrics - -## Authentication - -All metrics endpoints require Bearer token authentication: - -``` -Authorization: Bearer -``` - -Unauthorized requests return HTTP 401. - -## Endpoint - -**Base Path:** `/api/metrics` - -**Method:** GET - -**Description:** Proxies requests to the underlying Prometheus API - -## Usage Examples - -### Query Current Metrics - -```bash -# Get all available metrics -curl -H "Authorization: Bearer your-token" \ - "http://api-gateway:8080/api/metrics/query?query=up" - -# Get specific metric with time range -curl -H "Authorization: Bearer your-token" \ - "http://api-gateway:8080/api/metrics/query_range?query=cpu_usage&start=1640995200&end=1640998800&step=60" - -# Get metric metadata -curl -H "Authorization: Bearer your-token" \ - "http://api-gateway:8080/api/metrics/metadata" -``` - -### Common Prometheus API Endpoints - -The metrics API supports all standard Prometheus API endpoints: - -#### Instant Queries -``` -GET /api/metrics/query?query= -``` - -#### Range Queries -``` -GET /api/metrics/query_range?query=&start=&end=&step= -``` - -#### Metadata -``` -GET /api/metrics/metadata -GET /api/metrics/metadata?metric= -``` - -#### Series -``` -GET /api/metrics/series?match[]= -``` - -#### Label Values -``` -GET /api/metrics/label//values -``` - -#### Targets -``` -GET /api/metrics/targets -``` - -## Example Queries - -### System Health -```bash -# Check if services are up -curl -H "Authorization: Bearer token" \ - "http://api-gateway:8080/api/metrics/query?query=up" - -# Get service uptime -curl -H "Authorization: Bearer token" \ - "http://api-gateway:8080/api/metrics/query?query=time()-process_start_time_seconds" -``` - -### Performance Metrics -```bash -# CPU usage -curl -H "Authorization: Bearer 
token" \ - "http://api-gateway:8080/api/metrics/query?query=rate(cpu_seconds_total[5m])" - -# Memory usage -curl -H "Authorization: Bearer token" \ - "http://api-gateway:8080/api/metrics/query?query=process_resident_memory_bytes" - -# Request rate -curl -H "Authorization: Bearer token" \ - "http://api-gateway:8080/api/metrics/query?query=rate(http_requests_total[5m])" -``` - -### TrustGraph-Specific Metrics -```bash -# Document processing rate -curl -H "Authorization: Bearer token" \ - "http://api-gateway:8080/api/metrics/query?query=rate(trustgraph_documents_processed_total[5m])" - -# Knowledge graph size -curl -H "Authorization: Bearer token" \ - "http://api-gateway:8080/api/metrics/query?query=trustgraph_triples_count" - -# Embedding generation rate -curl -H "Authorization: Bearer token" \ - "http://api-gateway:8080/api/metrics/query?query=rate(trustgraph_embeddings_generated_total[5m])" -``` - -## Response Format - -Responses follow the standard Prometheus API format: - -### Successful Query Response -```json -{ - "status": "success", - "data": { - "resultType": "vector", - "result": [ - { - "metric": { - "__name__": "up", - "instance": "api-gateway:8080", - "job": "trustgraph" - }, - "value": [1640995200, "1"] - } - ] - } -} -``` - -### Range Query Response -```json -{ - "status": "success", - "data": { - "resultType": "matrix", - "result": [ - { - "metric": { - "__name__": "cpu_usage", - "instance": "worker-1" - }, - "values": [ - [1640995200, "0.15"], - [1640995260, "0.18"], - [1640995320, "0.12"] - ] - } - ] - } -} -``` - -### Error Response -```json -{ - "status": "error", - "errorType": "bad_data", - "error": "invalid query syntax" -} -``` - -## Available Metrics - -### Standard System Metrics -- `up`: Service availability (1 = up, 0 = down) -- `process_resident_memory_bytes`: Memory usage -- `process_cpu_seconds_total`: CPU time -- `http_requests_total`: HTTP request count -- `http_request_duration_seconds`: Request latency - -### TrustGraph-Specific 
Metrics -- `trustgraph_documents_processed_total`: Documents processed count -- `trustgraph_triples_count`: Knowledge graph triple count -- `trustgraph_embeddings_generated_total`: Embeddings generated count -- `trustgraph_flow_executions_total`: Flow execution count -- `trustgraph_pulsar_messages_total`: Pulsar message count -- `trustgraph_errors_total`: Error count by component - -## Time Series Queries - -### Time Ranges -Use standard Prometheus time range formats: -- `5m`: 5 minutes -- `1h`: 1 hour -- `1d`: 1 day -- `1w`: 1 week - -### Rate Calculations -```bash -# 5-minute rate -rate(metric_name[5m]) - -# Increase over time -increase(metric_name[1h]) -``` - -### Aggregations -```bash -# Sum across instances -sum(metric_name) - -# Average by label -avg by (instance) (metric_name) - -# Top 5 values -topk(5, metric_name) -``` - -## Integration Examples - -### Python Integration -```python -import requests - -def query_metrics(token, query): - headers = {"Authorization": f"Bearer {token}"} - params = {"query": query} - - response = requests.get( - "http://api-gateway:8080/api/metrics/query", - headers=headers, - params=params - ) - - return response.json() - -# Get system uptime -uptime = query_metrics("your-token", "time() - process_start_time_seconds") -``` - -### JavaScript Integration -```javascript -async function queryMetrics(token, query) { - const response = await fetch( - `http://api-gateway:8080/api/metrics/query?query=${encodeURIComponent(query)}`, - { - headers: { - 'Authorization': `Bearer ${token}` - } - } - ); - - return await response.json(); -} - -// Get request rate -const requestRate = await queryMetrics('your-token', 'rate(http_requests_total[5m])'); -``` - -## Error Handling - -### Common HTTP Status Codes -- `200`: Success -- `400`: Bad request (invalid query) -- `401`: Unauthorized (invalid/missing token) -- `422`: Unprocessable entity (query execution error) -- `500`: Internal server error - -### Error Types -- `bad_data`: Invalid query 
syntax -- `timeout`: Query execution timeout -- `canceled`: Query was canceled -- `execution`: Query execution error - -## Best Practices - -### Query Optimization -- Use appropriate time ranges to limit data volume -- Apply label filters to reduce result sets -- Use recording rules for frequently accessed metrics - -### Rate Limiting -- Avoid high-frequency polling -- Cache results when appropriate -- Use appropriate step sizes for range queries - -### Security -- Keep API tokens secure -- Use HTTPS in production -- Rotate tokens regularly - -## Use Cases - -- **System Monitoring**: Track system health and performance -- **Capacity Planning**: Monitor resource utilization trends -- **Alerting**: Set up alerts based on metric thresholds -- **Performance Analysis**: Analyze system performance over time -- **Debugging**: Investigate issues using detailed metrics -- **Business Intelligence**: Track document processing and knowledge extraction metrics \ No newline at end of file diff --git a/docs/apis/api-prompt.md b/docs/apis/api-prompt.md deleted file mode 100644 index ff50a6e2..00000000 --- a/docs/apis/api-prompt.md +++ /dev/null @@ -1,141 +0,0 @@ - -# TrustGraph Prompt API - -This is a higher-level interface to the LLM service. The input -specifies a prompt template by ID and some variables to include in the -template. - -## Request/response - -### Request - -The request contains the following fields: -- `id`: A prompt template ID -- `variables`: A set of key/values describing the variables - -### Response - -The response contains either of these fields: -- `text`: A plain text response -- `object`: A structured object, JSON-encoded - -## REST service - -The REST service accepts `id` and `variables` fields, the variables are -encoded as a JSON object. - -e.g. - -In this example, the template takes a `text` variable and returns an -array of entity definitions in the `object` field. The value is -JSON-encoded. 
- -Request: -``` -{ - "id": "extract-definitions", - "variables": { - "text": "A cat is a domesticated Felidae animal" - } -} -``` - -Response: - -``` -{ - "object": "[{\"entity\": \"cat\", \"definition\": \"a domesticated Felidae animal\"}]" -} -``` - -## Websocket - -Requests have `id` and `variables` fields. - -e.g. - -Request: - -``` -{ - "id": "akshfkiehfkseffh-142", - "service": "prompt", - "flow": "default", - "request": { - "id": "extract-definitions", - "variables": { - "text": "A cat is a domesticated Felidae animal" - } - } -} -``` - -Responses: - -``` -{ - "id": "akshfkiehfkseffh-142", - "response": { - "object": "[{\"entity\": \"cat\", \"definition\": \"a domesticated Felidae animal\"}]" - }, - "complete": true -} -``` - -e.g. - -An example which returns plain text - -Request: - -``` -{ - "id": "akshfkiehfkseffh-141", - "service": "prompt", - "request": { - "id": "question", - "variables": { - "question": "What is 2 + 2?" - } - } -} -``` - -Response: - -``` -{ - "id": "akshfkiehfkseffh-141", - "response": { - "text": "2 + 2 = 4" - }, - "complete": true -} -``` - - -## Pulsar - -The Pulsar schema for the Prompt API is defined in Python code here: - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/schema/prompt.py - -Default request queue: -`non-persistent://tg/request/prompt` - -Default response queue: -`non-persistent://tg/response/prompt` - -Request schema: -`trustgraph.schema.PromptRequest` - -Response schema: -`trustgraph.schema.PromptResponse` - -## Pulsar Python client - -The client class is -`trustgraph.clients.PromptClient` - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/clients/prompt_client.py - diff --git a/docs/apis/api-text-completion.md b/docs/apis/api-text-completion.md deleted file mode 100644 index 1d8eb1c2..00000000 --- a/docs/apis/api-text-completion.md +++ /dev/null @@ -1,106 +0,0 @@ - -# TrustGraph Text Completion API - -This is a low-level interface to the 
LLM service. For a higher-level -interface with template management, consider the -[Prompt API](api-prompt.md). - -## Request/response - -### Request - -Some LLM systems permit specifying a separate `system` prompt. When -the same system prompt is used repeatedly, this can result in lower -token costs for the system part or quicker LLM response. - -The request contains the following fields: -- `system`: A string, the system part -- `prompt`: A string, the user part - -### Response - -The response contains the following fields: -- `response`: LLM response - -## REST service - -The REST service accepts a request object containing the `system` and `prompt` fields. -The response is a JSON object containing the `response` field. - -e.g. - -Request: -``` -{ - "system": "You are a helpful agent", - "prompt": "What does NASA stand for?" -} -``` - -Response: - -``` -{ - "response": "National Aeronautics and Space Administration" -} -``` - -## Websocket - -Requests have a `request` object containing the `system` and -`prompt` fields. -Responses have a `response` object containing `response` field. - -e.g. - -Request: - -``` -{ - "id": "blrqotfefnmnh7de-1", - "service": "text-completion", - "flow": "default", - "request": { - "system": "You are a helpful agent", - "prompt": "What does NASA stand for?" 
- } -} -``` - -Response: - -``` -{ - "id": "blrqotfefnmnh7de-1", - "response": { - "response": "National Aeronautics and Space Administration" - }, - "complete": true -} -``` - -## Pulsar - -The Pulsar schema for the Text Completion API is defined in Python code here: - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/schema/models.py - -Default request queue: -`non-persistent://tg/request/text-completion` - -Default response queue: -`non-persistent://tg/response/text-completion` - -Request schema: -`trustgraph.schema.TextCompletionRequest` - -Response schema: -`trustgraph.schema.TextCompletionResponse` - -## Pulsar Python client - -The client class is -`trustgraph.clients.LlmClient` - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/clients/llm_client.py - diff --git a/docs/apis/api-text-load.md b/docs/apis/api-text-load.md deleted file mode 100644 index f61a08a3..00000000 --- a/docs/apis/api-text-load.md +++ /dev/null @@ -1,168 +0,0 @@ -# TrustGraph Text Load API - -This API loads text documents into TrustGraph processing pipelines. It's a sender API -that accepts text documents with metadata and queues them for processing through -specified flows. 
- -## Request Format - -The text-load API accepts a JSON request with the following fields: -- `id`: Document identifier (typically a URI) -- `metadata`: Array of RDF triples providing document metadata -- `charset`: Character encoding (defaults to "utf-8") -- `text`: Base64-encoded text content -- `user`: User identifier (defaults to "trustgraph") -- `collection`: Collection identifier (defaults to "default") - -## Request Example - -```json -{ - "id": "https://example.com/documents/research-paper-123", - "metadata": [ - { - "s": {"v": "https://example.com/documents/research-paper-123", "e": true}, - "p": {"v": "http://purl.org/dc/terms/title", "e": true}, - "o": {"v": "Machine Learning in Healthcare", "e": false} - }, - { - "s": {"v": "https://example.com/documents/research-paper-123", "e": true}, - "p": {"v": "http://purl.org/dc/terms/creator", "e": true}, - "o": {"v": "Dr. Jane Smith", "e": false} - }, - { - "s": {"v": "https://example.com/documents/research-paper-123", "e": true}, - "p": {"v": "http://purl.org/dc/terms/subject", "e": true}, - "o": {"v": "Healthcare AI", "e": false} - } - ], - "charset": "utf-8", - "text": "VGhpcyBpcyBhIHNhbXBsZSByZXNlYXJjaCBwYXBlciBhYm91dCBtYWNoaW5lIGxlYXJuaW5nIGluIGhlYWx0aGNhcmUuLi4=", - "user": "researcher", - "collection": "healthcare-research" -} -``` - -## Response - -The text-load API is a sender API with no response body. Success is indicated by HTTP status code 200. - -## REST service - -The text-load service is available at: -`POST /api/v1/flow/{flow-id}/service/text-load` - -Where `{flow-id}` is the identifier of the flow that will process the document. 
- -Example: -```bash -curl -X POST \ - -H "Content-Type: application/json" \ - -d @document.json \ - http://api-gateway:8080/api/v1/flow/pdf-processing/service/text-load -``` - -## Metadata Format - -Each metadata triple contains: -- `s`: Subject (object with `v` for value and `e` for is_entity boolean) -- `p`: Predicate (object with `v` for value and `e` for is_entity boolean) -- `o`: Object (object with `v` for value and `e` for is_entity boolean) - -The `e` field indicates whether the value should be treated as an entity (true) or literal (false). - -## Common Metadata Properties - -### Document Properties -- `http://purl.org/dc/terms/title`: Document title -- `http://purl.org/dc/terms/creator`: Document author -- `http://purl.org/dc/terms/subject`: Document subject/topic -- `http://purl.org/dc/terms/description`: Document description -- `http://purl.org/dc/terms/date`: Publication date -- `http://purl.org/dc/terms/language`: Document language - -### Organizational Properties -- `http://xmlns.com/foaf/0.1/name`: Organization name -- `http://www.w3.org/2006/vcard/ns#hasAddress`: Organization address -- `http://xmlns.com/foaf/0.1/homepage`: Organization website - -### Publication Properties -- `http://purl.org/ontology/bibo/doi`: DOI identifier -- `http://purl.org/ontology/bibo/isbn`: ISBN identifier -- `http://purl.org/ontology/bibo/volume`: Publication volume -- `http://purl.org/ontology/bibo/issue`: Publication issue - -## Text Encoding - -The `text` field must contain base64-encoded content. To encode text: - -```bash -# Command line encoding -echo "Your text content here" | base64 - -# Python encoding -import base64 -encoded_text = base64.b64encode("Your text content here".encode('utf-8')).decode('utf-8') -``` - -## Integration with Processing Flows - -Once loaded, text documents are processed through the specified flow, which typically includes: - -1. **Text Chunking**: Breaking documents into manageable chunks -2. 
**Embedding Generation**: Creating vector embeddings for semantic search -3. **Knowledge Extraction**: Extracting entities and relationships -4. **Graph Storage**: Storing extracted knowledge in the knowledge graph -5. **Indexing**: Making content searchable for RAG queries - -## Error Handling - -Common errors include: -- Invalid base64 encoding in text field -- Missing required fields (id, text) -- Invalid metadata triple format -- Flow not found or inactive - -## Python SDK - -```python -import base64 -from trustgraph.api.text_load import TextLoadClient - -client = TextLoadClient() - -# Prepare document -document = { - "id": "https://example.com/doc-123", - "metadata": [ - { - "s": {"v": "https://example.com/doc-123", "e": True}, - "p": {"v": "http://purl.org/dc/terms/title", "e": True}, - "o": {"v": "Sample Document", "e": False} - } - ], - "charset": "utf-8", - "text": base64.b64encode("Document content here".encode('utf-8')).decode('utf-8'), - "user": "alice", - "collection": "research" -} - -# Load document -await client.load_text_document("my-flow", document) -``` - -## Use Cases - -- **Research Paper Ingestion**: Load academic papers with rich metadata -- **Document Processing**: Ingest documents for knowledge extraction -- **Content Management**: Build searchable document repositories -- **RAG System Population**: Load content for question-answering systems -- **Knowledge Base Construction**: Convert documents into structured knowledge - -## Features - -- **Rich Metadata**: Full RDF metadata support for semantic annotation -- **Flow Integration**: Direct integration with TrustGraph processing flows -- **Multi-tenant**: User and collection-based document organization -- **Encoding Support**: Flexible character encoding support -- **No Response Required**: Fire-and-forget operation for high throughput \ No newline at end of file diff --git a/docs/apis/api-triples-query.md b/docs/apis/api-triples-query.md deleted file mode 100644 index 7c1a6bd9..00000000 --- 
a/docs/apis/api-triples-query.md +++ /dev/null @@ -1,215 +0,0 @@ - -# TrustGraph Triples Query API - -This is a service which queries the knowledge graph for triples ("facts"). - -## Request/response - -### Request - -The request contains the following fields: -- `s`: Optional, if included specifies a match for the subject part of a - triple. -- `p`: Optional, if included specifies a match for the predicate part of a - triple. -- `o`: Optional, if included specifies a match for the object part of a - triple. -- `limit`: Optional, if included specifies the maximum number of triples to - return. If not specified, an arbitrary value is used. - -Returned triples will match all of `s`, `p` and `o` where provided. - -### Response - -The response contains the following fields: -- `response`: A list of triples. - -Each triple contains `s`, `p` and `o` fields describing the -subject, predicate and object part of each triple. - -Each triple element uses the same schema: -- `value`: the entity URI or literal value depending on whether this is - graph entity or literal value. -- `is_uri`: A boolean value which is true if this is a graph entity i.e. - `value` is a URI, not a literal value. 
- -## Data Format Details - -### Triple Element Format - -To reduce the size of JSON messages, triple elements (subject, predicate, object) are encoded using a compact format: - -- `v`: The value as a string (maps to `value` in the full schema) -- `e`: Boolean indicating if this is an entity/URI (maps to `is_uri` in the full schema) - -Each triple element (`s`, `p`, `o`) contains: -- `v`: The actual value as a string -- `e`: Boolean indicating the value type - - `true`: The value is a URI/entity (e.g., `"http://example.com/Person1"`) - - `false`: The value is a literal (e.g., `"John Doe"`, `"42"`, `"2023-01-01"`) - -### Examples - -**URI/Entity Element:** -```json -{ - "v": "http://trustgraph.ai/e/space-station-modules", - "e": true -} -``` - -**Literal Element:** -```json -{ - "v": "space station modules", - "e": false -} -``` - -**Numeric Literal:** -```json -{ - "v": "42", - "e": false -} -``` - -## REST service - -The REST service accepts a request object containing the `s`, `p`, `o` -and `limit` fields. -The response is a JSON object containing the `response` field. - -e.g. - -This example query matches triples with a subject of -`http://trustgraph.ai/e/space-station-modules` and a predicate of -`http://www.w3.org/2000/01/rdf-schema#label`. This predicate -represents the RDF schema 'label' relationship. - -The response is a single triple - the `o` element contains the -literal "space station modules" which is the label for -`http://trustgraph.ai/e/space-station-modules`. 
- -Request: -``` -{ - "id": "qgzw1287vfjc8wsk-4", - "service": "triples-query", - "flow": "default", - "request": { - "s": { - "v": "http://trustgraph.ai/e/space-station-modules", - "e": true - }, - "p": { - "v": "http://www.w3.org/2000/01/rdf-schema#label", - "e": true - }, - "limit": 5 - } -} -``` - -Response: - -``` -{ - "response": [ - { - "s": { - "v": "http://trustgraph.ai/e/space-station-modules", - "e": true - }, - "p": { - "v": "http://www.w3.org/2000/01/rdf-schema#label", - "e": true - }, - "o": { - "v": "space station modules", - "e": false - } - } - ] -} -``` - -## Websocket - -Requests have a `request` object containing the query fields (`s`, `p`, `o`, `limit`). -Responses have a `response` object containing `response` field. - -e.g. - -Request: - -``` -{ - "id": "qgzw1287vfjc8wsk-4", - "service": "triples-query", - "request": { - "s": { - "v": "http://trustgraph.ai/e/space-station-modules", - "e": true - }, - "p": { - "v": "http://www.w3.org/2000/01/rdf-schema#label", - "e": true - }, - "limit": 5 - } -} -``` - -Responses: - -``` -{ - "id": "qgzw1287vfjc8wsk-4", - "response": { - "response": [ - { - "s": { - "v": "http://trustgraph.ai/e/space-station-modules", - "e": true - }, - "p": { - "v": "http://www.w3.org/2000/01/rdf-schema#label", - "e": true - }, - "o": { - "v": "space station modules", - "e": false - } - } - ] - }, - "complete": true -} -``` - -## Pulsar - -The Pulsar schema for the Triples Query API is defined in Python code here: - -https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/schema/graph.py - -Default request queue: -`non-persistent://tg/request/triples-query` - -Default response queue: -`non-persistent://tg/response/triples-query` - -Request schema: -`trustgraph.schema.TriplesQueryRequest` - -Response schema: -`trustgraph.schema.TriplesQueryResponse` - -## Pulsar Python client - -The client class is -`trustgraph.clients.TriplesQueryClient` - 
-https://github.com/trustgraph-ai/trustgraph/blob/master/trustgraph-base/trustgraph/clients/triples_query_client.py - diff --git a/docs/apis/pulsar.md b/docs/apis/pulsar.md deleted file mode 100644 index ece6e75b..00000000 --- a/docs/apis/pulsar.md +++ /dev/null @@ -1,230 +0,0 @@ -# TrustGraph Pulsar API - -Apache Pulsar is the underlying message queue system used by TrustGraph for inter-component communication. Understanding Pulsar queue names is essential for direct integration with TrustGraph services. - -## Overview - -TrustGraph uses two types of APIs with different queue naming patterns: - -1. **Global Services**: Fixed queue names, not dependent on flows -2. **Flow-Hosted Services**: Dynamic queue names that depend on the specific flow configuration - -## Global Services (Fixed Queue Names) - -These services run independently and have fixed Pulsar queue names: - -### Config API -- **Request Queue**: `non-persistent://tg/request/config` -- **Response Queue**: `non-persistent://tg/response/config` -- **Push Queue**: `persistent://tg/config/config` - -### Flow API -- **Request Queue**: `non-persistent://tg/request/flow` -- **Response Queue**: `non-persistent://tg/response/flow` - -### Knowledge API -- **Request Queue**: `non-persistent://tg/request/knowledge` -- **Response Queue**: `non-persistent://tg/response/knowledge` - -### Librarian API -- **Request Queue**: `non-persistent://tg/request/librarian` -- **Response Queue**: `non-persistent://tg/response/librarian` - -## Flow-Hosted Services (Dynamic Queue Names) - -These services are hosted within specific flows and have queue names that depend on the flow configuration: - -- Agent API -- Document RAG API -- Graph RAG API -- Text Completion API -- Prompt API -- Embeddings API -- Graph Embeddings API -- Triples Query API -- Text Load API -- Document Load API - -## Discovering Flow-Hosted Queue Names - -To find the queue names for flow-hosted services, you need to query the flow configuration using the Config 
API. - -### Method 1: Using the Config API - -Query for the flow configuration: - -**Request:** -```json -{ - "operation": "get", - "keys": [ - { - "type": "flows", - "key": "your-flow-name" - } - ] -} -``` - -**Response:** -The response will contain a flow definition with an "interfaces" object that lists all queue names. - -### Method 2: Using the CLI - -Use the TrustGraph CLI to dump the configuration: - -```bash -tg-show-config -``` - -## Flow Interface Types - -Flow configurations define two types of service interfaces: - -### 1. Request/Response Interfaces - -Services that accept a request and return a response: - -```json -{ - "graph-rag": { - "request": "non-persistent://tg/request/graph-rag:document-rag+graph-rag", - "response": "non-persistent://tg/response/graph-rag:document-rag+graph-rag" - } -} -``` - -**Examples**: agent, document-rag, graph-rag, text-completion, prompt, embeddings, graph-embeddings, triples - -### 2. Fire-and-Forget Interfaces - -Services that accept data but don't return a response: - -```json -{ - "text-load": "persistent://tg/flow/text-document-load:default" -} -``` - -**Examples**: text-load, document-load, triples-store, graph-embeddings-store, document-embeddings-store, entity-contexts-load - -## Example Flow Configuration - -Here's an example of a complete flow configuration showing queue names: - -```json -{ - "class-name": "document-rag+graph-rag", - "description": "Default processing flow", - "interfaces": { - "agent": { - "request": "non-persistent://tg/request/agent:default", - "response": "non-persistent://tg/response/agent:default" - }, - "document-rag": { - "request": "non-persistent://tg/request/document-rag:document-rag+graph-rag", - "response": "non-persistent://tg/response/document-rag:document-rag+graph-rag" - }, - "graph-rag": { - "request": "non-persistent://tg/request/graph-rag:document-rag+graph-rag", - "response": "non-persistent://tg/response/graph-rag:document-rag+graph-rag" - }, - "text-completion": { - 
"request": "non-persistent://tg/request/text-completion:document-rag+graph-rag", - "response": "non-persistent://tg/response/text-completion:document-rag+graph-rag" - }, - "embeddings": { - "request": "non-persistent://tg/request/embeddings:document-rag+graph-rag", - "response": "non-persistent://tg/response/embeddings:document-rag+graph-rag" - }, - "triples": { - "request": "non-persistent://tg/request/triples:document-rag+graph-rag", - "response": "non-persistent://tg/response/triples:document-rag+graph-rag" - }, - "text-load": "persistent://tg/flow/text-document-load:default", - "document-load": "persistent://tg/flow/document-load:default", - "triples-store": "persistent://tg/flow/triples-store:default", - "graph-embeddings-store": "persistent://tg/flow/graph-embeddings-store:default" - } -} -``` - -## Queue Naming Patterns - -### Global Services -- **Pattern**: `{persistence}://tg/{namespace}/{service-name}` -- **Example**: `non-persistent://tg/request/config` - -### Flow-Hosted Request/Response -- **Pattern**: `{persistence}://tg/{namespace}/{service-name}:{flow-identifier}` -- **Example**: `non-persistent://tg/request/graph-rag:document-rag+graph-rag` - -### Flow-Hosted Fire-and-Forget -- **Pattern**: `{persistence}://tg/flow/{service-name}:{flow-identifier}` -- **Example**: `persistent://tg/flow/text-document-load:default` - -## Persistence Types - -- **non-persistent**: Messages are not persisted to disk, faster but less reliable -- **persistent**: Messages are persisted to disk, slower but more reliable - -## Practical Usage - -### Python Example - -```python -import pulsar -from trustgraph.schema import ConfigRequest, ConfigResponse - -# Connect to Pulsar -client = pulsar.Client('pulsar://localhost:6650') - -# Create producer for config requests -producer = client.create_producer( - 'non-persistent://tg/request/config', - schema=pulsar.schema.AvroSchema(ConfigRequest) -) - -# Create consumer for config responses -consumer = client.subscribe( - 
'non-persistent://tg/response/config', - subscription_name='my-subscription', - schema=pulsar.schema.AvroSchema(ConfigResponse) -) - -# Send request -request = ConfigRequest(operation='list-classes') -producer.send(request) - -# Receive response -response = consumer.receive() -print(response.value()) -``` - -### Flow Service Example - -```python -# First, get the flow configuration to find queue names -config_request = ConfigRequest( - operation='get', - keys=[ConfigKey(type='flows', key='my-flow')] -) - -# Use the returned interface information to determine queue names -# Then connect to the appropriate queues for the service you need -``` - -## Best Practices - -1. **Query Flow Configuration**: Always query the current flow configuration to get accurate queue names -2. **Handle Dynamic Names**: Flow-hosted service queue names can change when flows are reconfigured -3. **Choose Appropriate Persistence**: Use persistent queues for critical data, non-persistent for performance -4. **Schema Validation**: Use the appropriate Pulsar schema for each service -5. **Error Handling**: Implement proper error handling for queue connection and message failures - -## Security Considerations - -- Pulsar access should be restricted in production environments -- Use appropriate authentication and authorization mechanisms -- Monitor queue access and message patterns for security anomalies -- Consider encryption for sensitive data in messages \ No newline at end of file diff --git a/docs/apis/websocket.md b/docs/apis/websocket.md deleted file mode 100644 index 07307cf4..00000000 --- a/docs/apis/websocket.md +++ /dev/null @@ -1,141 +0,0 @@ - -# TrustGraph websocket overview - -The websocket service is provided by the `api-gateway` service on port -8088. 
- -## URL - -Depending on how the service is hosted, the websocket is invoked on this -URL on `api-gateway`: - -``` -/api/v1/socket -``` - -When hosted using docker compose, you can access the service at -`ws://localhost:8088/api/v1/socket` - -## Request - -A request message is a JSON message containing 3/4 fields: - -- `id`: A unique ID which is used to correlate requests and responses. - You should make sure it is unique. -- `service`: The name of the service to invoke. -- `request`: The request body which is passed to the service - this is - defined in the API documentation for that service. -- `flow`: Some APIs are supported by processors launched within a flow, - and are dependent on a flow running. For such APIs, the flow identifier - needs to be provided. - -e.g. - -``` -{ - "id": "qgzw1287vfjc8wsk-1", - "service": "graph-rag", - "flow": "default", - "request": { - "query": "What does NASA stand for?" - } -} -``` - -## Response - -A response message is JSON encoded, and may contain the following fields: - -- `id`: This is the same value provided on the request and shows which - request this response is returned for. - `error`: If an error occurred, this field is provided, and provides an - error message. -- `response`: For a non-error case, this provides a response from the - service - the response structure depends on the service invoked. It is - not provided if the `error` field is provided. - -`complete`: A boolean value indicating whether this response is the - final response from the service. If set to false, the response values - are intermediate values. It is not provided if the `error` field is - provided. - -An error response completes a request - no further responses -will be provided. - -e.g. - -``` -{ - "id": "qgzw1287vfjc8wsk-1", - "response": { - "response": "National Aeronautics and Space Administration." 
- }, - "complete": true -} -``` - -## Multi-part response - -For a multi-part response, a number of responses are provided with the -same ID until the final message which has the `complete` field set to -true. - -Note that multi-part responses are a feature of the websocket API which -the request/response nature of the REST API is not able to provide. - -e.g. - -Request: - -``` -{ - "id": "blrqotfefnmnh7de-20", - "service": "agent", - "flow": "default", - "request": { - "question": "What does NASA stand for?" - } -} -``` - -Responses: - -``` -{ - "id": "blrqotfefnmnh7de-20", - "response": { - "thought": "I need to query a knowledge base" - }, - "complete": false -} -``` - -``` -{ - "id": "blrqotfefnmnh7de-20", - "response": { - "observation": "National Aeronautics and Space Administration." - }, - "complete": false -} -``` - -``` -{ - "id": "blrqotfefnmnh7de-20", - "response": { - "thought": "I now know the final answer" - }, - "complete": false -} -``` - -``` -{ - "id": "blrqotfefnmnh7de-20", - "response": { - "answer": "National Aeronautics and Space Administration" - }, - "complete": true -} -``` - diff --git a/docs/cli/README.md b/docs/cli/README.md deleted file mode 100644 index 10f74c09..00000000 --- a/docs/cli/README.md +++ /dev/null @@ -1,173 +0,0 @@ -# TrustGraph CLI Documentation - -The TrustGraph Command Line Interface (CLI) provides comprehensive command-line access to all TrustGraph services. These tools wrap the REST and WebSocket APIs to provide convenient, scriptable access to TrustGraph functionality. - -## Installation - -The CLI tools are installed as part of the `trustgraph-cli` package: - -```bash -pip install trustgraph-cli -``` - -> [!NOTE] -> The CLI version should match the version of TrustGraph being deployed. 
- -## Global Options - -Most CLI commands support these common options: - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-U, --user USER`: User identifier (default: `trustgraph`) -- `-C, --collection COLLECTION`: Collection identifier (default: `default`) -- `-f, --flow-id FLOW`: Flow identifier (default: `default`) - -## Command Categories - -### System Administration & Configuration - -**System Setup:** -- [`tg-init-trustgraph`](tg-init-trustgraph.md) - Initialize Pulsar with TrustGraph configuration -- [`tg-init-pulsar-manager`](tg-init-pulsar-manager.md) - Initialize Pulsar manager setup -- [`tg-show-config`](tg-show-config.md) - Display current system configuration - -**Token Management:** -- [`tg-set-token-costs`](tg-set-token-costs.md) - Configure model token costs -- [`tg-show-token-costs`](tg-show-token-costs.md) - Display token cost configuration -- [`tg-show-token-rate`](tg-show-token-rate.md) - Show token usage rates - -**Prompt Management:** -- [`tg-set-prompt`](tg-set-prompt.md) - Configure prompt templates and system prompts -- [`tg-show-prompts`](tg-show-prompts.md) - Display configured prompt templates - -### Flow Management - -**Flow Operations:** -- [`tg-start-flow`](tg-start-flow.md) - Start a processing flow -- [`tg-stop-flow`](tg-stop-flow.md) - Stop a running flow -- [`tg-show-flows`](tg-show-flows.md) - List all configured flows -- [`tg-show-flow-state`](tg-show-flow-state.md) - Show current flow states - -**Flow Blueprint Management:** -- [`tg-put-flow-blueprint`](tg-put-flow-blueprint.md) - Upload/update flow blueprint definition -- [`tg-get-flow-blueprint`](tg-get-flow-blueprint.md) - Retrieve flow blueprint definition -- [`tg-delete-flow-blueprint`](tg-delete-flow-blueprint.md) - Remove flow blueprint definition -- [`tg-show-flow-blueprints`](tg-show-flow-blueprints.md) - List available flow blueprintes - -### Knowledge Graph Management - -**Knowledge Core Operations:** -- 
[`tg-load-kg-core`](tg-load-kg-core.md) - Load knowledge core into processing -- [`tg-put-kg-core`](tg-put-kg-core.md) - Store knowledge core in system -- [`tg-get-kg-core`](tg-get-kg-core.md) - Retrieve knowledge core -- [`tg-delete-kg-core`](tg-delete-kg-core.md) - Remove knowledge core -- [`tg-unload-kg-core`](tg-unload-kg-core.md) - Unload knowledge core from processing -- [`tg-show-kg-cores`](tg-show-kg-cores.md) - List available knowledge cores - -**Graph Data Operations:** -- [`tg-show-graph`](tg-show-graph.md) - Display graph triples/edges -- [`tg-graph-to-turtle`](tg-graph-to-turtle.md) - Export graph to Turtle format -- [`tg-load-turtle`](tg-load-turtle.md) - Import RDF triples from Turtle files - -### Document Processing & Library Management - -**Document Loading:** -- [`tg-load-pdf`](tg-load-pdf.md) - Load PDF documents into processing -- [`tg-load-text`](tg-load-text.md) - Load text documents into processing -- [`tg-load-sample-documents`](tg-load-sample-documents.md) - Load sample documents for testing - -**Library Management:** -- [`tg-add-library-document`](tg-add-library-document.md) - Add documents to library -- [`tg-show-library-documents`](tg-show-library-documents.md) - List documents in library -- [`tg-remove-library-document`](tg-remove-library-document.md) - Remove documents from library -- [`tg-start-library-processing`](tg-start-library-processing.md) - Start processing library documents -- [`tg-stop-library-processing`](tg-stop-library-processing.md) - Stop library document processing -- [`tg-show-library-processing`](tg-show-library-processing.md) - Show library processing status - -**Document Embeddings:** -- [`tg-load-doc-embeds`](tg-load-doc-embeds.md) - Load document embeddings -- [`tg-save-doc-embeds`](tg-save-doc-embeds.md) - Save document embeddings - -### AI Services & Agent Interaction - -**Query & Interaction:** -- [`tg-invoke-agent`](tg-invoke-agent.md) - Interactive agent Q&A via WebSocket -- 
[`tg-invoke-llm`](tg-invoke-llm.md) - Direct LLM text completion -- [`tg-invoke-prompt`](tg-invoke-prompt.md) - Use configured prompt templates -- [`tg-invoke-document-rag`](tg-invoke-document-rag.md) - Document-based RAG queries -- [`tg-invoke-graph-rag`](tg-invoke-graph-rag.md) - Graph-based RAG queries - -**Tool & Prompt Management:** -- [`tg-show-tools`](tg-show-tools.md) - List available agent tools -- [`tg-set-prompt`](tg-set-prompt.md) - Configure prompt templates -- [`tg-show-prompts`](tg-show-prompts.md) - List configured prompts - -### System Monitoring & Debugging - -**System Status:** -- [`tg-show-processor-state`](tg-show-processor-state.md) - Show processing component states - -**Debugging:** -- [`tg-dump-msgpack`](tg-dump-msgpack.md) - Dump MessagePack data for debugging - -## Quick Start Examples - -### Basic Document Processing -```bash -# Start a flow -tg-start-flow --flow-id my-flow --blueprint-name document-processing - -# Load a document -tg-load-text --flow-id my-flow --text "Your document content" --title "Test Document" - -# Query the knowledge -tg-invoke-graph-rag --flow-id my-flow --query "What is the document about?" -``` - -### Knowledge Management -```bash -# List available knowledge cores -tg-show-kg-cores - -# Load a knowledge core into a flow -tg-load-kg-core --flow-id my-flow --kg-core-id my-knowledge - -# Query the knowledge graph -tg-show-graph --limit 100 -``` - -### Flow Management -```bash -# Show available flow blueprintes -tg-show-flow-blueprints - -# Show running flows -tg-show-flows - -# Stop a flow -tg-stop-flow --flow-id my-flow -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL for all commands -- `TRUSTGRAPH_USER`: Default user identifier -- `TRUSTGRAPH_COLLECTION`: Default collection identifier - -## Authentication - -CLI commands inherit authentication from the environment or API configuration. See the main TrustGraph documentation for authentication setup. 
- -## Error Handling - -All CLI commands provide: -- Consistent error reporting -- Exit codes (0 for success, non-zero for errors) -- Detailed error messages for troubleshooting -- Retry logic for network operations where appropriate - -## Related Documentation - -- [TrustGraph API Documentation](../apis/README.md) -- [TrustGraph WebSocket Guide](../apis/websocket.md) -- [TrustGraph Pulsar Guide](../apis/pulsar.md) diff --git a/docs/cli/tg-add-library-document.md b/docs/cli/tg-add-library-document.md deleted file mode 100644 index a3cc2572..00000000 --- a/docs/cli/tg-add-library-document.md +++ /dev/null @@ -1,285 +0,0 @@ -# tg-add-library-document - -Adds documents to the TrustGraph library with comprehensive metadata support. - -## Synopsis - -```bash -tg-add-library-document [options] file1 [file2 ...] -``` - -## Description - -The `tg-add-library-document` command adds documents to the TrustGraph library system, which provides persistent document storage with rich metadata management. Unlike direct document loading, the library approach offers better document lifecycle management, metadata preservation, and processing control. - -Documents added to the library can later be processed using `tg-start-library-processing` for controlled batch processing operations. 
- -## Options - -### Connection & User -- `-u, --url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-U, --user USER`: User identifier (default: `trustgraph`) - -### Document Information -- `--name NAME`: Document name/title -- `--description DESCRIPTION`: Document description -- `--id ID`: Custom document identifier (if not specified, uses content hash) -- `--kind MIMETYPE`: Document MIME type (auto-detected if not specified) -- `--tags TAGS`: Comma-separated list of tags - -### Copyright Information -- `--copyright-notice NOTICE`: Copyright notice text -- `--copyright-holder HOLDER`: Copyright holder name -- `--copyright-year YEAR`: Copyright year -- `--license LICENSE`: Copyright license - -### Publication Information -- `--publication-organization ORG`: Publishing organization name -- `--publication-description DESC`: Publication description -- `--publication-date DATE`: Publication date -- `--publication-url URL`: Publication URL - -### Document Source -- `--document-url URL`: Original document source URL -- `--keyword KEYWORDS`: Document keywords (space-separated) - -## Arguments - -- `file1 [file2 ...]`: One or more files to add to the library - -## Examples - -### Basic Document Addition -```bash -tg-add-library-document report.pdf -``` - -### With Complete Metadata -```bash -tg-add-library-document \ - --name "Annual Research Report 2024" \ - --description "Comprehensive analysis of research outcomes" \ - --copyright-holder "Research Institute" \ - --copyright-year "2024" \ - --license "CC BY 4.0" \ - --tags "research,annual,analysis" \ - --keyword "research" "analysis" "2024" \ - annual-report.pdf -``` - -### Academic Paper -```bash -tg-add-library-document \ - --name "Machine Learning in Healthcare" \ - --description "Study on ML applications in medical diagnosis" \ - --publication-organization "University Medical School" \ - --publication-date "2024-03-15" \ - --copyright-holder "Dr. 
Jane Smith" \ - --tags "machine-learning,healthcare,medical" \ - --keyword "ML" "healthcare" "diagnosis" \ - ml-healthcare-paper.pdf -``` - -### Multiple Documents with Shared Metadata -```bash -tg-add-library-document \ - --publication-organization "Tech Company" \ - --copyright-holder "Tech Company Inc." \ - --copyright-year "2024" \ - --license "Proprietary" \ - --tags "documentation,technical" \ - manual-v1.pdf manual-v2.pdf manual-v3.pdf -``` - -### Custom Document ID -```bash -tg-add-library-document \ - --id "PROJ-2024-001" \ - --name "Project Specification" \ - --description "Technical requirements document" \ - project-spec.docx -``` - -## Document Processing - -1. **File Reading**: Reads document content as binary data -2. **ID Generation**: Creates SHA256 hash-based ID (unless custom ID provided) -3. **Metadata Assembly**: Combines all metadata into structured format -4. **Library Storage**: Stores document and metadata in library system -5. **URI Creation**: Generates TrustGraph document URI - -## Document ID Generation - -- **Automatic**: SHA256 hash of file content converted to TrustGraph URI -- **Custom**: Use `--id` parameter for specific identifiers -- **Format**: `http://trustgraph.ai/d/[hash-or-custom-id]` - -## MIME Type Detection - -The system automatically detects document types: -- **PDF**: `application/pdf` -- **Word**: `application/vnd.openxmlformats-officedocument.wordprocessingml.document` -- **Text**: `text/plain` -- **HTML**: `text/html` - -Override with `--kind` parameter if needed. 
- -## Metadata Format - -Metadata is stored as RDF triples including: - -### Dublin Core Properties -- `dc:title`: Document name -- `dc:description`: Document description -- `dc:creator`: Copyright holder -- `dc:date`: Publication date -- `dc:rights`: Copyright notice -- `dc:license`: License information -- `dc:subject`: Keywords and tags - -### Organization Information -- `foaf:Organization`: Publisher details -- `foaf:name`: Organization name -- `vcard:hasURL`: Organization website - -### Document Properties -- `bibo:doi`: DOI if applicable -- `bibo:url`: Document source URL - -## Output - -For each successfully added document: -```bash -report.pdf: Loaded successfully. -``` - -For failures: -```bash -invalid.pdf: Failed: File not found -``` - -## Error Handling - -### File Errors -```bash -document.pdf: Failed: No such file or directory -``` -**Solution**: Verify file path exists and is readable. - -### Permission Errors -```bash -document.pdf: Failed: Permission denied -``` -**Solution**: Check file permissions and user access rights. - -### Connection Errors -```bash -document.pdf: Failed: Connection refused -``` -**Solution**: Verify API URL and ensure TrustGraph is running. - -### Library Errors -```bash -document.pdf: Failed: Document already exists -``` -**Solution**: Use different ID or update existing document. - -## Library Management Workflow - -### 1. Add Documents -```bash -tg-add-library-document research-paper.pdf -``` - -### 2. Verify Addition -```bash -tg-show-library-documents -``` - -### 3. Start Processing -```bash -tg-start-library-processing --flow-id research-flow -``` - -### 4. 
Monitor Processing -```bash -tg-show-library-processing -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-show-library-documents`](tg-show-library-documents.md) - List library documents -- [`tg-remove-library-document`](tg-remove-library-document.md) - Remove documents from library -- [`tg-start-library-processing`](tg-start-library-processing.md) - Process library documents -- [`tg-stop-library-processing`](tg-stop-library-processing.md) - Stop library processing -- [`tg-show-library-processing`](tg-show-library-processing.md) - Show processing status - -## API Integration - -This command uses the [Librarian API](../apis/api-librarian.md) with the `add-document` operation to store documents with metadata. - -## Use Cases - -### Research Document Management -```bash -tg-add-library-document \ - --name "Climate Change Analysis" \ - --publication-organization "Climate Research Institute" \ - --tags "climate,research,environment" \ - climate-study.pdf -``` - -### Corporate Documentation -```bash -tg-add-library-document \ - --name "Product Manual v2.1" \ - --copyright-holder "Acme Corporation" \ - --license "Proprietary" \ - --tags "manual,product,v2.1" \ - product-manual.pdf -``` - -### Legal Document Archive -```bash -tg-add-library-document \ - --name "Contract Template" \ - --description "Standard service agreement template" \ - --copyright-holder "Legal Department" \ - --tags "legal,contract,template" \ - contract-template.docx -``` - -### Academic Paper Collection -```bash -tg-add-library-document \ - --publication-organization "IEEE" \ - --copyright-year "2024" \ - --tags "academic,ieee,conference" \ - paper1.pdf paper2.pdf paper3.pdf -``` - -## Best Practices - -1. **Consistent Metadata**: Use standardized metadata fields for better organization -2. **Meaningful Tags**: Add relevant tags for document discovery -3. **Copyright Information**: Include complete copyright details for legal compliance -4. 
**Batch Operations**: Process related documents together with shared metadata -5. **Version Control**: Use clear naming and tagging for document versions -6. **Library Organization**: Use collections and user assignments for multi-tenant systems - -## Advantages over Direct Loading - -### Library Benefits -- **Persistent Storage**: Documents preserved in library system -- **Metadata Management**: Rich metadata storage and querying -- **Processing Control**: Controlled batch processing with start/stop -- **Document Lifecycle**: Full document management capabilities -- **Search and Discovery**: Better document organization and retrieval - -### When to Use Library vs Direct Loading -- **Use Library**: For document management, metadata preservation, controlled processing -- **Use Direct Loading**: For immediate processing, simple workflows, temporary documents \ No newline at end of file diff --git a/docs/cli/tg-delete-flow-blueprint.md b/docs/cli/tg-delete-flow-blueprint.md deleted file mode 100644 index c99fd6da..00000000 --- a/docs/cli/tg-delete-flow-blueprint.md +++ /dev/null @@ -1,330 +0,0 @@ -# tg-delete-flow-blueprint - -Permanently deletes a flow blueprint definition from TrustGraph. - -## Synopsis - -```bash -tg-delete-flow-blueprint -n CLASS_NAME [options] -``` - -## Description - -The `tg-delete-flow-blueprint` command permanently removes a flow blueprint definition from TrustGraph. This operation cannot be undone, so use with caution. - -**⚠️ Warning**: Deleting a flow blueprint that has active flow instances may cause those instances to become unusable. Always check for active flows before deletion. 
- -## Options - -### Required Arguments - -- `-n, --blueprint-name CLASS_NAME`: Name of the flow blueprint to delete - -### Optional Arguments - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) - -## Examples - -### Delete a Flow Blueprint -```bash -tg-delete-flow-blueprint -n "old-test-flow" -``` - -### Delete with Custom API URL -```bash -tg-delete-flow-blueprint -n "deprecated-flow" -u http://staging:8088/ -``` - -### Safe Deletion Workflow -```bash -# 1. Check if flow blueprint exists -tg-show-flow-blueprints | grep "target-flow" - -# 2. Backup the flow blueprint first -tg-get-flow-blueprint -n "target-flow" > backup-target-flow.json - -# 3. Check for active flow instances -tg-show-flows | grep "target-flow" - -# 4. Delete the flow blueprint -tg-delete-flow-blueprint -n "target-flow" - -# 5. Verify deletion -tg-show-flow-blueprints | grep "target-flow" || echo "Flow blueprint deleted successfully" -``` - -## Prerequisites - -### Flow Blueprint Must Exist -Verify the flow blueprint exists before attempting deletion: - -```bash -# List all flow blueprintes -tg-show-flow-blueprints - -# Check specific flow blueprint -tg-show-flow-blueprints | grep "target-class" -``` - -### Check for Active Flow Instances -Before deleting a flow blueprint, check if any flow instances are using it: - -```bash -# List all active flows -tg-show-flows - -# Look for instances using the flow blueprint -tg-show-flows | grep "target-class" -``` - -## Error Handling - -### Flow Blueprint Not Found -```bash -Exception: Flow blueprint 'nonexistent-class' not found -``` -**Solution**: Verify the flow blueprint exists with `tg-show-flow-blueprints`. - -### Connection Errors -```bash -Exception: Connection refused -``` -**Solution**: Check the API URL and ensure TrustGraph is running. - -### Permission Errors -```bash -Exception: Access denied to delete flow blueprint -``` -**Solution**: Verify user permissions for flow blueprint management. 
- -### Active Flow Instances -```bash -Exception: Cannot delete flow blueprint with active instances -``` -**Solution**: Stop all flow instances using this class before deletion. - -## Use Cases - -### Cleanup Development Classes -```bash -# Delete test and development flow blueprintes -test_classes=("test-flow-v1" "dev-experiment" "prototype-flow") -for class in "${test_classes[@]}"; do - echo "Deleting $class..." - tg-delete-flow-blueprint -n "$class" -done -``` - -### Migration Cleanup -```bash -# After migrating to new flow blueprintes, remove old ones -old_classes=("legacy-flow" "deprecated-processor" "old-pipeline") -for class in "${old_classes[@]}"; do - # Backup first - tg-get-flow-blueprint -n "$class" > "backup-$class.json" 2>/dev/null - - # Delete - tg-delete-flow-blueprint -n "$class" - echo "Deleted $class" -done -``` - -### Conditional Deletion -```bash -# Delete flow blueprint only if no active instances exist -flow_class="target-flow" -active_instances=$(tg-show-flows | grep "$flow_class" | wc -l) - -if [ $active_instances -eq 0 ]; then - echo "No active instances found, deleting flow blueprint..." - tg-delete-flow-blueprint -n "$flow_class" -else - echo "Warning: $active_instances active instances found. Cannot delete." - tg-show-flows | grep "$flow_class" -fi -``` - -## Safety Considerations - -### Always Backup First -```bash -# Create backup before deletion -flow_class="important-flow" -backup_dir="flow-class-backups/$(date +%Y%m%d-%H%M%S)" -mkdir -p "$backup_dir" - -echo "Backing up flow blueprint: $flow_class" -tg-get-flow-blueprint -n "$flow_class" > "$backup_dir/$flow_class.json" - -if [ $? -eq 0 ]; then - echo "Backup created: $backup_dir/$flow_class.json" - echo "Proceeding with deletion..." - tg-delete-flow-blueprint -n "$flow_class" -else - echo "Backup failed. Aborting deletion." 
- exit 1 -fi -``` - -### Verification Script -```bash -#!/bin/bash -# safe-delete-flow-class.sh -flow_class="$1" - -if [ -z "$flow_class" ]; then - echo "Usage: $0 " - exit 1 -fi - -echo "Safety checks for deleting flow blueprint: $flow_class" - -# Check if flow blueprint exists -if ! tg-show-flow-blueprints | grep -q "$flow_class"; then - echo "ERROR: Flow blueprint '$flow_class' not found" - exit 1 -fi - -# Check for active instances -active_count=$(tg-show-flows | grep "$flow_class" | wc -l) -if [ $active_count -gt 0 ]; then - echo "ERROR: Found $active_count active instances using this flow blueprint" - echo "Active instances:" - tg-show-flows | grep "$flow_class" - exit 1 -fi - -# Create backup -backup_file="backup-$flow_class-$(date +%Y%m%d-%H%M%S).json" -echo "Creating backup: $backup_file" -tg-get-flow-blueprint -n "$flow_class" > "$backup_file" - -if [ $? -ne 0 ]; then - echo "ERROR: Failed to create backup" - exit 1 -fi - -# Confirm deletion -echo "Ready to delete flow blueprint: $flow_class" -echo "Backup saved as: $backup_file" -read -p "Are you sure you want to delete this flow blueprint? (y/N): " confirm - -if [ "$confirm" = "y" ] || [ "$confirm" = "Y" ]; then - echo "Deleting flow blueprint..." - tg-delete-flow-blueprint -n "$flow_class" - - # Verify deletion - if ! tg-show-flow-blueprints | grep -q "$flow_class"; then - echo "Flow blueprint deleted successfully" - else - echo "ERROR: Flow blueprint still exists after deletion" - exit 1 - fi -else - echo "Deletion cancelled" - rm "$backup_file" -fi -``` - -## Integration with Other Commands - -### Complete Flow Blueprint Lifecycle -```bash -# 1. List existing flow blueprintes -tg-show-flow-blueprints - -# 2. Get flow blueprint details -tg-get-flow-blueprint -n "target-flow" - -# 3. Check for active instances -tg-show-flows | grep "target-flow" - -# 4. Stop active instances if needed -tg-stop-flow -i "instance-id" - -# 5. Create backup -tg-get-flow-blueprint -n "target-flow" > backup.json - -# 6. 
Delete flow blueprint -tg-delete-flow-blueprint -n "target-flow" - -# 7. Verify deletion -tg-show-flow-blueprints | grep "target-flow" -``` - -### Bulk Deletion with Validation -```bash -# Delete multiple flow blueprintes safely -classes_to_delete=("old-flow1" "old-flow2" "test-flow") - -for class in "${classes_to_delete[@]}"; do - echo "Processing $class..." - - # Check if exists - if ! tg-show-flow-blueprints | grep -q "$class"; then - echo " $class not found, skipping" - continue - fi - - # Check for active instances - if tg-show-flows | grep -q "$class"; then - echo " $class has active instances, skipping" - continue - fi - - # Backup and delete - tg-get-flow-blueprint -n "$class" > "backup-$class.json" - tg-delete-flow-blueprint -n "$class" - echo " $class deleted" -done -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-show-flow-blueprints`](tg-show-flow-blueprints.md) - List available flow blueprintes -- [`tg-get-flow-blueprint`](tg-get-flow-blueprint.md) - Retrieve flow blueprint definitions -- [`tg-put-flow-blueprint`](tg-put-flow-blueprint.md) - Create/update flow blueprint definitions -- [`tg-show-flows`](tg-show-flows.md) - List active flow instances -- [`tg-stop-flow`](tg-stop-flow.md) - Stop flow instances - -## API Integration - -This command uses the [Flow API](../apis/api-flow.md) with the `delete-class` operation to remove flow blueprint definitions. - -## Best Practices - -1. **Always Backup**: Create backups before deletion -2. **Check Dependencies**: Verify no active flow instances exist -3. **Confirmation**: Use interactive confirmation for important deletions -4. **Logging**: Log deletion operations for audit trails -5. **Permissions**: Ensure appropriate access controls for deletion operations -6. 
**Testing**: Test deletion procedures in non-production environments first - -## Troubleshooting - -### Command Succeeds but Class Still Exists -```bash -# Check if deletion actually occurred -tg-show-flow-blueprints | grep "deleted-class" - -# Verify API connectivity -tg-show-flow-blueprints > /dev/null && echo "API accessible" -``` - -### Permissions Issues -```bash -# Verify user has deletion permissions -# Contact system administrator if access denied -``` - -### Network Connectivity -```bash -# Test API connectivity -curl -s "$TRUSTGRAPH_URL/api/v1/flow/classes" > /dev/null -echo "API response: $?" -``` \ No newline at end of file diff --git a/docs/cli/tg-delete-kg-core.md b/docs/cli/tg-delete-kg-core.md deleted file mode 100644 index 14a7da1e..00000000 --- a/docs/cli/tg-delete-kg-core.md +++ /dev/null @@ -1,312 +0,0 @@ -# tg-delete-kg-core - -Permanently removes a knowledge core from the TrustGraph system. - -## Synopsis - -```bash -tg-delete-kg-core --id CORE_ID [options] -``` - -## Description - -The `tg-delete-kg-core` command permanently removes a stored knowledge core from the TrustGraph system. This operation is irreversible and will delete all RDF triples, graph embeddings, and metadata associated with the specified knowledge core. - -**Warning**: This operation permanently deletes data. Ensure you have backups if the knowledge core might be needed in the future. 
- -## Options - -### Required Arguments - -- `--id, --identifier CORE_ID`: Identifier of the knowledge core to delete - -### Optional Arguments - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-U, --user USER`: User identifier (default: `trustgraph`) - -## Examples - -### Delete Specific Knowledge Core -```bash -tg-delete-kg-core --id "old-research-data" -``` - -### Delete with Specific User -```bash -tg-delete-kg-core --id "test-knowledge" -U developer -``` - -### Using Custom API URL -```bash -tg-delete-kg-core --id "obsolete-core" -u http://production:8088/ -``` - -## Prerequisites - -### Knowledge Core Must Exist -Verify the knowledge core exists before deletion: - -```bash -# Check available knowledge cores -tg-show-kg-cores - -# Ensure the core exists -tg-show-kg-cores | grep "target-core-id" -``` - -### Backup Important Data -Create backups before deletion: - -```bash -# Export knowledge core before deletion -tg-get-kg-core --id "important-core" -o backup.msgpack - -# Then proceed with deletion -tg-delete-kg-core --id "important-core" -``` - -## Safety Considerations - -### Unload from Flows First -Unload the knowledge core from any active flows: - -```bash -# Check which flows might be using the core -tg-show-flows - -# Unload from active flows -tg-unload-kg-core --id "target-core" --flow-id "active-flow" - -# Then delete the core -tg-delete-kg-core --id "target-core" -``` - -### Verify Dependencies -Check if other systems depend on the knowledge core: - -```bash -# Search for references in flow configurations -tg-show-config | grep "target-core" - -# Check processing history -tg-show-library-processing | grep "target-core" -``` - -## Deletion Process - -1. **Validation**: Verifies knowledge core exists and user has permission -2. **Dependency Check**: Ensures core is not actively loaded in flows -3. **Data Removal**: Permanently deletes RDF triples and graph embeddings -4. 
**Metadata Cleanup**: Removes all associated metadata and references -5. **Index Updates**: Updates system indexes to reflect deletion - -## Output - -Successful deletion typically produces no output: - -```bash -# Delete core (no output expected on success) -tg-delete-kg-core --id "test-core" - -# Verify deletion -tg-show-kg-cores | grep "test-core" -# Should return no results -``` - -## Error Handling - -### Knowledge Core Not Found -```bash -Exception: Knowledge core 'invalid-core' not found -``` -**Solution**: Check available cores with `tg-show-kg-cores` and verify the core ID. - -### Permission Denied -```bash -Exception: Access denied to knowledge core -``` -**Solution**: Verify user permissions and ownership of the knowledge core. - -### Core In Use -```bash -Exception: Knowledge core is currently loaded in active flows -``` -**Solution**: Unload the core from all flows before deletion using `tg-unload-kg-core`. - -### Connection Errors -```bash -Exception: Connection refused -``` -**Solution**: Check the API URL and ensure TrustGraph is running. - -## Deletion Verification - -### Confirm Deletion -```bash -# Verify core no longer exists -tg-show-kg-cores | grep "deleted-core-id" - -# Should return no results if successfully deleted -echo $? 
# Should be 1 (not found) -``` - -### Check Flow Impact -```bash -# Verify flows are not affected -tg-show-flows - -# Test that queries still work for remaining knowledge -tg-invoke-graph-rag -q "test query" -f remaining-flow -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-show-kg-cores`](tg-show-kg-cores.md) - List available knowledge cores -- [`tg-get-kg-core`](tg-get-kg-core.md) - Export knowledge core for backup -- [`tg-unload-kg-core`](tg-unload-kg-core.md) - Unload core from flows -- [`tg-put-kg-core`](tg-put-kg-core.md) - Store new knowledge cores - -## API Integration - -This command uses the [Knowledge API](../apis/api-knowledge.md) with the `delete-kg-core` operation to permanently remove knowledge cores. - -## Use Cases - -### Development Cleanup -```bash -# Remove test knowledge cores -tg-delete-kg-core --id "test-data-v1" -U developer -tg-delete-kg-core --id "experimental-core" -U developer -``` - -### Version Management -```bash -# Remove obsolete versions after upgrading -tg-get-kg-core --id "knowledge-v1" -o backup-v1.msgpack -tg-delete-kg-core --id "knowledge-v1" -# Keep only knowledge-v2 -``` - -### Storage Cleanup -```bash -# Clean up unused knowledge cores -for core in $(tg-show-kg-cores | grep "temp-"); do - echo "Deleting temporary core: $core" - tg-delete-kg-core --id "$core" -done -``` - -### Error Recovery -```bash -# Remove corrupted knowledge cores -tg-delete-kg-core --id "corrupted-core-2024" -tg-put-kg-core --id "restored-core-2024" -i restored-backup.msgpack -``` - -## Safe Deletion Workflow - -### Standard Procedure -```bash -# 1. Backup the knowledge core -tg-get-kg-core --id "target-core" -o "backup-$(date +%Y%m%d).msgpack" - -# 2. Unload from active flows -tg-unload-kg-core --id "target-core" --flow-id "production-flow" - -# 3. Verify no dependencies -tg-show-config | grep "target-core" - -# 4. Perform deletion -tg-delete-kg-core --id "target-core" - -# 5. 
Verify deletion -tg-show-kg-cores | grep "target-core" -``` - -### Bulk Deletion -```bash -# Delete multiple cores safely -cores_to_delete=("old-core-1" "old-core-2" "test-core") - -for core in "${cores_to_delete[@]}"; do - echo "Processing $core..." - - # Backup - tg-get-kg-core --id "$core" -o "backup-$core-$(date +%Y%m%d).msgpack" - - # Delete - tg-delete-kg-core --id "$core" - - # Verify - if tg-show-kg-cores | grep -q "$core"; then - echo "ERROR: $core still exists after deletion" - else - echo "SUCCESS: $core deleted" - fi -done -``` - -## Best Practices - -1. **Always Backup**: Export knowledge cores before deletion -2. **Check Dependencies**: Verify no flows are using the core -3. **Staged Deletion**: Delete test/development cores before production -4. **Verification**: Confirm deletion completed successfully -5. **Documentation**: Record why cores were deleted for audit purposes -6. **Access Control**: Ensure only authorized users can delete cores - -## Recovery Options - -### If Accidentally Deleted -```bash -# Restore from backup if available -tg-put-kg-core --id "restored-core" -i backup.msgpack - -# Reload into flows if needed -tg-load-kg-core --id "restored-core" --flow-id "production-flow" -``` - -### Audit Trail -```bash -# Keep records of deletions -echo "$(date): Deleted knowledge core 'old-core' - reason: obsolete version" >> deletion-log.txt -``` - -## System Impact - -### Storage Recovery -- Disk space is freed immediately -- Database indexes are updated -- System performance may improve - -### Service Continuity -- Running flows continue to operate -- Other knowledge cores remain unaffected -- New knowledge cores can use the same ID - -## Troubleshooting - -### Deletion Fails -```bash -# Check if core is loaded in flows -tg-show-flows | grep -A 10 "knowledge" - -# Force unload if necessary -tg-unload-kg-core --id "stuck-core" --flow-id "problem-flow" - -# Retry deletion -tg-delete-kg-core --id "stuck-core" -``` - -### Partial Deletion -```bash 
-# If core still appears in listings -tg-show-kg-cores | grep "partially-deleted" - -# Contact system administrator if deletion appears incomplete -``` \ No newline at end of file diff --git a/docs/cli/tg-delete-mcp-tool.md b/docs/cli/tg-delete-mcp-tool.md deleted file mode 100644 index b40ff87b..00000000 --- a/docs/cli/tg-delete-mcp-tool.md +++ /dev/null @@ -1,374 +0,0 @@ -# tg-delete-mcp-tool - -## Synopsis - -``` -tg-delete-mcp-tool [OPTIONS] --name NAME -``` - -## Description - -The `tg-delete-mcp-tool` command deletes MCP (Model Control Protocol) tools from the TrustGraph system. It removes MCP tool configurations by name from the 'mcp' configuration group. Once deleted, MCP tools are no longer available for agent use. - -This command is useful for: -- Removing obsolete or deprecated MCP tools -- Cleaning up MCP tool configurations -- Managing MCP tool registry maintenance -- Updating MCP tool deployments by removing old versions - -The command removes MCP tool configurations from the 'mcp' configuration group in the TrustGraph API. 
- -## Options - -- `-u, --api-url URL` - - TrustGraph API URL for configuration management - - Default: `http://localhost:8088/` (or `TRUSTGRAPH_URL` environment variable) - - Should point to a running TrustGraph API instance - -- `--name NAME` - - **Required.** MCP tool name to delete - - Must match an existing MCP tool name in the registry - - MCP tool will be completely removed from the system - -- `-h, --help` - - Show help message and exit - -## Examples - -### Basic MCP Tool Deletion - -Delete a weather MCP tool: -```bash -tg-delete-mcp-tool --name weather -``` - -### Calculator MCP Tool Deletion - -Delete a calculator MCP tool: -```bash -tg-delete-mcp-tool --name calculator -``` - -### Custom API URL - -Delete an MCP tool from a specific TrustGraph instance: -```bash -tg-delete-mcp-tool --api-url http://trustgraph.example.com:8088/ --name custom-mcp -``` - -### Batch MCP Tool Deletion - -Delete multiple MCP tools in a script: -```bash -#!/bin/bash -# Delete obsolete MCP tools -tg-delete-mcp-tool --name old-search -tg-delete-mcp-tool --name deprecated-calc -tg-delete-mcp-tool --name unused-mcp -``` - -### Conditional Deletion - -Delete an MCP tool only if it exists: -```bash -#!/bin/bash -# Check if MCP tool exists before deletion -if tg-show-mcp-tools | grep -q "test-mcp"; then - tg-delete-mcp-tool --name test-mcp - echo "MCP tool deleted" -else - echo "MCP tool not found" -fi -``` - -## Deletion Process - -The deletion process involves: - -1. **Existence Check**: Verify the MCP tool exists in the configuration -2. **Configuration Removal**: Delete the MCP tool configuration from the 'mcp' group - -The command performs validation before deletion to ensure the tool exists. 
- -## Error Handling - -The command handles various error conditions: - -- **Tool not found**: If the specified MCP tool name doesn't exist -- **API connection errors**: If the TrustGraph API is unavailable -- **Configuration errors**: If the MCP tool configuration cannot be removed - -Common error scenarios: -```bash -# MCP tool not found -tg-delete-mcp-tool --name nonexistent-mcp -# Output: MCP tool 'nonexistent-mcp' not found. - -# Missing required field -tg-delete-mcp-tool -# Output: Exception: Must specify --name for MCP tool to delete - -# API connection error -tg-delete-mcp-tool --api-url http://invalid-host:8088/ --name tool1 -# Output: Exception: [Connection error details] -``` - -## Verification - -The command provides feedback on the deletion process: - -- **Success**: `MCP tool 'tool-name' deleted successfully.` -- **Not found**: `MCP tool 'tool-name' not found.` -- **Error**: `Error deleting MCP tool 'tool-name': [error details]` - -## Advanced Usage - -### Safe Deletion with Verification - -Verify MCP tool exists before deletion: -```bash -#!/bin/bash -MCP_NAME="weather" - -# Check if MCP tool exists -if tg-show-mcp-tools | grep -q "^$MCP_NAME"; then - echo "Deleting MCP tool: $MCP_NAME" - tg-delete-mcp-tool --name "$MCP_NAME" - - # Verify deletion - if ! tg-show-mcp-tools | grep -q "^$MCP_NAME"; then - echo "MCP tool successfully deleted" - else - echo "MCP tool deletion failed" - fi -else - echo "MCP tool $MCP_NAME not found" -fi -``` - -### Backup Before Deletion - -Backup MCP tool configuration before deletion: -```bash -#!/bin/bash -MCP_NAME="important-mcp" - -# Export MCP tool configuration -echo "Backing up MCP tool configuration..." -tg-show-mcp-tools | grep -A 10 "^$MCP_NAME" > "${MCP_NAME}_backup.txt" - -# Delete MCP tool -echo "Deleting MCP tool..." 
-tg-delete-mcp-tool --name "$MCP_NAME" - -echo "MCP tool deleted, backup saved to ${MCP_NAME}_backup.txt" -``` - -### Cleanup Script - -Clean up multiple MCP tools based on patterns: -```bash -#!/bin/bash -# Delete all test MCP tools -echo "Cleaning up test MCP tools..." - -# Get list of test MCP tools -TEST_MCPS=$(tg-show-mcp-tools | grep "^test-" | cut -d: -f1) - -for mcp in $TEST_MCPS; do - echo "Deleting $mcp..." - tg-delete-mcp-tool --name "$mcp" -done - -echo "Cleanup complete" -``` - -### Environment-Specific Deletion - -Delete MCP tools from specific environments: -```bash -#!/bin/bash -# Delete development MCP tools from production -export TRUSTGRAPH_URL="http://prod.trustgraph.com:8088/" - -DEV_MCPS=("dev-mcp" "debug-mcp" "test-helper") - -for mcp in "${DEV_MCPS[@]}"; do - echo "Removing development MCP tool: $mcp" - tg-delete-mcp-tool --name "$mcp" -done -``` - -### MCP Service Shutdown - -Remove MCP tools when services are decommissioned: -```bash -#!/bin/bash -# Remove MCP tools for decommissioned service -SERVICE_NAME="old-service" - -# Find MCP tools for this service -MCP_TOOLS=$(tg-show-mcp-tools | grep "$SERVICE_NAME" | cut -d: -f1) - -for tool in $MCP_TOOLS; do - echo "Removing MCP tool for decommissioned service: $tool" - tg-delete-mcp-tool --name "$tool" -done -``` - -## Integration with Other Commands - -### With MCP Tool Management - -List and delete MCP tools: -```bash -# List all MCP tools -tg-show-mcp-tools - -# Delete specific MCP tool -tg-delete-mcp-tool --name unwanted-mcp - -# Verify deletion -tg-show-mcp-tools | grep unwanted-mcp -``` - -### With Configuration Management - -Manage MCP tool configurations: -```bash -# View current configuration -tg-show-config - -# Delete MCP tool -tg-delete-mcp-tool --name old-mcp - -# View updated configuration -tg-show-config -``` - -### With MCP Tool Invocation - -Ensure MCP tools can't be invoked after deletion: -```bash -# Delete MCP tool -tg-delete-mcp-tool --name deprecated-mcp - -# Verify tool 
is no longer available -tg-invoke-mcp-tool --name deprecated-mcp -# Should fail with tool not found error -``` - -## Best Practices - -1. **Verification**: Always verify MCP tool exists before deletion -2. **Backup**: Backup important MCP tool configurations before deletion -3. **Dependencies**: Check for MCP tool dependencies before deletion -4. **Service Coordination**: Coordinate with MCP service owners before deletion -5. **Testing**: Test system functionality after MCP tool deletion -6. **Documentation**: Document reasons for MCP tool deletion -7. **Gradual Removal**: Remove MCP tools gradually in production environments -8. **Monitoring**: Monitor for errors after MCP tool deletion - -## Troubleshooting - -### MCP Tool Not Found - -If MCP tool deletion reports "not found": -1. Verify the MCP tool name is correct -2. Check MCP tool exists with `tg-show-mcp-tools` -3. Ensure you're connected to the correct TrustGraph instance -4. Check for case sensitivity in MCP tool name - -### Deletion Errors - -If deletion fails: -1. Check TrustGraph API connectivity -2. Verify API permissions -3. Check for configuration corruption -4. Retry the deletion operation -5. Check MCP service status - -### Permission Errors - -If deletion fails due to permissions: -1. Verify API access credentials -2. Check TrustGraph API permissions -3. Ensure proper authentication -4. Contact system administrator if needed - -## Recovery - -### Restore Deleted MCP Tool - -If an MCP tool was accidentally deleted: -1. Use backup configuration if available -2. Re-register the MCP tool with `tg-set-mcp-tool` -3. Restore from version control if MCP tool definitions are tracked -4. Contact system administrator for recovery options - -### Verify System State - -After deletion, verify system state: -```bash -# Check MCP tool registry -tg-show-mcp-tools - -# Verify no orphaned configurations -tg-show-config | grep "mcp\." 
- -# Test MCP tool functionality -tg-invoke-mcp-tool --name remaining-tool -``` - -## MCP Tool Lifecycle - -### Development to Production - -Manage MCP tool lifecycle: -```bash -#!/bin/bash -# Promote MCP tool from dev to production - -# Remove development version -tg-delete-mcp-tool --name dev-tool - -# Add production version -tg-set-mcp-tool --name prod-tool --tool-url "http://prod.mcp.com/api" -``` - -### Version Management - -Manage MCP tool versions: -```bash -#!/bin/bash -# Update MCP tool to new version - -# Remove old version -tg-delete-mcp-tool --name tool-v1 - -# Add new version -tg-set-mcp-tool --name tool-v2 --tool-url "http://new.mcp.com/api" -``` - -## Security Considerations - -When deleting MCP tools: - -1. **Access Control**: Ensure proper authorization for deletion -2. **Audit Trail**: Log MCP tool deletions for security auditing -3. **Impact Assessment**: Assess security impact of tool removal -4. **Credential Cleanup**: Remove associated credentials if applicable -5. **Network Security**: Update firewall rules if MCP endpoints are no longer needed - -## Related Commands - -- [`tg-show-mcp-tools`](tg-show-mcp-tools.md) - Display registered MCP tools -- [`tg-set-mcp-tool`](tg-set-mcp-tool.md) - Configure and register MCP tools -- [`tg-invoke-mcp-tool`](tg-invoke-mcp-tool.md) - Execute MCP tools -- [`tg-delete-tool`](tg-delete-tool.md) - Delete regular agent tools - -## See Also - -- MCP Protocol Documentation -- TrustGraph MCP Integration Guide -- MCP Tool Management Manual \ No newline at end of file diff --git a/docs/cli/tg-delete-tool.md b/docs/cli/tg-delete-tool.md deleted file mode 100644 index 7b51c1b4..00000000 --- a/docs/cli/tg-delete-tool.md +++ /dev/null @@ -1,317 +0,0 @@ -# tg-delete-tool - -## Synopsis - -``` -tg-delete-tool [OPTIONS] --id ID -``` - -## Description - -The `tg-delete-tool` command deletes tools from the TrustGraph system. 
It removes tool configurations by ID from the agent configuration and updates the tool index accordingly. Once deleted, tools are no longer available for agent use. - -This command is useful for: -- Removing obsolete or deprecated tools -- Cleaning up tool configurations -- Managing tool registry maintenance -- Updating tool deployments by removing old versions - -The command removes both the tool from the tool index and deletes the complete tool configuration from the TrustGraph API. - -## Options - -- `-u, --api-url URL` - - TrustGraph API URL for configuration management - - Default: `http://localhost:8088/` (or `TRUSTGRAPH_URL` environment variable) - - Should point to a running TrustGraph API instance - -- `--id ID` - - **Required.** Tool ID to delete - - Must match an existing tool ID in the registry - - Tool will be completely removed from the system - -- `-h, --help` - - Show help message and exit - -## Examples - -### Basic Tool Deletion - -Delete a weather tool: -```bash -tg-delete-tool --id weather -``` - -### Calculator Tool Deletion - -Delete a calculator tool: -```bash -tg-delete-tool --id calculator -``` - -### Custom API URL - -Delete a tool from a specific TrustGraph instance: -```bash -tg-delete-tool --api-url http://trustgraph.example.com:8088/ --id custom-tool -``` - -### Batch Tool Deletion - -Delete multiple tools in a script: -```bash -#!/bin/bash -# Delete obsolete tools -tg-delete-tool --id old-search -tg-delete-tool --id deprecated-calc -tg-delete-tool --id unused-tool -``` - -### Conditional Deletion - -Delete a tool only if it exists: -```bash -#!/bin/bash -# Check if tool exists before deletion -if tg-show-tools | grep -q "test-tool"; then - tg-delete-tool --id test-tool - echo "Tool deleted" -else - echo "Tool not found" -fi -``` - -## Deletion Process - -The deletion process involves two steps: - -1. **Index Update**: Remove the tool ID from the tool index -2. 
**Configuration Removal**: Delete the tool configuration data - -Both operations must succeed for the deletion to be complete. - -## Error Handling - -The command handles various error conditions: - -- **Tool not found**: If the specified tool ID doesn't exist -- **Missing configuration**: If tool is in index but configuration is missing -- **API connection errors**: If the TrustGraph API is unavailable -- **Partial deletion**: If index update or configuration removal fails - -Common error scenarios: -```bash -# Tool not found -tg-delete-tool --id nonexistent-tool -# Output: Tool 'nonexistent-tool' not found in tool index. - -# Missing required field -tg-delete-tool -# Output: Exception: Must specify --id for tool to delete - -# API connection error -tg-delete-tool --api-url http://invalid-host:8088/ --id tool1 -# Output: Exception: [Connection error details] -``` - -## Verification - -The command provides feedback on the deletion process: - -- **Success**: `Tool 'tool-id' deleted successfully.` -- **Not found**: `Tool 'tool-id' not found in tool index.` -- **Configuration missing**: `Tool configuration for 'tool-id' not found.` -- **Error**: `Error deleting tool 'tool-id': [error details]` - -## Advanced Usage - -### Safe Deletion with Verification - -Verify tool exists before deletion: -```bash -#!/bin/bash -TOOL_ID="weather" - -# Check if tool exists -if tg-show-tools | grep -q "^$TOOL_ID:"; then - echo "Deleting tool: $TOOL_ID" - tg-delete-tool --id "$TOOL_ID" - - # Verify deletion - if ! tg-show-tools | grep -q "^$TOOL_ID:"; then - echo "Tool successfully deleted" - else - echo "Tool deletion failed" - fi -else - echo "Tool $TOOL_ID not found" -fi -``` - -### Backup Before Deletion - -Backup tool configuration before deletion: -```bash -#!/bin/bash -TOOL_ID="important-tool" - -# Export tool configuration -echo "Backing up tool configuration..." -tg-show-tools | grep -A 20 "^$TOOL_ID:" > "${TOOL_ID}_backup.txt" - -# Delete tool -echo "Deleting tool..." 
-tg-delete-tool --id "$TOOL_ID" - -echo "Tool deleted, backup saved to ${TOOL_ID}_backup.txt" -``` - -### Cleanup Script - -Clean up multiple tools based on patterns: -```bash -#!/bin/bash -# Delete all test tools -echo "Cleaning up test tools..." - -# Get list of test tools -TEST_TOOLS=$(tg-show-tools | grep "^test-" | cut -d: -f1) - -for tool in $TEST_TOOLS; do - echo "Deleting $tool..." - tg-delete-tool --id "$tool" -done - -echo "Cleanup complete" -``` - -### Environment-Specific Deletion - -Delete tools from specific environments: -```bash -#!/bin/bash -# Delete development tools from production -export TRUSTGRAPH_URL="http://prod.trustgraph.com:8088/" - -DEV_TOOLS=("dev-tool" "debug-tool" "test-helper") - -for tool in "${DEV_TOOLS[@]}"; do - echo "Removing development tool: $tool" - tg-delete-tool --id "$tool" -done -``` - -## Integration with Other Commands - -### With Tool Management - -List and delete tools: -```bash -# List all tools -tg-show-tools - -# Delete specific tool -tg-delete-tool --id unwanted-tool - -# Verify deletion -tg-show-tools | grep unwanted-tool -``` - -### With Configuration Management - -Manage tool configurations: -```bash -# View current configuration -tg-show-config - -# Delete tool -tg-delete-tool --id old-tool - -# View updated configuration -tg-show-config -``` - -### With Agent Workflows - -Ensure agents don't use deleted tools: -```bash -# Delete tool -tg-delete-tool --id deprecated-tool - -# Check agent configuration -tg-show-config | grep deprecated-tool -``` - -## Best Practices - -1. **Verification**: Always verify tool exists before deletion -2. **Backup**: Backup important tool configurations before deletion -3. **Dependencies**: Check for tool dependencies before deletion -4. **Testing**: Test system functionality after tool deletion -5. **Documentation**: Document reasons for tool deletion -6. **Gradual Removal**: Remove tools gradually in production environments -7. 
**Monitoring**: Monitor for errors after tool deletion - -## Troubleshooting - -### Tool Not Found - -If tool deletion reports "not found": -1. Verify the tool ID is correct -2. Check tool exists with `tg-show-tools` -3. Ensure you're connected to the correct TrustGraph instance -4. Check for case sensitivity in tool ID - -### Partial Deletion - -If deletion partially fails: -1. Check TrustGraph API connectivity -2. Verify API permissions -3. Check for configuration corruption -4. Retry the deletion operation -5. Manual cleanup may be required - -### Permission Errors - -If deletion fails due to permissions: -1. Verify API access credentials -2. Check TrustGraph API permissions -3. Ensure proper authentication -4. Contact system administrator if needed - -## Recovery - -### Restore Deleted Tool - -If a tool was accidentally deleted: -1. Use backup configuration if available -2. Re-register the tool with `tg-set-tool` -3. Restore from version control if tool definitions are tracked -4. Contact system administrator for recovery options - -### Verify System State - -After deletion, verify system state: -```bash -# Check tool index consistency -tg-show-tools - -# Verify no orphaned configurations -tg-show-config | grep "tool\." 
- -# Test agent functionality -tg-invoke-agent --prompt "Test prompt" -``` - -## Related Commands - -- [`tg-show-tools`](tg-show-tools.md) - Display registered tools -- [`tg-set-tool`](tg-set-tool.md) - Configure and register tools -- [`tg-delete-mcp-tool`](tg-delete-mcp-tool.md) - Delete MCP tools -- [`tg-show-config`](tg-show-config.md) - View system configuration - -## See Also - -- TrustGraph Tool Management Guide -- Agent Configuration Documentation -- System Administration Manual \ No newline at end of file diff --git a/docs/cli/tg-dump-msgpack.md b/docs/cli/tg-dump-msgpack.md deleted file mode 100644 index 4f06f97f..00000000 --- a/docs/cli/tg-dump-msgpack.md +++ /dev/null @@ -1,489 +0,0 @@ -# tg-dump-msgpack - -Reads and analyzes knowledge core files in MessagePack format for diagnostic purposes. - -## Synopsis - -```bash -tg-dump-msgpack -i INPUT_FILE [options] -``` - -## Description - -The `tg-dump-msgpack` command is a diagnostic utility that reads knowledge core files stored in MessagePack format and outputs their contents in JSON format or provides a summary analysis. This tool is primarily used for debugging, data inspection, and understanding the structure of knowledge cores. - -MessagePack is a binary serialization format that TrustGraph uses for efficient storage and transfer of knowledge graph data. 
- -## Options - -### Required Arguments - -- `-i, --input-file FILE`: Input MessagePack file to read - -### Optional Arguments - -- `-s, --summary`: Show a summary analysis of the file contents -- `-r, --records`: Dump individual records in JSON format (default behavior) - -## Examples - -### Dump Records as JSON -```bash -tg-dump-msgpack -i knowledge-core.msgpack -``` - -### Show Summary Analysis -```bash -tg-dump-msgpack -i knowledge-core.msgpack --summary -``` - -### Save Output to File -```bash -tg-dump-msgpack -i knowledge-core.msgpack > analysis.json -``` - -### Analyze Multiple Files -```bash -for file in *.msgpack; do - echo "=== $file ===" - tg-dump-msgpack -i "$file" --summary - echo -done -``` - -## Output Formats - -### Record Output (Default) -With `-r` or `--records` (default behavior), the command outputs each record as a separate JSON object: - -```json -["t", {"m": {"m": [{"s": {"v": "uri1"}, "p": {"v": "predicate"}, "o": {"v": "object"}}]}}] -["ge", {"v": [[0.1, 0.2, 0.3, ...]]}] -["de", {"metadata": {...}, "chunks": [...]}] -``` - -### Summary Output -With `-s` or `--summary`, the command provides an analytical overview: - -``` -Vector dimension: 384 -- NASA Challenger Report -- Technical Documentation -- Safety Engineering Guidelines -``` - -## Record Types - -MessagePack files may contain different types of records: - -### Triple Records ("t") -RDF triples representing knowledge graph relationships: -```json -["t", { - "m": { - "m": [{ - "s": {"v": "http://example.org/subject"}, - "p": {"v": "http://example.org/predicate"}, - "o": {"v": "object value"} - }] - } -}] -``` - -### Graph Embeddings ("ge") -Vector embeddings for graph entities: -```json -["ge", { - "v": [[0.1, 0.2, 0.3, 0.4, ...]] -}] -``` - -### Document Embeddings ("de") -Document chunk embeddings with metadata: -```json -["de", { - "metadata": { - "id": "doc-123", - "user": "trustgraph", - "collection": "default" - }, - "chunks": [{ - "chunk": "text content", - "vectors": [0.1, 
0.2, 0.3, ...] - }] -}] -``` - -## Use Cases - -### Data Inspection -```bash -# Quick peek at file structure -tg-dump-msgpack -i mystery-core.msgpack --summary - -# Detailed record analysis -tg-dump-msgpack -i knowledge-core.msgpack | head -20 -``` - -### Debugging Knowledge Cores -```bash -# Check if file contains expected data types -tg-dump-msgpack -i core.msgpack | grep -o '^\["[^"]*"' | sort | uniq -c - -# Find specific entities -tg-dump-msgpack -i core.msgpack | grep "NASA" - -# Check vector dimensions -tg-dump-msgpack -i core.msgpack --summary | grep "Vector dimension" -``` - -### Quality Assurance -```bash -# Validate file completeness -validate_msgpack() { - local file="$1" - - echo "Validating: $file" - - # Check file exists and is readable - if [ ! -r "$file" ]; then - echo "Error: Cannot read file $file" - return 1 - fi - - # Get summary - summary=$(tg-dump-msgpack -i "$file" --summary 2>/dev/null) - - if [ $? -ne 0 ]; then - echo "Error: Failed to read MessagePack file" - return 1 - fi - - # Check for vector dimension (indicates embeddings present) - if echo "$summary" | grep -q "Vector dimension:"; then - dim=$(echo "$summary" | grep "Vector dimension:" | awk '{print $3}') - echo "✓ Contains embeddings (dimension: $dim)" - else - echo "⚠ No embeddings found" - fi - - # Count labels (indicates entities present) - label_count=$(echo "$summary" | grep "^-" | wc -l) - echo "✓ Found $label_count labeled entities" - - return 0 -} - -# Validate multiple files -for file in cores/*.msgpack; do - validate_msgpack "$file" -done -``` - -### Data Migration -```bash -# Convert MessagePack to JSON for processing -convert_to_json() { - local input="$1" - local output="$2" - - echo "Converting $input to $output..." 
- tg-dump-msgpack -i "$input" > "$output" - - # Add array wrapper for valid JSON array - sed -i '1i[' "$output" - sed -i '$a]' "$output" - sed -i 's/$/,/' "$output" - sed -i '$s/,$//' "$output" - - echo "Conversion complete" -} - -convert_to_json "knowledge.msgpack" "knowledge.json" -``` - -### Analysis and Reporting -```bash -# Generate comprehensive analysis report -analyze_msgpack() { - local file="$1" - local report_file="${file%.msgpack}_analysis.txt" - - echo "MessagePack Analysis Report" > "$report_file" - echo "File: $file" >> "$report_file" - echo "Generated: $(date)" >> "$report_file" - echo "=============================" >> "$report_file" - echo "" >> "$report_file" - - # Summary information - echo "Summary:" >> "$report_file" - tg-dump-msgpack -i "$file" --summary >> "$report_file" - echo "" >> "$report_file" - - # Record type analysis - echo "Record Type Distribution:" >> "$report_file" - tg-dump-msgpack -i "$file" | \ - grep -o '^\["[^"]*"' | \ - sort | uniq -c | \ - awk '{print " " $2 ": " $1 " records"}' >> "$report_file" - echo "" >> "$report_file" - - # File statistics - file_size=$(stat -c%s "$file") - echo "File Statistics:" >> "$report_file" - echo " Size: $file_size bytes" >> "$report_file" - echo " Size (human): $(numfmt --to=iec-i --suffix=B $file_size)" >> "$report_file" - - echo "Analysis saved to: $report_file" -} - -# Analyze all MessagePack files -for file in *.msgpack; do - analyze_msgpack "$file" -done -``` - -### Comparative Analysis -```bash -# Compare two knowledge cores -compare_msgpack() { - local file1="$1" - local file2="$2" - - echo "Comparing MessagePack files:" - echo "File 1: $file1" - echo "File 2: $file2" - echo "==========================" - - # Compare summaries - echo "Summary comparison:" - echo "File 1:" - tg-dump-msgpack -i "$file1" --summary | sed 's/^/ /' - echo "" - echo "File 2:" - tg-dump-msgpack -i "$file2" --summary | sed 's/^/ /' - echo "" - - # Compare record counts - echo "Record type comparison:" - echo 
"File 1:" - tg-dump-msgpack -i "$file1" | \ - grep -o '^\["[^"]*"' | \ - sort | uniq -c | \ - awk '{print " " $2 ": " $1}' | \ - sort - - echo "File 2:" - tg-dump-msgpack -i "$file2" | \ - grep -o '^\["[^"]*"' | \ - sort | uniq -c | \ - awk '{print " " $2 ": " $1}' | \ - sort -} - -compare_msgpack "core1.msgpack" "core2.msgpack" -``` - -## Advanced Usage - -### Large File Processing -```bash -# Process large files in chunks -process_large_msgpack() { - local file="$1" - local chunk_size=1000 - - echo "Processing large file: $file" - - # Count total records first - total_records=$(tg-dump-msgpack -i "$file" | wc -l) - echo "Total records: $total_records" - - # Process in chunks - tg-dump-msgpack -i "$file" | \ - split -l $chunk_size - "chunk_" - - echo "Split into chunks of $chunk_size records each" - - # Process each chunk - for chunk in chunk_*; do - echo "Processing $chunk..." - # Add your processing logic here - wc -l "$chunk" - done - - # Clean up - rm chunk_* -} -``` - -### Data Extraction -```bash -# Extract specific data types -extract_triples() { - local file="$1" - local output="triples.json" - - echo "Extracting triples from $file..." - tg-dump-msgpack -i "$file" | \ - grep '^\["t"' > "$output" - - echo "Triples saved to: $output" -} - -extract_embeddings() { - local file="$1" - local output="embeddings.json" - - echo "Extracting embeddings from $file..." - tg-dump-msgpack -i "$file" | \ - grep -E '^\["(ge|de)"' > "$output" - - echo "Embeddings saved to: $output" -} - -# Extract all data types -extract_triples "knowledge.msgpack" -extract_embeddings "knowledge.msgpack" -``` - -### Integration with Other Tools -```bash -# Convert MessagePack to formats for other tools -msgpack_to_turtle() { - local input="$1" - local output="$2" - - echo "Converting MessagePack to Turtle format..." 
- - # Extract triples and convert to Turtle - tg-dump-msgpack -i "$input" | \ - grep '^\["t"' | \ - jq -r '.[1].m.m[] | - "<" + .s.v + "> <" + .p.v + "> " + - (if .o.e then "<" + .o.v + ">" else "\"" + .o.v + "\"" end) + " ."' \ - > "$output" - - echo "Turtle format saved to: $output" -} - -msgpack_to_turtle "knowledge.msgpack" "knowledge.ttl" -``` - -## Error Handling - -### File Not Found -```bash -Exception: [Errno 2] No such file or directory: 'missing.msgpack' -``` -**Solution**: Check file path and ensure the file exists. - -### Invalid MessagePack Format -```bash -Exception: Unpack failed -``` -**Solution**: Verify the file is a valid MessagePack file and not corrupted. - -### Memory Issues with Large Files -```bash -MemoryError: Unable to allocate memory -``` -**Solution**: Process large files in chunks or use streaming approaches. - -### Permission Errors -```bash -Exception: [Errno 13] Permission denied -``` -**Solution**: Check file permissions and ensure read access. - -## Performance Considerations - -### File Size Optimization -```bash -# Check file compression efficiency -check_compression() { - local file="$1" - - original_size=$(stat -c%s "$file") - - # Test compression - gzip -c "$file" > "${file}.gz" - compressed_size=$(stat -c%s "${file}.gz") - - ratio=$(echo "scale=2; $compressed_size * 100 / $original_size" | bc) - - echo "Original: $(numfmt --to=iec-i --suffix=B $original_size)" - echo "Compressed: $(numfmt --to=iec-i --suffix=B $compressed_size)" - echo "Compression ratio: ${ratio}%" - - rm "${file}.gz" -} -``` - -### Processing Speed -```bash -# Time processing operations -time_msgpack_ops() { - local file="$1" - - echo "Timing MessagePack operations for: $file" - - # Time summary generation - echo "Summary generation:" - time tg-dump-msgpack -i "$file" --summary > /dev/null - - # Time full dump - echo "Full record dump:" - time tg-dump-msgpack -i "$file" > /dev/null -} -``` - -## Related Commands - -- [`tg-get-kg-core`](tg-get-kg-core.md) 
- Export knowledge cores to MessagePack -- [`tg-load-kg-core`](tg-load-kg-core.md) - Load MessagePack knowledge cores -- [`tg-save-doc-embeds`](tg-save-doc-embeds.md) - Save document embeddings to MessagePack - -## Best Practices - -1. **File Validation**: Always validate MessagePack files before processing -2. **Memory Management**: Be cautious with large files to avoid memory issues -3. **Backup**: Keep backups of original MessagePack files before analysis -4. **Incremental Processing**: Process large files incrementally when possible -5. **Documentation**: Document the structure and content of your MessagePack files -6. **Version Control**: Track changes in MessagePack file formats over time - -## Troubleshooting - -### Corrupted Files -```bash -# Test file integrity -if tg-dump-msgpack -i "test.msgpack" --summary > /dev/null 2>&1; then - echo "File appears valid" -else - echo "File may be corrupted" -fi -``` - -### Empty or Incomplete Files -```bash -# Check for empty files -if [ ! -s "test.msgpack" ]; then - echo "File is empty" -fi - -# Check record count -record_count=$(tg-dump-msgpack -i "test.msgpack" 2>/dev/null | wc -l) -echo "Records found: $record_count" -``` - -### Format Issues -```bash -# Validate JSON output -tg-dump-msgpack -i "test.msgpack" | head -1 | jq . > /dev/null -if [ $? -eq 0 ]; then - echo "JSON output is valid" -else - echo "JSON output may be malformed" -fi -``` \ No newline at end of file diff --git a/docs/cli/tg-get-flow-blueprint.md b/docs/cli/tg-get-flow-blueprint.md deleted file mode 100644 index 1998c3b1..00000000 --- a/docs/cli/tg-get-flow-blueprint.md +++ /dev/null @@ -1,344 +0,0 @@ -# tg-get-flow-blueprint - -Retrieves and displays a flow blueprint definition in JSON format. - -## Synopsis - -```bash -tg-get-flow-blueprint -n CLASS_NAME [options] -``` - -## Description - -The `tg-get-flow-blueprint` command retrieves a stored flow blueprint definition from TrustGraph and displays it in formatted JSON. 
This is useful for examining flow blueprint configurations, creating backups, or preparing to modify existing flow blueprintes. - -The output can be saved to files for version control, documentation, or as input for creating new flow blueprintes with `tg-put-flow-blueprint`. - -## Options - -### Required Arguments - -- `-n, --blueprint-name CLASS_NAME`: Name of the flow blueprint to retrieve - -### Optional Arguments - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) - -## Examples - -### Display Flow Blueprint Definition -```bash -tg-get-flow-blueprint -n "document-processing" -``` - -### Save Flow Blueprint to File -```bash -tg-get-flow-blueprint -n "production-flow" > production-flow-backup.json -``` - -### Compare Flow Blueprintes -```bash -# Get multiple flow blueprintes for comparison -tg-get-flow-blueprint -n "dev-flow" > dev-flow.json -tg-get-flow-blueprint -n "prod-flow" > prod-flow.json -diff dev-flow.json prod-flow.json -``` - -### Using Custom API URL -```bash -tg-get-flow-blueprint -n "remote-flow" -u http://production:8088/ -``` - -## Output Format - -The command outputs the flow blueprint definition in formatted JSON: - -```json -{ - "description": "Document processing and analysis flow", - "interfaces": { - "agent": { - "request": "non-persistent://tg/request/agent:doc-proc", - "response": "non-persistent://tg/response/agent:doc-proc" - }, - "document-rag": { - "request": "non-persistent://tg/request/document-rag:doc-proc", - "response": "non-persistent://tg/response/document-rag:doc-proc" - }, - "text-load": "persistent://tg/flow/text-document-load:doc-proc", - "document-load": "persistent://tg/flow/document-load:doc-proc", - "triples-store": "persistent://tg/flow/triples-store:doc-proc" - }, - "tags": ["production", "document-processing"] -} -``` - -### Key Components - -#### Description -Human-readable description of the flow blueprint purpose and capabilities. 
- -#### Interfaces -Service definitions showing: -- **Request/Response Services**: Services with both request and response queues -- **Fire-and-Forget Services**: Services with only input queues - -#### Tags (Optional) -Categorization tags for organizing flow blueprintes. - -## Prerequisites - -### Flow Blueprint Must Exist -Verify the flow blueprint exists before retrieval: - -```bash -# Check available flow blueprintes -tg-show-flow-blueprints - -# Look for specific class -tg-show-flow-blueprints | grep "target-class" -``` - -## Error Handling - -### Flow Blueprint Not Found -```bash -Exception: Flow blueprint 'invalid-class' not found -``` -**Solution**: Check available classes with `tg-show-flow-blueprints` and verify the class name. - -### Connection Errors -```bash -Exception: Connection refused -``` -**Solution**: Check the API URL and ensure TrustGraph is running. - -### Permission Errors -```bash -Exception: Access denied to flow blueprint -``` -**Solution**: Verify user permissions for accessing flow blueprint definitions. 
- -## Use Cases - -### Configuration Backup -```bash -# Backup all flow blueprintes -mkdir -p flow-class-backups/$(date +%Y%m%d) -tg-show-flow-blueprints | awk '{print $1}' | while read class; do - if [ "$class" != "flow" ]; then # Skip header - tg-get-flow-blueprint -n "$class" > "flow-class-backups/$(date +%Y%m%d)/$class.json" - fi -done -``` - -### Flow Blueprint Migration -```bash -# Export from source environment -tg-get-flow-blueprint -n "production-flow" -u http://source:8088/ > prod-flow.json - -# Import to target environment -tg-put-flow-blueprint -n "production-flow" -c "$(cat prod-flow.json)" -u http://target:8088/ -``` - -### Template Creation -```bash -# Get existing flow blueprint as template -tg-get-flow-blueprint -n "base-flow" > template.json - -# Modify template and create new class -sed 's/base-flow/new-flow/g' template.json > new-flow.json -tg-put-flow-blueprint -n "custom-flow" -c "$(cat new-flow.json)" -``` - -### Configuration Analysis -```bash -# Analyze flow blueprint configurations -tg-get-flow-blueprint -n "complex-flow" | jq '.interfaces | keys' -tg-get-flow-blueprint -n "complex-flow" | jq '.interfaces | length' -``` - -### Version Control Integration -```bash -# Store flow blueprintes in git -mkdir -p flow-classes -tg-get-flow-blueprint -n "main-flow" > flow-classes/main-flow.json -git add flow-classes/main-flow.json -git commit -m "Update main-flow configuration" -``` - -## JSON Processing - -### Extract Specific Information -```bash -# Get only interface names -tg-get-flow-blueprint -n "my-flow" | jq -r '.interfaces | keys[]' - -# Get only description -tg-get-flow-blueprint -n "my-flow" | jq -r '.description' - -# Get request queues -tg-get-flow-blueprint -n "my-flow" | jq -r '.interfaces | to_entries[] | select(.value.request) | .value.request' -``` - -### Validate Configuration -```bash -# Validate JSON structure -tg-get-flow-blueprint -n "my-flow" | jq . 
> /dev/null && echo "Valid JSON" || echo "Invalid JSON" - -# Check required fields -config=$(tg-get-flow-blueprint -n "my-flow") -echo "$config" | jq -e '.description' > /dev/null || echo "Missing description" -echo "$config" | jq -e '.interfaces' > /dev/null || echo "Missing interfaces" -``` - -## Integration with Other Commands - -### Flow Blueprint Lifecycle -```bash -# 1. Examine existing flow blueprint -tg-get-flow-blueprint -n "old-flow" - -# 2. Save backup -tg-get-flow-blueprint -n "old-flow" > old-flow-backup.json - -# 3. Modify configuration -cp old-flow-backup.json new-flow.json -# Edit new-flow.json as needed - -# 4. Upload new version -tg-put-flow-blueprint -n "updated-flow" -c "$(cat new-flow.json)" - -# 5. Test new flow blueprint -tg-start-flow -n "updated-flow" -i "test-instance" -d "Testing updated flow" -``` - -### Bulk Operations -```bash -# Process multiple flow blueprintes -flow_classes=("flow1" "flow2" "flow3") -for class in "${flow_classes[@]}"; do - echo "Processing $class..." - tg-get-flow-blueprint -n "$class" > "backup-$class.json" - - # Modify configuration - sed 's/old-pattern/new-pattern/g' "backup-$class.json" > "updated-$class.json" - - # Upload updated version - tg-put-flow-blueprint -n "$class" -c "$(cat updated-$class.json)" -done -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-put-flow-blueprint`](tg-put-flow-blueprint.md) - Upload/update flow blueprint definitions -- [`tg-show-flow-blueprints`](tg-show-flow-blueprints.md) - List available flow blueprintes -- [`tg-delete-flow-blueprint`](tg-delete-flow-blueprint.md) - Remove flow blueprint definitions -- [`tg-start-flow`](tg-start-flow.md) - Create flow instances from classes - -## API Integration - -This command uses the [Flow API](../apis/api-flow.md) with the `get-class` operation to retrieve flow blueprint definitions. 
- -## Advanced Usage - -### Configuration Diff -```bash -# Compare flow blueprint versions -tg-get-flow-blueprint -n "flow-v1" > v1.json -tg-get-flow-blueprint -n "flow-v2" > v2.json -diff -u v1.json v2.json -``` - -### Extract Queue Information -```bash -# Get all queue names from flow blueprint -tg-get-flow-blueprint -n "my-flow" | jq -r ' - .interfaces | - to_entries[] | - if .value | type == "object" then - .value.request, .value.response - else - .value - end -' | sort | uniq -``` - -### Configuration Validation Script -```bash -#!/bin/bash -# validate-flow-class.sh -flow_class="$1" - -if [ -z "$flow_class" ]; then - echo "Usage: $0 " - exit 1 -fi - -echo "Validating flow blueprint: $flow_class" - -# Get configuration -config=$(tg-get-flow-blueprint -n "$flow_class" 2>/dev/null) -if [ $? -ne 0 ]; then - echo "ERROR: Flow blueprint not found" - exit 1 -fi - -# Validate JSON -echo "$config" | jq . > /dev/null -if [ $? -ne 0 ]; then - echo "ERROR: Invalid JSON structure" - exit 1 -fi - -# Check required fields -desc=$(echo "$config" | jq -r '.description // empty') -if [ -z "$desc" ]; then - echo "WARNING: Missing description" -fi - -interfaces=$(echo "$config" | jq -r '.interfaces // empty') -if [ -z "$interfaces" ] || [ "$interfaces" = "null" ]; then - echo "ERROR: Missing interfaces" - exit 1 -fi - -echo "Flow blueprint validation passed" -``` - -## Best Practices - -1. **Regular Backups**: Save flow blueprint definitions before modifications -2. **Version Control**: Store configurations in version control systems -3. **Documentation**: Include meaningful descriptions in flow blueprintes -4. **Validation**: Validate JSON structure before using configurations -5. **Template Management**: Use existing classes as templates for new ones -6. 
**Change Tracking**: Document changes when updating flow blueprintes - -## Troubleshooting - -### Empty Output -```bash -# If command returns empty output -tg-get-flow-blueprint -n "my-flow" -# Check if flow blueprint exists -tg-show-flow-blueprints | grep "my-flow" -``` - -### Invalid JSON Output -```bash -# If output appears corrupted -tg-get-flow-blueprint -n "my-flow" | jq . -# Should show parsing error if JSON is invalid -``` - -### Permission Issues -```bash -# If access denied errors occur -# Verify authentication and user permissions -# Contact system administrator if needed -``` \ No newline at end of file diff --git a/docs/cli/tg-get-kg-core.md b/docs/cli/tg-get-kg-core.md deleted file mode 100644 index 0f77199e..00000000 --- a/docs/cli/tg-get-kg-core.md +++ /dev/null @@ -1,365 +0,0 @@ -# tg-get-kg-core - -Exports a knowledge core from TrustGraph to a MessagePack file. - -## Synopsis - -```bash -tg-get-kg-core --id CORE_ID -o OUTPUT_FILE [options] -``` - -## Description - -The `tg-get-kg-core` command retrieves a stored knowledge core from TrustGraph and exports it to a MessagePack format file. This allows you to backup knowledge cores, transfer them between systems, or examine their contents offline. - -The exported file contains both RDF triples and graph embeddings in a compact binary format that can later be imported using `tg-put-kg-core`. 
- -## Options - -### Required Arguments - -- `--id, --identifier CORE_ID`: Identifier of the knowledge core to export -- `-o, --output OUTPUT_FILE`: Path for the output MessagePack file - -### Optional Arguments - -- `-u, --url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `ws://localhost:8088/`) -- `-U, --user USER`: User identifier (default: `trustgraph`) - -## Examples - -### Basic Knowledge Core Export -```bash -tg-get-kg-core --id "research-knowledge" -o research-backup.msgpack -``` - -### Export with Specific User -```bash -tg-get-kg-core \ - --id "medical-knowledge" \ - -o medical-backup.msgpack \ - -U medical-team -``` - -### Export with Timestamped Filename -```bash -tg-get-kg-core \ - --id "production-core" \ - -o "production-backup-$(date +%Y%m%d-%H%M%S).msgpack" -``` - -### Using Custom API URL -```bash -tg-get-kg-core \ - --id "remote-core" \ - -o remote-backup.msgpack \ - -u ws://production:8088/ -``` - -## Prerequisites - -### Knowledge Core Must Exist -Verify the knowledge core exists: - -```bash -# Check available knowledge cores -tg-show-kg-cores - -# Verify specific core exists -tg-show-kg-cores | grep "target-core-id" -``` - -### Output Directory Must Be Writable -Ensure the output directory exists and is writable: - -```bash -# Create backup directory if needed -mkdir -p backups - -# Export to backup directory -tg-get-kg-core --id "my-core" -o backups/my-core-backup.msgpack -``` - -## Export Process - -1. **Connection**: Establishes WebSocket connection to Knowledge API -2. **Request**: Sends get-kg-core request with core ID and user -3. **Streaming**: Receives data in chunks via WebSocket -4. **Processing**: Converts response data to MessagePack format -5. **Writing**: Writes binary data to output file -6. 
**Summary**: Reports statistics on exported data - -## Output Format - -The exported MessagePack file contains structured data with two types of messages: - -### Triple Messages (`"t"`) -Contains RDF triples (facts and relationships): -```python -("t", { - "m": { # metadata - "i": "core-id", - "m": [], # metadata triples - "u": "user", - "c": "collection" - }, - "t": [ # triples array - { - "s": {"value": "subject", "is_uri": true}, - "p": {"value": "predicate", "is_uri": true}, - "o": {"value": "object", "is_uri": false} - } - ] -}) -``` - -### Graph Embedding Messages (`"ge"`) -Contains vector embeddings for entities: -```python -("ge", { - "m": { # metadata - "i": "core-id", - "m": [], # metadata triples - "u": "user", - "c": "collection" - }, - "e": [ # entities array - { - "e": {"value": "entity", "is_uri": true}, - "v": [[0.1, 0.2, 0.3]] # vectors - } - ] -}) -``` - -## Output Statistics - -The command reports the number of messages exported: - -```bash -Got: 150 triple, 75 GE messages. -``` - -Where: -- **triple**: Number of RDF triple message chunks exported -- **GE**: Number of graph embedding message chunks exported - -## Error Handling - -### Knowledge Core Not Found -```bash -Exception: Knowledge core 'invalid-core' not found -``` -**Solution**: Check available cores with `tg-show-kg-cores` and verify the core ID. - -### Permission Denied -```bash -Exception: Access denied to knowledge core -``` -**Solution**: Verify user permissions for the specified knowledge core. - -### File Permission Errors -```bash -Exception: Permission denied: output.msgpack -``` -**Solution**: Check write permissions for the output directory and filename. - -### Connection Errors -```bash -Exception: Connection refused -``` -**Solution**: Check the API URL and ensure TrustGraph is running. - -### Disk Space Errors -```bash -Exception: No space left on device -``` -**Solution**: Free up disk space or use a different output location. 
- -## File Management - -### Backup Organization -```bash -# Create organized backup structure -mkdir -p backups/{daily,weekly,monthly} - -# Daily backup -tg-get-kg-core --id "prod-core" -o "backups/daily/prod-$(date +%Y%m%d).msgpack" - -# Weekly backup -tg-get-kg-core --id "prod-core" -o "backups/weekly/prod-week-$(date +%V).msgpack" -``` - -### Compression -```bash -# Export and compress for storage -tg-get-kg-core --id "large-core" -o large-core.msgpack -gzip large-core.msgpack - -# Results in large-core.msgpack.gz -``` - -## File Verification - -### Check File Size -```bash -# Export and verify -tg-get-kg-core --id "my-core" -o my-core.msgpack -ls -lh my-core.msgpack - -# Typical sizes: small cores (KB-MB), large cores (MB-GB) -``` - -### Validate Export -```bash -# Test the exported file by importing to different ID -tg-put-kg-core --id "test-import" -i my-core.msgpack -tg-show-kg-cores | grep "test-import" -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL (automatically converted to WebSocket format) - -## Related Commands - -- [`tg-put-kg-core`](tg-put-kg-core.md) - Import knowledge core from MessagePack file -- [`tg-show-kg-cores`](tg-show-kg-cores.md) - List available knowledge cores -- [`tg-delete-kg-core`](tg-delete-kg-core.md) - Delete knowledge cores -- [`tg-dump-msgpack`](tg-dump-msgpack.md) - Examine MessagePack file contents - -## API Integration - -This command uses the [Knowledge API](../apis/api-knowledge.md) via WebSocket connection with `get-kg-core` operations to retrieve knowledge data. - -## Use Cases - -### Regular Backups -```bash -#!/bin/bash -# Daily backup script -cores=("production-core" "research-core" "customer-data") -backup_dir="backups/$(date +%Y%m%d)" -mkdir -p "$backup_dir" - -for core in "${cores[@]}"; do - echo "Backing up $core..." 
- tg-get-kg-core --id "$core" -o "$backup_dir/$core.msgpack" -done -``` - -### Migration Between Environments -```bash -# Export from development -tg-get-kg-core --id "dev-knowledge" -o dev-export.msgpack - -# Import to staging -tg-put-kg-core --id "staging-knowledge" -i dev-export.msgpack -``` - -### Knowledge Core Versioning -```bash -# Create versioned backups -version="v$(date +%Y%m%d)" -tg-get-kg-core --id "main-knowledge" -o "knowledge-$version.msgpack" - -# Tag with git or other version control -git add "knowledge-$version.msgpack" -git commit -m "Knowledge core backup $version" -``` - -### Data Analysis -```bash -# Export for offline analysis -tg-get-kg-core --id "analytics-data" -o analytics.msgpack - -# Process with custom tools -python analyze_knowledge.py analytics.msgpack -``` - -### Disaster Recovery -```bash -# Create comprehensive backup -cores=$(tg-show-kg-cores) -backup_date=$(date +%Y%m%d-%H%M%S) -backup_dir="disaster-recovery-$backup_date" -mkdir -p "$backup_dir" - -for core in $cores; do - echo "Backing up $core..." 
- tg-get-kg-core --id "$core" -o "$backup_dir/$core.msgpack" -done - -# Create checksum file -cd "$backup_dir" -sha256sum *.msgpack > checksums.sha256 -``` - -## Automated Backup Strategies - -### Cron Job Setup -```bash -# Add to crontab for daily backups at 2 AM -# 0 2 * * * /path/to/backup-script.sh - -#!/bin/bash -# backup-script.sh -BACKUP_DIR="/backups/$(date +%Y%m%d)" -mkdir -p "$BACKUP_DIR" - -# Backup all cores -tg-show-kg-cores | while read core; do - tg-get-kg-core --id "$core" -o "$BACKUP_DIR/$core.msgpack" -done - -# Cleanup old backups (keep 30 days) -find /backups -type d -mtime +30 -exec rm -rf {} \; -``` - -### Incremental Backups -```bash -# Compare with previous backup -current_cores=$(tg-show-kg-cores | sort) -previous_cores=$(cat last-backup-cores.txt 2>/dev/null | sort) - -# Only backup changed cores -comm -13 <(echo "$previous_cores") <(echo "$current_cores") | while read core; do - tg-get-kg-core --id "$core" -o "incremental/$core.msgpack" -done - -echo "$current_cores" > last-backup-cores.txt -``` - -## Best Practices - -1. **Regular Backups**: Schedule automated backups of important knowledge cores -2. **Organized Storage**: Use dated directories and consistent naming -3. **Verification**: Test backup files periodically by importing them -4. **Compression**: Compress large backup files to save storage -5. **Access Control**: Secure backup files with appropriate permissions -6. **Documentation**: Document what each knowledge core contains -7. **Retention Policy**: Implement backup retention policies - -## Troubleshooting - -### Large File Exports -```bash -# For very large knowledge cores -# Monitor progress and disk space -df -h . 
# Check available space -tg-get-kg-core --id "huge-core" -o huge-core.msgpack & -watch -n 5 'ls -lh huge-core.msgpack' # Monitor file growth -``` - -### Network Timeouts -```bash -# If export times out, try smaller cores or check network -# Split large cores if possible, or increase timeout settings -``` - -### Corrupted Exports -```bash -# Verify file integrity -file my-core.msgpack # Should show "data" -python -c "import msgpack; msgpack.unpack(open('my-core.msgpack', 'rb'))" -``` \ No newline at end of file diff --git a/docs/cli/tg-graph-to-turtle.md b/docs/cli/tg-graph-to-turtle.md deleted file mode 100644 index a2290117..00000000 --- a/docs/cli/tg-graph-to-turtle.md +++ /dev/null @@ -1,494 +0,0 @@ -# tg-graph-to-turtle - -Exports knowledge graph data to Turtle (TTL) format for backup, analysis, or migration. - -## Synopsis - -```bash -tg-graph-to-turtle [options] -``` - -## Description - -The `tg-graph-to-turtle` command connects to TrustGraph's triple query service and exports all graph triples in Turtle format. This is useful for creating backups, analyzing graph structure, migrating data, or integrating with external RDF tools. - -The command queries up to 10,000 triples and outputs them in standard Turtle format to stdout, while also saving to an `output.ttl` file. 
- -## Options - -### Optional Arguments - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-f, --flow-id ID`: Flow instance ID to use (default: `default`) -- `-U, --user USER`: User ID for data scope (default: `trustgraph`) -- `-C, --collection COLLECTION`: Collection to export (default: `default`) - -## Examples - -### Basic Export -```bash -tg-graph-to-turtle -``` - -### Export to File -```bash -tg-graph-to-turtle > knowledge-graph.ttl -``` - -### Export Specific Collection -```bash -tg-graph-to-turtle -C "research-data" > research-graph.ttl -``` - -### Export with Custom Flow -```bash -tg-graph-to-turtle -f "production-flow" -U "admin" > production-graph.ttl -``` - -## Output Format - -The command generates Turtle format with proper RDF syntax: - -```turtle -@prefix ns1: . -@prefix rdf: . -@prefix rdfs: . - -ns1:Person rdf:type rdfs:Class . -ns1:john rdf:type ns1:Person ; - ns1:name "John Doe" ; - ns1:age "30" . -ns1:jane rdf:type ns1:Person ; - ns1:name "Jane Smith" ; - ns1:department "Engineering" . -``` - -### Output Destinations - -1. **stdout**: Standard output for piping or display -2. 
**output.ttl**: Automatically created file in current directory - -## Use Cases - -### Data Backup -```bash -# Create timestamped backups -timestamp=$(date +%Y%m%d_%H%M%S) -tg-graph-to-turtle > "backup_${timestamp}.ttl" - -# Backup specific collections -collections=("research" "products" "customers") -for collection in "${collections[@]}"; do - tg-graph-to-turtle -C "$collection" > "backup_${collection}_${timestamp}.ttl" -done -``` - -### Data Migration -```bash -# Export from source environment -tg-graph-to-turtle -u "http://source:8088/" > source-data.ttl - -# Import to target environment -tg-load-turtle -i "migration-$(date +%Y%m%d)" \ - -u "ws://target:8088/" \ - source-data.ttl -``` - -### Graph Analysis -```bash -# Export for analysis -tg-graph-to-turtle > analysis-data.ttl - -# Analyze with external tools -rapper -i turtle -o ntriples analysis-data.ttl | wc -l # Count triples -grep -c "rdf:type" analysis-data.ttl # Count type assertions -``` - -### Integration with External Tools -```bash -# Export for Apache Jena -tg-graph-to-turtle > jena-input.ttl -tdb2.tdbloader --loc=tdb-database jena-input.ttl - -# Export for Virtuoso -tg-graph-to-turtle > virtuoso-data.ttl -isql-v -U dba -P password < load-script.sql -``` - -## Advanced Usage - -### Incremental Exports -```bash -# Export with timestamps for incremental backups -last_export_file="last_export_timestamp.txt" -current_time=$(date +%Y%m%d_%H%M%S) - -if [ -f "$last_export_file" ]; then - last_export=$(cat "$last_export_file") - echo "Last export: $last_export" -fi - -echo "Current export: $current_time" -tg-graph-to-turtle > "incremental_${current_time}.ttl" -echo "$current_time" > "$last_export_file" -``` - -### Multi-Collection Export -```bash -# Export all collections to separate files -export_all_collections() { - local output_dir="graph_exports_$(date +%Y%m%d)" - mkdir -p "$output_dir" - - echo "Exporting all collections to $output_dir" - - # Get list of collections (this would need to be implemented) 
- # For now, use known collections - collections=("default" "research" "products" "documents") - - for collection in "${collections[@]}"; do - echo "Exporting collection: $collection" - tg-graph-to-turtle -C "$collection" > "$output_dir/${collection}.ttl" - - # Verify export - if [ -s "$output_dir/${collection}.ttl" ]; then - triple_count=$(grep -c "\." "$output_dir/${collection}.ttl") - echo " Exported $triple_count triples" - else - echo " No data exported" - fi - done -} - -export_all_collections -``` - -### Filtered Export -```bash -# Export specific types of triples -export_filtered() { - local filter_type="$1" - local output_file="$2" - - echo "Exporting $filter_type triples to $output_file" - - # Export all data first - tg-graph-to-turtle > temp_full_export.ttl - - # Filter based on type - case "$filter_type" in - "classes") - grep "rdf:type.*Class" temp_full_export.ttl > "$output_file" - ;; - "instances") - grep -v "rdf:type.*Class" temp_full_export.ttl > "$output_file" - ;; - "properties") - grep "rdf:type.*Property" temp_full_export.ttl > "$output_file" - ;; - *) - echo "Unknown filter type: $filter_type" - return 1 - ;; - esac - - rm temp_full_export.ttl -} - -# Usage -export_filtered "classes" "schema-classes.ttl" -export_filtered "instances" "instance-data.ttl" -``` - -### Compression and Packaging -```bash -# Export and compress -export_compressed() { - local collection="$1" - local timestamp=$(date +%Y%m%d_%H%M%S) - local filename="${collection}_${timestamp}" - - echo "Exporting and compressing collection: $collection" - - # Export to temporary file - tg-graph-to-turtle -C "$collection" > "${filename}.ttl" - - # Compress - gzip "${filename}.ttl" - - # Create metadata - cat > "${filename}.meta" << EOF -Collection: $collection -Export Date: $(date) -Compressed Size: $(stat -c%s "${filename}.ttl.gz") bytes -MD5: $(md5sum "${filename}.ttl.gz" | cut -d' ' -f1) -EOF - - echo "Export complete: ${filename}.ttl.gz" -} - -# Export multiple collections 
compressed -collections=("research" "products" "customers") -for collection in "${collections[@]}"; do - export_compressed "$collection" -done -``` - -### Validation and Quality Checks -```bash -# Export with validation -export_with_validation() { - local output_file="$1" - - echo "Exporting with validation to $output_file" - - # Export - tg-graph-to-turtle > "$output_file" - - # Validate Turtle syntax - if rapper -q -i turtle "$output_file" > /dev/null 2>&1; then - echo "✓ Valid Turtle syntax" - else - echo "✗ Invalid Turtle syntax" - return 1 - fi - - # Count triples - triple_count=$(rapper -i turtle -c "$output_file" 2>/dev/null) - echo "Total triples: $triple_count" - - # Check for common issues - if grep -q "^@prefix" "$output_file"; then - echo "✓ Prefixes found" - else - echo "⚠ No prefixes found" - fi - - # Check for URIs with spaces (malformed) - malformed_uris=$(grep -c " " "$output_file" || echo "0") - if [ "$malformed_uris" -gt 0 ]; then - echo "⚠ Found $malformed_uris lines with spaces (potential malformed URIs)" - fi -} - -# Validate export -export_with_validation "validated-export.ttl" -``` - -## Performance Optimization - -### Streaming Export -```bash -# Handle large datasets with streaming -stream_export() { - local collection="$1" - local chunk_size="$2" - local output_prefix="$3" - - echo "Streaming export of collection: $collection" - - # Export to temporary file - tg-graph-to-turtle -C "$collection" > temp_export.ttl - - # Split into chunks - split -l "$chunk_size" temp_export.ttl "${output_prefix}_" - - # Add .ttl extension and validate each chunk - for chunk in ${output_prefix}_*; do - mv "$chunk" "$chunk.ttl" - - # Validate chunk - if rapper -q -i turtle "$chunk.ttl" > /dev/null 2>&1; then - echo "✓ Valid chunk: $chunk.ttl" - else - echo "✗ Invalid chunk: $chunk.ttl" - fi - done - - rm temp_export.ttl -} - -# Stream large collection -stream_export "large-collection" 1000 "chunk" -``` - -### Parallel Processing -```bash -# Export multiple 
collections in parallel -parallel_export() { - local collections=("$@") - local timestamp=$(date +%Y%m%d_%H%M%S) - - echo "Exporting ${#collections[@]} collections in parallel" - - for collection in "${collections[@]}"; do - ( - echo "Exporting $collection..." - tg-graph-to-turtle -C "$collection" > "${collection}_${timestamp}.ttl" - echo "✓ Completed: $collection" - ) & - done - - wait - echo "All exports completed" -} - -# Export collections in parallel -parallel_export "research" "products" "customers" "documents" -``` - -## Integration Scripts - -### Automated Backup System -```bash -#!/bin/bash -# automated-backup.sh -backup_dir="graph_backups" -retention_days=30 - -echo "Starting automated graph backup..." - -# Create backup directory -mkdir -p "$backup_dir" - -# Export with timestamp -timestamp=$(date +%Y%m%d_%H%M%S) -backup_file="$backup_dir/graph_backup_${timestamp}.ttl" - -echo "Exporting to: $backup_file" -tg-graph-to-turtle > "$backup_file" - -# Compress -gzip "$backup_file" -echo "Compressed: ${backup_file}.gz" - -# Clean old backups -find "$backup_dir" -name "*.ttl.gz" -mtime +$retention_days -delete -echo "Cleaned backups older than $retention_days days" - -# Verify backup -if [ -f "${backup_file}.gz" ]; then - size=$(stat -c%s "${backup_file}.gz") - echo "Backup completed: ${size} bytes" -else - echo "Backup failed!" - exit 1 -fi -``` - -### Data Sync Script -```bash -#!/bin/bash -# sync-graphs.sh -source_url="$1" -target_url="$2" -collection="$3" - -if [ -z "$source_url" ] || [ -z "$target_url" ] || [ -z "$collection" ]; then - echo "Usage: $0 <source-url> <target-url> <collection>" - exit 1 -fi - -echo "Syncing collection '$collection' from $source_url to $target_url" - -# Export from source -temp_file="sync_temp_$(date +%s).ttl" -tg-graph-to-turtle -u "$source_url" -C "$collection" > "$temp_file" - -# Validate export -if [ ! 
-s "$temp_file" ]; then - echo "No data exported from source" - exit 1 -fi - -# Load to target -doc_id="sync-$(date +%Y%m%d-%H%M%S)" -if tg-load-turtle -i "$doc_id" -u "$target_url" -C "$collection" "$temp_file"; then - echo "Sync completed successfully" -else - echo "Sync failed" - exit 1 -fi - -# Cleanup -rm "$temp_file" -``` - -## Error Handling - -### Connection Issues -```bash -Exception: Connection refused -``` -**Solution**: Check API URL and ensure TrustGraph is running. - -### Flow Not Found -```bash -Exception: Flow instance not found -``` -**Solution**: Verify flow ID with `tg-show-flows`. - -### Permission Errors -```bash -Exception: Access denied -``` -**Solution**: Check user permissions for the specified collection. - -### Empty Output -```bash -# No triples exported -``` -**Solution**: Verify collection contains data and user has access. - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-load-turtle`](tg-load-turtle.md) - Import Turtle files -- [`tg-triples-query`](tg-triples-query.md) - Query graph triples -- [`tg-show-flows`](tg-show-flows.md) - List available flows -- [`tg-get-kg-core`](tg-get-kg-core.md) - Export knowledge cores - -## API Integration - -This command uses the [Triples Query API](../apis/api-triples-query.md) to retrieve graph data and convert it to Turtle format. - -## Best Practices - -1. **Regular Backups**: Schedule regular exports for data protection -2. **Validation**: Always validate exported Turtle files -3. **Compression**: Compress large exports for storage efficiency -4. **Monitoring**: Track export sizes and success rates -5. **Documentation**: Document export procedures and retention policies -6. **Security**: Ensure sensitive data is properly protected in exports -7. **Version Control**: Consider versioning exported schemas - -## Troubleshooting - -### Large Dataset Issues -```bash -# Check query limits -grep -c "\." 
output.ttl # Count exported triples -# Default limit is 10,000 triples - -# For larger datasets, consider using tg-get-kg-core -tg-get-kg-core -n "collection-name" > large-export.msgpack -``` - -### Malformed URIs -```bash -# Check for URIs with spaces -grep " " output.ttl | head -5 - -# Clean URIs if needed -sed 's/ /%20/g' output.ttl > cleaned-output.ttl -``` - -### Memory Issues -```bash -# Monitor memory usage during export -free -h -# Consider splitting exports for large datasets -``` \ No newline at end of file diff --git a/docs/cli/tg-init-pulsar-manager.md b/docs/cli/tg-init-pulsar-manager.md deleted file mode 100644 index be7e0f7a..00000000 --- a/docs/cli/tg-init-pulsar-manager.md +++ /dev/null @@ -1,452 +0,0 @@ -# tg-init-pulsar-manager - -Initializes Pulsar Manager with default superuser credentials for TrustGraph. - -## Synopsis - -```bash -tg-init-pulsar-manager -``` - -## Description - -The `tg-init-pulsar-manager` command is a setup utility that creates a default superuser account in Pulsar Manager. This is typically run once during initial TrustGraph deployment to establish administrative access to the Pulsar message queue management interface. - -The command configures a superuser with predefined credentials that can be used to access the Pulsar Manager web interface for monitoring and managing Pulsar topics, namespaces, and tenants. - -## Default Configuration - -The command creates a superuser with these default credentials: - -- **Username**: `admin` -- **Password**: `apachepulsar` -- **Description**: `test` -- **Email**: `username@test.org` - -## Prerequisites - -### Pulsar Manager Service -Pulsar Manager must be running and accessible at `http://localhost:7750` before running this command. - -### Network Connectivity -The command requires network access to the Pulsar Manager API endpoint. 
- -## Examples - -### Basic Initialization -```bash -tg-init-pulsar-manager -``` - -### Verify Initialization -```bash -# Run the initialization -tg-init-pulsar-manager - -# Check if Pulsar Manager is accessible -curl -s http://localhost:7750/pulsar-manager/ | grep -q "Pulsar Manager" -echo "Pulsar Manager status: $?" -``` - -### Integration with Setup Scripts -```bash -#!/bin/bash -# setup-trustgraph.sh - -echo "Setting up TrustGraph infrastructure..." - -# Wait for Pulsar Manager to be ready -echo "Waiting for Pulsar Manager..." -while ! curl -s http://localhost:7750/pulsar-manager/ > /dev/null; do - echo " Waiting for Pulsar Manager to start..." - sleep 5 -done - -# Initialize Pulsar Manager -echo "Initializing Pulsar Manager..." -tg-init-pulsar-manager - -if [ $? -eq 0 ]; then - echo "✓ Pulsar Manager initialized successfully" - echo " You can access it at: http://localhost:7750/pulsar-manager/" - echo " Username: admin" - echo " Password: apachepulsar" -else - echo "✗ Failed to initialize Pulsar Manager" - exit 1 -fi -``` - -## What It Does - -The command performs the following operations: - -1. **Retrieves CSRF Token**: Gets a CSRF token from Pulsar Manager for secure API access -2. **Creates Superuser**: Makes an authenticated API call to create the superuser account -3. **Sets Permissions**: Configures the user with administrative privileges - -### HTTP Operations -```bash -# Equivalent manual operations: -CSRF_TOKEN=$(curl http://localhost:7750/pulsar-manager/csrf-token) - -curl \ - -H "X-XSRF-TOKEN: $CSRF_TOKEN" \ - -H "Cookie: XSRF-TOKEN=$CSRF_TOKEN;" \ - -H 'Content-Type: application/json' \ - -X PUT \ - http://localhost:7750/pulsar-manager/users/superuser \ - -d '{"name": "admin", "password": "apachepulsar", "description": "test", "email": "username@test.org"}' -``` - -## Use Cases - -### Initial Deployment -```bash -# Part of TrustGraph deployment sequence -deploy_trustgraph() { - echo "Deploying TrustGraph..." 
- - # Start services - docker-compose up -d pulsar pulsar-manager - - # Wait for services - wait_for_service "http://localhost:7750/pulsar-manager/" "Pulsar Manager" - wait_for_service "http://localhost:8080/admin/v2/clusters" "Pulsar" - - # Initialize Pulsar Manager - echo "Initializing Pulsar Manager..." - tg-init-pulsar-manager - - # Initialize TrustGraph - echo "Initializing TrustGraph..." - tg-init-trustgraph - - echo "Deployment complete!" -} -``` - -### Development Environment Setup -```bash -# Development setup script -setup_dev_environment() { - echo "Setting up development environment..." - - # Start local services - docker-compose -f docker-compose.dev.yml up -d - - # Wait for readiness - echo "Waiting for services to start..." - sleep 30 - - # Initialize components - tg-init-pulsar-manager - tg-init-trustgraph - - echo "Development environment ready!" - echo "Pulsar Manager: http://localhost:7750/pulsar-manager/" - echo "Credentials: admin / apachepulsar" -} -``` - -### CI/CD Integration -```bash -# Integration testing setup -setup_test_environment() { - local timeout=300 # 5 minutes - local elapsed=0 - - echo "Setting up test environment..." - - # Start services - docker-compose up -d --wait - - # Wait for Pulsar Manager - while ! 
curl -s http://localhost:7750/pulsar-manager/ > /dev/null; do - if [ $elapsed -ge $timeout ]; then - echo "Timeout waiting for Pulsar Manager" - return 1 - fi - sleep 5 - elapsed=$((elapsed + 5)) - done - - # Initialize - if tg-init-pulsar-manager; then - echo "✓ Test environment ready" - else - echo "✗ Failed to initialize test environment" - return 1 - fi -} -``` - -## Docker Integration - -### Docker Compose Setup -```yaml -# docker-compose.yml -version: '3.8' - -services: - pulsar: - image: apachepulsar/pulsar:latest - ports: - - "6650:6650" - - "8080:8080" - command: bin/pulsar standalone - - pulsar-manager: - image: apachepulsar/pulsar-manager:latest - ports: - - "7750:7750" - depends_on: - - pulsar - environment: - SPRING_CONFIGURATION_FILE: /pulsar-manager/pulsar-manager/application.properties - - trustgraph-init: - image: trustgraph/cli:latest - depends_on: - - pulsar-manager - command: > - sh -c " - sleep 30 && - tg-init-pulsar-manager && - tg-init-trustgraph - " -``` - -### Kubernetes Setup -```yaml -# k8s-init-job.yaml -apiVersion: batch/v1 -kind: Job -metadata: - name: trustgraph-init -spec: - template: - spec: - containers: - - name: init - image: trustgraph/cli:latest - command: - - sh - - -c - - | - echo "Waiting for Pulsar Manager..." - while ! curl -s http://pulsar-manager:7750/pulsar-manager/; do - sleep 5 - done - - echo "Initializing Pulsar Manager..." - tg-init-pulsar-manager - - echo "Initializing TrustGraph..." - tg-init-trustgraph - env: - - name: PULSAR_MANAGER_URL - value: "http://pulsar-manager:7750" - restartPolicy: Never -``` - -## Error Handling - -### Connection Refused -```bash -curl: (7) Failed to connect to localhost port 7750: Connection refused -``` -**Solution**: Ensure Pulsar Manager is running and accessible on port 7750. - -### CSRF Token Issues -```bash -curl: (22) The requested URL returned error: 403 Forbidden -``` -**Solution**: The CSRF token mechanism may have changed. Check Pulsar Manager API documentation. 
- -### User Already Exists -```bash -HTTP 409 Conflict - User already exists -``` -**Solution**: This is expected on subsequent runs. The superuser is already created. - -### Network Issues -```bash -curl: (28) Operation timed out -``` -**Solution**: Check network connectivity and firewall settings. - -## Security Considerations - -### Default Credentials -The command uses default credentials that should be changed in production: - -```bash -# After initialization, change the password via Pulsar Manager UI -# Or use the API to update credentials -change_admin_password() { - local new_password="$1" - - # Login to get session - session=$(curl -s -c cookies.txt \ - -d "username=admin&password=apachepulsar" \ - http://localhost:7750/pulsar-manager/login) - - # Update password - curl -s -b cookies.txt \ - -H "Content-Type: application/json" \ - -X PUT \ - -d "{\"password\": \"$new_password\"}" \ - http://localhost:7750/pulsar-manager/users/admin - - rm cookies.txt -} -``` - -### Access Control -```bash -# Restrict access to Pulsar Manager in production -configure_security() { - echo "Configuring Pulsar Manager security..." 
- - # Change default password - change_admin_password "$(openssl rand -base64 32)" - - # Configure firewall rules (example) - # iptables -A INPUT -p tcp --dport 7750 -s 10.0.0.0/8 -j ACCEPT - # iptables -A INPUT -p tcp --dport 7750 -j DROP - - echo "Security configuration complete" -} -``` - -## Advanced Usage - -### Custom Configuration -```bash -# Create custom initialization script -create_custom_init() { - cat > custom-pulsar-manager-init.sh << 'EOF' -#!/bin/bash - -PULSAR_MANAGER_URL=${PULSAR_MANAGER_URL:-http://localhost:7750} -ADMIN_USER=${ADMIN_USER:-admin} -ADMIN_PASS=${ADMIN_PASS:-$(openssl rand -base64 16)} -ADMIN_EMAIL=${ADMIN_EMAIL:-admin@example.com} - -echo "Initializing Pulsar Manager at: $PULSAR_MANAGER_URL" - -# Get CSRF token -CSRF_TOKEN=$(curl -s "$PULSAR_MANAGER_URL/pulsar-manager/csrf-token") - -if [ -z "$CSRF_TOKEN" ]; then - echo "Failed to get CSRF token" - exit 1 -fi - -# Create superuser -response=$(curl -s -w "%{http_code}" \ - -H "X-XSRF-TOKEN: $CSRF_TOKEN" \ - -H "Cookie: XSRF-TOKEN=$CSRF_TOKEN;" \ - -H 'Content-Type: application/json' \ - -X PUT \ - "$PULSAR_MANAGER_URL/pulsar-manager/users/superuser" \ - -d "{\"name\": \"$ADMIN_USER\", \"password\": \"$ADMIN_PASS\", \"description\": \"Admin user\", \"email\": \"$ADMIN_EMAIL\"}") - -http_code="${response: -3}" - -if [ "$http_code" = "200" ] || [ "$http_code" = "409" ]; then - echo "Pulsar Manager initialized successfully" - echo "Username: $ADMIN_USER" - echo "Password: $ADMIN_PASS" -else - echo "Failed to initialize Pulsar Manager (HTTP $http_code)" - exit 1 -fi -EOF - - chmod +x custom-pulsar-manager-init.sh -} -``` - -### Health Checks -```bash -# Health check script -check_pulsar_manager() { - local max_attempts=30 - local attempt=1 - - echo "Checking Pulsar Manager health..." 
- - while [ $attempt -le $max_attempts ]; do - if curl -s http://localhost:7750/pulsar-manager/ > /dev/null; then - echo "✓ Pulsar Manager is healthy" - return 0 - fi - - echo "Attempt $attempt/$max_attempts - Pulsar Manager not ready" - sleep 5 - attempt=$((attempt + 1)) - done - - echo "✗ Pulsar Manager health check failed" - return 1 -} - -# Use in deployment scripts -if check_pulsar_manager; then - tg-init-pulsar-manager -else - echo "Cannot initialize Pulsar Manager - service not healthy" - exit 1 -fi -``` - -## Related Commands - -- [`tg-init-trustgraph`](tg-init-trustgraph.md) - Initialize TrustGraph with Pulsar configuration -- [`tg-show-config`](tg-show-config.md) - Display current TrustGraph configuration - -## Integration Points - -### Pulsar Manager UI -After initialization, access the web interface at: -- **URL**: `http://localhost:7750/pulsar-manager/` -- **Username**: `admin` -- **Password**: `apachepulsar` - -### TrustGraph Integration -This command is typically run before `tg-init-trustgraph` as part of the complete TrustGraph setup process. - -## Best Practices - -1. **Run Once**: Only run during initial setup - subsequent runs are harmless but unnecessary -2. **Change Defaults**: Change default credentials in production environments -3. **Network Security**: Restrict access to Pulsar Manager in production -4. **Health Checks**: Always verify Pulsar Manager is running before initialization -5. **Automation**: Include in deployment automation scripts -6. 
**Documentation**: Document custom credentials for operations teams - -## Troubleshooting - -### Service Not Ready -```bash -# Check if Pulsar Manager is running -docker ps | grep pulsar-manager -netstat -tlnp | grep 7750 -``` - -### Port Conflicts -```bash -# Check if port 7750 is in use -lsof -i :7750 -``` - -### Docker Issues -```bash -# Check Pulsar Manager logs -docker logs pulsar-manager - -# Restart if needed -docker restart pulsar-manager -``` \ No newline at end of file diff --git a/docs/cli/tg-init-trustgraph.md b/docs/cli/tg-init-trustgraph.md deleted file mode 100644 index 2a3f48ae..00000000 --- a/docs/cli/tg-init-trustgraph.md +++ /dev/null @@ -1,523 +0,0 @@ -# tg-init-trustgraph - -Initializes Pulsar with TrustGraph tenant, namespaces, and configuration settings. - -## Synopsis - -```bash -tg-init-trustgraph [options] -``` - -## Description - -The `tg-init-trustgraph` command initializes the Apache Pulsar messaging system with the required tenant, namespaces, policies, and configuration needed for TrustGraph operation. This is a foundational setup command that must be run before TrustGraph can operate properly. - -The command creates the necessary Pulsar infrastructure and optionally loads initial configuration data into the system. 
- -## Options - -### Optional Arguments - -- `-p, --pulsar-admin-url URL`: Pulsar admin URL (default: `http://pulsar:8080`) -- `--pulsar-host HOST`: Pulsar host for client connections (default: `pulsar://pulsar:6650`) -- `--pulsar-api-key KEY`: Pulsar API key for authentication -- `-c, --config CONFIG`: Initial configuration JSON to load -- `-t, --tenant TENANT`: Tenant name (default: `tg`) - -## Examples - -### Basic Initialization -```bash -tg-init-trustgraph -``` - -### Custom Pulsar Configuration -```bash -tg-init-trustgraph \ - --pulsar-admin-url http://localhost:8080 \ - --pulsar-host pulsar://localhost:6650 -``` - -### With Initial Configuration -```bash -tg-init-trustgraph \ - --config '{"prompt": {"system": "You are a helpful AI assistant"}}' -``` - -### Custom Tenant -```bash -tg-init-trustgraph --tenant production-tg -``` - -### Production Setup -```bash -tg-init-trustgraph \ - --pulsar-admin-url http://pulsar-cluster:8080 \ - --pulsar-host pulsar://pulsar-cluster:6650 \ - --pulsar-api-key "your-api-key" \ - --tenant production \ - --config "$(cat production-config.json)" -``` - -## What It Creates - -### Tenant Structure -The command creates a TrustGraph tenant with the following namespaces: - -#### Flow Namespace (`tg/flow`) -- **Purpose**: Processing workflows and flow definitions -- **Retention**: Default retention policies - -#### Request Namespace (`tg/request`) -- **Purpose**: Incoming API requests and commands -- **Retention**: Default retention policies - -#### Response Namespace (`tg/response`) -- **Purpose**: API responses and results -- **Retention**: 3 minutes, unlimited size -- **Subscription Expiration**: 30 minutes - -#### Config Namespace (`tg/config`) -- **Purpose**: System configuration and settings -- **Retention**: 10MB size limit, unlimited time -- **Subscription Expiration**: 5 minutes - -### Configuration Loading - -If a configuration is provided, the command also: -1. Connects to the configuration service -2. 
Loads the provided configuration data -3. Ensures configuration versioning is maintained - -## Configuration Format - -The configuration should be provided as JSON with this structure: - -```json -{ - "prompt": { - "system": "System prompt text", - "template-index": ["template1", "template2"], - "template.template1": { - "id": "template1", - "prompt": "Template text with {{variables}}", - "response-type": "text" - } - }, - "token-costs": { - "gpt-4": { - "input_price": 0.00003, - "output_price": 0.00006 - } - }, - "agent": { - "tool-index": ["tool1"], - "tool.tool1": { - "id": "tool1", - "name": "Example Tool", - "description": "Tool description", - "arguments": [] - } - } -} -``` - -## Use Cases - -### Initial Deployment -```bash -# Complete TrustGraph initialization sequence -initialize_trustgraph() { - echo "Initializing TrustGraph infrastructure..." - - # Wait for Pulsar to be ready - wait_for_pulsar - - # Initialize Pulsar Manager (if using) - tg-init-pulsar-manager - - # Initialize TrustGraph - tg-init-trustgraph \ - --config "$(cat initial-config.json)" - - echo "TrustGraph initialization complete!" -} - -wait_for_pulsar() { - local timeout=300 - local elapsed=0 - - while ! curl -s http://pulsar:8080/admin/v2/clusters > /dev/null; do - if [ $elapsed -ge $timeout ]; then - echo "Timeout waiting for Pulsar" - exit 1 - fi - echo "Waiting for Pulsar..." 
- sleep 5 - elapsed=$((elapsed + 5)) - done -} -``` - -### Environment-Specific Setup -```bash -# Development environment -setup_dev() { - tg-init-trustgraph \ - --pulsar-admin-url http://localhost:8080 \ - --pulsar-host pulsar://localhost:6650 \ - --tenant dev \ - --config "$(cat dev-config.json)" -} - -# Staging environment -setup_staging() { - tg-init-trustgraph \ - --pulsar-admin-url http://staging-pulsar:8080 \ - --pulsar-host pulsar://staging-pulsar:6650 \ - --tenant staging \ - --config "$(cat staging-config.json)" -} - -# Production environment -setup_production() { - tg-init-trustgraph \ - --pulsar-admin-url http://prod-pulsar:8080 \ - --pulsar-host pulsar://prod-pulsar:6650 \ - --pulsar-api-key "$PULSAR_API_KEY" \ - --tenant production \ - --config "$(cat production-config.json)" -} -``` - -### Configuration Management -```bash -# Load different configurations -load_ai_config() { - local config='{ - "prompt": { - "system": "You are an AI assistant specialized in data analysis.", - "template-index": ["analyze", "summarize"], - "template.analyze": { - "id": "analyze", - "prompt": "Analyze this data: {{data}}", - "response-type": "json" - } - }, - "token-costs": { - "gpt-4": {"input_price": 0.00003, "output_price": 0.00006}, - "claude-3-sonnet": {"input_price": 0.000003, "output_price": 0.000015} - } - }' - - tg-init-trustgraph --config "$config" -} - -load_research_config() { - local config='{ - "prompt": { - "system": "You are a research assistant focused on academic literature.", - "template-index": ["research", "citation"], - "template.research": { - "id": "research", - "prompt": "Research question: {{question}}\nContext: {{context}}", - "response-type": "text" - } - } - }' - - tg-init-trustgraph --config "$config" -} -``` - -## Advanced Usage - -### Cluster Setup -```bash -# Multi-cluster initialization -setup_cluster() { - local clusters=("cluster1:8080" "cluster2:8080" "cluster3:8080") - - for cluster in "${clusters[@]}"; do - echo "Initializing 
cluster: $cluster" - - tg-init-trustgraph \ - --pulsar-admin-url "http://$cluster" \ - --pulsar-host "pulsar://${cluster%:*}:6650" \ - --tenant "cluster-$(echo $cluster | cut -d: -f1)" \ - --config "$(cat cluster-config.json)" - done -} -``` - -### Configuration Migration -```bash -# Migrate configuration between environments -migrate_config() { - local source_env="$1" - local target_env="$2" - - echo "Migrating configuration from $source_env to $target_env" - - # Export existing configuration (would need a tg-export-config command) - # For now, assume we have the config in a file - - tg-init-trustgraph \ - --pulsar-admin-url "http://$target_env:8080" \ - --pulsar-host "pulsar://$target_env:6650" \ - --config "$(cat ${source_env}-config.json)" -} -``` - -### Validation and Testing -```bash -# Validate initialization -validate_initialization() { - local tenant="${1:-tg}" - local admin_url="${2:-http://pulsar:8080}" - - echo "Validating TrustGraph initialization..." - - # Check tenant exists - if curl -s "$admin_url/admin/v2/tenants/$tenant" > /dev/null; then - echo "✓ Tenant '$tenant' exists" - else - echo "✗ Tenant '$tenant' missing" - return 1 - fi - - # Check namespaces - local namespaces=("flow" "request" "response" "config") - for ns in "${namespaces[@]}"; do - if curl -s "$admin_url/admin/v2/namespaces/$tenant/$ns" > /dev/null; then - echo "✓ Namespace '$tenant/$ns' exists" - else - echo "✗ Namespace '$tenant/$ns' missing" - return 1 - fi - done - - echo "✓ TrustGraph initialization validated" -} - -# Test configuration loading -test_config_loading() { - local test_config='{ - "test": { - "value": "test-value", - "timestamp": "'$(date -Iseconds)'" - } - }' - - echo "Testing configuration loading..." 
- - if tg-init-trustgraph --config "$test_config"; then - echo "✓ Configuration loading successful" - else - echo "✗ Configuration loading failed" - return 1 - fi -} -``` - -### Retry Logic and Error Handling -```bash -# Robust initialization with retry -robust_init() { - local max_attempts=5 - local attempt=1 - local delay=10 - - while [ $attempt -le $max_attempts ]; do - echo "Initialization attempt $attempt of $max_attempts..." - - if tg-init-trustgraph "$@"; then - echo "✓ Initialization successful on attempt $attempt" - return 0 - else - echo "✗ Attempt $attempt failed" - - if [ $attempt -lt $max_attempts ]; then - echo "Waiting ${delay}s before retry..." - sleep $delay - delay=$((delay * 2)) # Exponential backoff - fi - fi - - attempt=$((attempt + 1)) - done - - echo "✗ All initialization attempts failed" - return 1 -} -``` - -## Docker Integration - -### Docker Compose -```yaml -version: '3.8' - -services: - pulsar: - image: apachepulsar/pulsar:latest - ports: - - "6650:6650" - - "8080:8080" - command: bin/pulsar standalone - - trustgraph-init: - image: trustgraph/cli:latest - depends_on: - - pulsar - volumes: - - ./config.json:/config.json:ro - command: > - sh -c " - sleep 30 && - tg-init-trustgraph --config '$$(cat /config.json)' - " - environment: - - TRUSTGRAPH_PULSAR_ADMIN_URL=http://pulsar:8080 - - TRUSTGRAPH_PULSAR_HOST=pulsar://pulsar:6650 -``` - -### Kubernetes Init Container -```yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: trustgraph-config -data: - config.json: | - { - "prompt": { - "system": "You are a helpful AI assistant." - } - } ---- -apiVersion: batch/v1 -kind: Job -metadata: - name: trustgraph-init -spec: - template: - spec: - initContainers: - - name: wait-for-pulsar - image: busybox - command: - - sh - - -c - - | - until nc -z pulsar 8080; do - echo "Waiting for Pulsar..." 
- sleep 5 - done - containers: - - name: init - image: trustgraph/cli:latest - command: - - tg-init-trustgraph - - --pulsar-admin-url=http://pulsar:8080 - - --pulsar-host=pulsar://pulsar:6650 - - --config=$(cat /config/config.json) - volumeMounts: - - name: config - mountPath: /config - volumes: - - name: config - configMap: - name: trustgraph-config - restartPolicy: Never -``` - -## Error Handling - -### Connection Issues -```bash -Exception: Connection refused -``` -**Solution**: Verify Pulsar is running and accessible at the specified admin URL. - -### Authentication Errors -```bash -Exception: 401 Unauthorized -``` -**Solution**: Check Pulsar API key if authentication is enabled. - -### Tenant Creation Failures -```bash -Exception: Tenant creation failed -``` -**Solution**: Verify admin permissions and cluster configuration. - -### Configuration Loading Errors -```bash -Exception: Invalid JSON configuration -``` -**Solution**: Validate JSON syntax and structure. - -## Security Considerations - -### API Key Management -```bash -# Use environment variables for sensitive data -export PULSAR_API_KEY="your-secure-api-key" -tg-init-trustgraph --pulsar-api-key "$PULSAR_API_KEY" - -# Or use a secure file -tg-init-trustgraph --pulsar-api-key "$(cat /secure/pulsar-key.txt)" -``` - -### Network Security -```bash -# Use TLS for production -tg-init-trustgraph \ - --pulsar-admin-url https://secure-pulsar:8443 \ - --pulsar-host pulsar+ssl://secure-pulsar:6651 -``` - -## Related Commands - -- [`tg-init-pulsar-manager`](tg-init-pulsar-manager.md) - Initialize Pulsar Manager -- [`tg-show-config`](tg-show-config.md) - Display current configuration -- [`tg-set-prompt`](tg-set-prompt.md) - Configure individual prompts - -## Best Practices - -1. **Run Once**: Typically run once per environment during initial setup -2. **Idempotent**: Safe to run multiple times - existing resources are preserved -3. **Configuration**: Always load initial configuration during setup -4. 
**Validation**: Verify initialization success with validation scripts -5. **Environment Variables**: Use environment variables for sensitive configuration -6. **Retry Logic**: Implement retry logic for robust deployments -7. **Monitoring**: Monitor namespace and topic creation for issues - -## Troubleshooting - -### Pulsar Not Ready -```bash -# Check Pulsar health -curl http://pulsar:8080/admin/v2/clusters - -# Check Pulsar logs -docker logs pulsar -``` - -### Permission Issues -```bash -# Verify Pulsar admin access -curl http://pulsar:8080/admin/v2/tenants - -# Check API key validity if using authentication -``` - -### Configuration Validation -```bash -# Validate JSON configuration -echo "$CONFIG" | jq . - -# Test configuration loading separately -tg-init-trustgraph --config '{"test": "value"}' -``` \ No newline at end of file diff --git a/docs/cli/tg-invoke-agent.md b/docs/cli/tg-invoke-agent.md deleted file mode 100644 index e3423fe1..00000000 --- a/docs/cli/tg-invoke-agent.md +++ /dev/null @@ -1,163 +0,0 @@ -# tg-invoke-agent - -Uses the agent service to answer a question via interactive WebSocket connection. - -## Synopsis - -```bash -tg-invoke-agent -q "your question" [options] -``` - -## Description - -The `tg-invoke-agent` command provides an interactive interface to TrustGraph's agent service. It connects via WebSocket to submit questions and receive real-time responses, including the agent's thinking process and observations when verbose mode is enabled. - -The agent uses available tools and knowledge sources to answer questions, providing a conversational AI interface to your TrustGraph knowledge base. 
- -## Options - -### Required Arguments - -- `-q, --question QUESTION`: The question to ask the agent - -### Optional Arguments - -- `-u, --url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `ws://localhost:8088/`) -- `-f, --flow-id FLOW`: Flow ID to use (default: `default`) -- `-U, --user USER`: User identifier (default: `trustgraph`) -- `-C, --collection COLLECTION`: Collection identifier (default: `default`) -- `-l, --plan PLAN`: Agent plan specification (optional) -- `-s, --state STATE`: Agent initial state (optional) -- `-v, --verbose`: Output agent's thinking process and observations - -## Examples - -### Basic Question -```bash -tg-invoke-agent -q "What is machine learning?" -``` - -### Verbose Output with Thinking Process -```bash -tg-invoke-agent -q "Explain the benefits of neural networks" -v -``` - -### Using Specific Flow -```bash -tg-invoke-agent -q "What documents are available?" -f research-flow -``` - -### With Custom User and Collection -```bash -tg-invoke-agent -q "Show me recent papers" -U alice -C research-papers -``` - -### Using Custom API URL -```bash -tg-invoke-agent -q "What is AI?" -u ws://production:8088/ -``` - -## Output Format - -### Standard Output -The agent provides direct answers to your questions: - -``` -AI stands for Artificial Intelligence, which refers to computer systems that can perform tasks typically requiring human intelligence. -``` - -### Verbose Output -With `-v` flag, you see the agent's thinking process: - -``` -❓ What is machine learning? - -🤔 I need to provide a comprehensive explanation of machine learning, including its definition, key concepts, and applications. - -💡 Let me search for information about machine learning in the knowledge base. - -Machine learning is a subset of artificial intelligence that enables computers to learn and improve automatically from experience without being explicitly programmed... 
-``` - -The emoji indicators represent: -- ❓ Your question -- 🤔 Agent's thinking/reasoning -- 💡 Agent's observations from tools/searches - -## Error Handling - -Common errors and solutions: - -### Connection Errors -```bash -Exception: Connection refused -``` -**Solution**: Verify the API URL and ensure TrustGraph is running. - -### Flow Not Found -```bash -Exception: Invalid flow -``` -**Solution**: Check that the specified flow exists and is running using `tg-show-flows`. - -### Authentication Errors -```bash -Exception: Unauthorized -``` -**Solution**: Verify your authentication credentials and permissions. - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL (converted to WebSocket URL automatically) - -## Related Commands - -- [`tg-invoke-graph-rag`](tg-invoke-graph-rag.md) - Graph-based retrieval augmented generation -- [`tg-invoke-document-rag`](tg-invoke-document-rag.md) - Document-based retrieval augmented generation -- [`tg-invoke-llm`](tg-invoke-llm.md) - Direct LLM text completion -- [`tg-show-tools`](tg-show-tools.md) - List available agent tools -- [`tg-show-flows`](tg-show-flows.md) - List available flows - -## Technical Details - -### WebSocket Communication -The command uses WebSocket protocol for real-time communication with the agent service. The URL is automatically converted from HTTP to WebSocket format. - -### Message Format -Messages are exchanged in JSON format: - -**Request:** -```json -{ - "id": "unique-message-id", - "service": "agent", - "flow": "flow-id", - "request": { - "question": "your question" - } -} -``` - -**Response:** -```json -{ - "id": "unique-message-id", - "response": { - "thought": "agent thinking", - "observation": "agent observation", - "answer": "final answer" - }, - "complete": true -} -``` - -### API Integration -This command uses the [Agent API](../apis/api-agent.md) via WebSocket connection for real-time interaction. 
- -## Use Cases - -- **Interactive Q&A**: Ask questions about your knowledge base -- **Research Assistance**: Get help analyzing documents and data -- **Knowledge Discovery**: Explore connections in your data -- **Troubleshooting**: Get help with technical issues using verbose mode -- **Educational**: Learn about topics in your knowledge base \ No newline at end of file diff --git a/docs/cli/tg-invoke-document-rag.md b/docs/cli/tg-invoke-document-rag.md deleted file mode 100644 index b972aeb9..00000000 --- a/docs/cli/tg-invoke-document-rag.md +++ /dev/null @@ -1,438 +0,0 @@ -# tg-invoke-document-rag - -Invokes the DocumentRAG service to answer questions using document context and retrieval-augmented generation. - -## Synopsis - -```bash -tg-invoke-document-rag -q QUESTION [options] -``` - -## Description - -The `tg-invoke-document-rag` command uses TrustGraph's DocumentRAG service to answer questions by retrieving relevant document context and generating responses using large language models. This implements a Retrieval-Augmented Generation (RAG) approach that grounds AI responses in your document corpus. - -The service searches through indexed documents to find relevant context, then uses that context to generate accurate, source-backed answers to questions. - -## Options - -### Required Arguments - -- `-q, --question QUESTION`: The question to answer - -### Optional Arguments - -- `-u, --url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-f, --flow-id ID`: Flow instance ID to use (default: `default`) -- `-U, --user USER`: User ID for context isolation (default: `trustgraph`) -- `-C, --collection COLLECTION`: Document collection to search (default: `default`) -- `-d, --doc-limit LIMIT`: Maximum number of documents to retrieve (default: `10`) - -## Examples - -### Basic Question Answering -```bash -tg-invoke-document-rag -q "What is the company's return policy?" 
-``` - -### Question with Custom Parameters -```bash -tg-invoke-document-rag \ - -q "How do I configure SSL certificates?" \ - -f "production-docs" \ - -U "admin" \ - -C "technical-docs" \ - -d 5 -``` - -### Complex Technical Questions -```bash -tg-invoke-document-rag \ - -q "What are the performance benchmarks for the new API endpoints?" \ - -f "api-docs" \ - -C "performance-reports" -``` - -### Multi-domain Questions -```bash -# Legal documents -tg-invoke-document-rag -q "What are the privacy policy requirements?" -C "legal-docs" - -# Technical documentation -tg-invoke-document-rag -q "How do I troubleshoot connection timeouts?" -C "tech-docs" - -# Marketing materials -tg-invoke-document-rag -q "What are our key product differentiators?" -C "marketing" -``` - -## Output Format - -The command returns a structured response with: - -```json -{ - "question": "What is the company's return policy?", - "answer": "Based on the company policy documents, customers can return items within 30 days of purchase for a full refund. Items must be in original condition with receipt. Digital products are non-refundable except in cases of technical defects.", - "sources": [ - { - "document": "customer-service-policy.pdf", - "relevance": 0.92, - "section": "Returns and Refunds" - }, - { - "document": "terms-of-service.pdf", - "relevance": 0.85, - "section": "Customer Rights" - } - ], - "confidence": 0.89 -} -``` - -## Use Cases - -### Customer Support -```bash -# Answer common customer questions -tg-invoke-document-rag -q "How do I reset my password?" -C "support-docs" - -# Product information queries -tg-invoke-document-rag -q "What are the system requirements?" -C "product-specs" - -# Troubleshooting assistance -tg-invoke-document-rag -q "Why is my upload failing?" -C "troubleshooting" -``` - -### Technical Documentation -```bash -# API documentation queries -tg-invoke-document-rag -q "How do I authenticate with the REST API?" 
-C "api-docs" - -# Configuration questions -tg-invoke-document-rag -q "What are the required environment variables?" -C "config-docs" - -# Architecture information -tg-invoke-document-rag -q "How does the caching system work?" -C "architecture" -``` - -### Research and Analysis -```bash -# Research queries -tg-invoke-document-rag -q "What are the latest industry trends?" -C "research-reports" - -# Compliance questions -tg-invoke-document-rag -q "What are the GDPR requirements?" -C "compliance-docs" - -# Best practices -tg-invoke-document-rag -q "What are the security best practices?" -C "security-guidelines" -``` - -### Interactive Q&A Sessions -```bash -# Batch questions for analysis -questions=( - "What is our market share?" - "How do we compare to competitors?" - "What are the growth projections?" -) - -for question in "${questions[@]}"; do - echo "Question: $question" - tg-invoke-document-rag -q "$question" -C "business-reports" - echo "---" -done -``` - -## Document Context and Retrieval - -### Document Limit Tuning -```bash -# Few documents for focused answers -tg-invoke-document-rag -q "What is the API rate limit?" -d 3 - -# Many documents for comprehensive analysis -tg-invoke-document-rag -q "What are all the security measures?" -d 20 -``` - -### Collection-Specific Queries -```bash -# Target specific document collections -tg-invoke-document-rag -q "What is the deployment process?" -C "devops-docs" -tg-invoke-document-rag -q "What are the testing standards?" -C "qa-docs" -tg-invoke-document-rag -q "What is the coding style guide?" -C "dev-standards" -``` - -### User Context Isolation -```bash -# Department-specific contexts -tg-invoke-document-rag -q "What is the budget allocation?" -U "finance" -C "finance-docs" -tg-invoke-document-rag -q "What are the hiring requirements?" -U "hr" -C "hr-docs" -``` - -## Error Handling - -### Question Required -```bash -Exception: Question is required -``` -**Solution**: Provide a question with the `-q` option. 
- -### Flow Not Found -```bash -Exception: Flow instance 'nonexistent-flow' not found -``` -**Solution**: Verify the flow ID exists with `tg-show-flows`. - -### Collection Not Found -```bash -Exception: Collection 'invalid-collection' not found -``` -**Solution**: Check available collections with document library commands. - -### No Documents Found -```bash -Exception: No relevant documents found for query -``` -**Solution**: Verify documents are indexed and collection contains relevant content. - -### API Connection Issues -```bash -Exception: Connection refused -``` -**Solution**: Check API URL and ensure TrustGraph services are running. - -## Advanced Usage - -### Batch Processing -```bash -# Process questions from file -while IFS= read -r question; do - if [ -n "$question" ]; then - echo "Processing: $question" - tg-invoke-document-rag -q "$question" -C "knowledge-base" > "answer-$(date +%s).json" - fi -done < questions.txt -``` - -### Question Analysis Pipeline -```bash -#!/bin/bash -# analyze-questions.sh -questions_file="$1" -collection="$2" - -if [ -z "$questions_file" ] || [ -z "$collection" ]; then - echo "Usage: $0 " - exit 1 -fi - -echo "Question Analysis Report - $(date)" -echo "Collection: $collection" -echo "==================================" - -question_num=1 -while IFS= read -r question; do - if [ -n "$question" ]; then - echo -e "\n$question_num. $question" - echo "$(printf '=%.0s' {1..50})" - - # Get answer - answer=$(tg-invoke-document-rag -q "$question" -C "$collection" 2>/dev/null) - - if [ $? 
-eq 0 ]; then - echo "$answer" | jq -r '.answer' 2>/dev/null || echo "$answer" - - # Extract sources if available - sources=$(echo "$answer" | jq -r '.sources[]?.document' 2>/dev/null) - if [ -n "$sources" ]; then - echo -e "\nSources:" - echo "$sources" | sed 's/^/ - /' - fi - else - echo "ERROR: Could not get answer" - fi - - question_num=$((question_num + 1)) - fi -done < "$questions_file" -``` - -### Quality Assessment -```bash -# Assess answer quality with multiple document limits -question="What are the security protocols?" -collection="security-docs" - -echo "Answer Quality Assessment" -echo "Question: $question" -echo "========================" - -for limit in 3 5 10 15 20; do - echo -e "\nDocument limit: $limit" - echo "$(printf '-%.0s' {1..30})" - - answer=$(tg-invoke-document-rag -q "$question" -C "$collection" -d $limit 2>/dev/null) - - if [ $? -eq 0 ]; then - # Get answer length and source count - answer_length=$(echo "$answer" | jq -r '.answer' 2>/dev/null | wc -c) - source_count=$(echo "$answer" | jq -r '.sources | length' 2>/dev/null) - confidence=$(echo "$answer" | jq -r '.confidence' 2>/dev/null) - - echo "Answer length: $answer_length characters" - echo "Source count: $source_count" - echo "Confidence: $confidence" - else - echo "ERROR: Failed to get answer" - fi -done -``` - -### Interactive Q&A Interface -```bash -#!/bin/bash -# interactive-rag.sh -collection="${1:-default}" -flow_id="${2:-default}" - -echo "Interactive Document RAG Interface" -echo "Collection: $collection" -echo "Flow ID: $flow_id" -echo "Type 'quit' to exit" -echo "==================================" - -while true; do - echo -n "Question: " - read -r question - - if [ "$question" = "quit" ]; then - break - fi - - if [ -n "$question" ]; then - echo "Thinking..." - answer=$(tg-invoke-document-rag -q "$question" -C "$collection" -f "$flow_id" 2>/dev/null) - - if [ $? 
-eq 0 ]; then - echo "Answer:" - echo "$answer" | jq -r '.answer' 2>/dev/null || echo "$answer" - - # Show sources if available - sources=$(echo "$answer" | jq -r '.sources[]?.document' 2>/dev/null) - if [ -n "$sources" ]; then - echo -e "\nSources:" - echo "$sources" | sed 's/^/ - /' - fi - else - echo "Sorry, I couldn't answer that question." - fi - - echo -e "\n$(printf '=%.0s' {1..50})" - fi -done - -echo "Goodbye!" -``` - -## Performance Optimization - -### Document Limit Optimization -```bash -# Test different document limits for performance -question="What is the system architecture?" -collection="tech-docs" - -for limit in 3 5 10 15 20; do - echo "Testing document limit: $limit" - start_time=$(date +%s%N) - - tg-invoke-document-rag -q "$question" -C "$collection" -d $limit > /dev/null 2>&1 - - end_time=$(date +%s%N) - duration=$(( (end_time - start_time) / 1000000 )) # Convert to milliseconds - - echo " Duration: ${duration}ms" -done -``` - -### Caching Strategy -```bash -# Cache frequently asked questions -cache_dir="rag-cache" -mkdir -p "$cache_dir" - -ask_question() { - local question="$1" - local collection="$2" - local cache_key=$(echo "$question-$collection" | md5sum | cut -d' ' -f1) - local cache_file="$cache_dir/$cache_key.json" - - if [ -f "$cache_file" ]; then - echo "Cache hit for: $question" - cat "$cache_file" - else - echo "Cache miss, querying: $question" - tg-invoke-document-rag -q "$question" -C "$collection" | tee "$cache_file" - fi -} - -# Use cached queries -ask_question "What is the API documentation?" "tech-docs" -ask_question "What are the system requirements?" 
"spec-docs" -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-load-pdf`](tg-load-pdf.md) - Load PDF documents for RAG -- [`tg-show-library-documents`](tg-show-library-documents.md) - List available documents -- [`tg-invoke-prompt`](tg-invoke-prompt.md) - Direct prompt invocation without RAG -- [`tg-start-flow`](tg-start-flow.md) - Start flows for document processing -- [`tg-show-flows`](tg-show-flows.md) - List active flow instances - -## API Integration - -This command uses the [DocumentRAG API](../apis/api-document-rag.md) to perform retrieval-augmented generation using the document corpus. - -## Best Practices - -1. **Question Formulation**: Use specific, well-formed questions for better results -2. **Collection Organization**: Organize documents into logical collections -3. **Document Limits**: Balance accuracy with performance using appropriate document limits -4. **User Context**: Use user isolation for sensitive or department-specific queries -5. **Source Verification**: Always check source documents for critical information -6. **Caching**: Implement caching for frequently asked questions -7. 
**Quality Assessment**: Regularly evaluate answer quality and adjust parameters - -## Troubleshooting - -### Poor Answer Quality -```bash -# Try different document limits -tg-invoke-document-rag -q "your question" -d 5 # Fewer documents -tg-invoke-document-rag -q "your question" -d 15 # More documents - -# Check document collection -tg-show-library-documents -C "your-collection" -``` - -### Slow Response Times -```bash -# Reduce document limit -tg-invoke-document-rag -q "your question" -d 3 - -# Check flow performance -tg-show-flows | grep "document-rag" -``` - -### Missing Context -```bash -# Verify documents are indexed -tg-show-library-documents -C "your-collection" - -# Check if collection exists -tg-show-library-documents | grep "your-collection" -``` \ No newline at end of file diff --git a/docs/cli/tg-invoke-graph-rag.md b/docs/cli/tg-invoke-graph-rag.md deleted file mode 100644 index 3d1c8512..00000000 --- a/docs/cli/tg-invoke-graph-rag.md +++ /dev/null @@ -1,221 +0,0 @@ -# tg-invoke-graph-rag - -Uses the Graph RAG service to answer questions using knowledge graph data. - -## Synopsis - -```bash -tg-invoke-graph-rag -q "question" [options] -``` - -## Description - -The `tg-invoke-graph-rag` command performs graph-based Retrieval Augmented Generation (RAG) to answer questions using structured knowledge from the knowledge graph. It retrieves relevant entities and relationships from the graph and uses them to provide contextually accurate answers. - -Graph RAG is particularly effective for questions that require understanding relationships between entities, reasoning over structured knowledge, and providing answers based on factual connections in the data. 
- -## Options - -### Required Arguments - -- `-q, --question QUESTION`: The question to answer using graph knowledge - -### Optional Arguments - -- `-u, --url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-f, --flow-id FLOW`: Flow ID to use (default: `default`) -- `-U, --user USER`: User identifier (default: `trustgraph`) -- `-C, --collection COLLECTION`: Collection identifier (default: `default`) - -### Graph Search Parameters - -- `-e, --entity-limit LIMIT`: Maximum entities to retrieve (default: `50`) -- `-t, --triple-limit LIMIT`: Maximum triples to retrieve (default: `30`) -- `-s, --max-subgraph-size SIZE`: Maximum subgraph size (default: `150`) -- `-p, --max-path-length LENGTH`: Maximum path length for graph traversal (default: `2`) - -## Examples - -### Basic Graph RAG Query -```bash -tg-invoke-graph-rag -q "What is the relationship between AI and machine learning?" -``` - -### With Custom Parameters -```bash -tg-invoke-graph-rag \ - -q "How are neural networks connected to deep learning?" \ - -e 100 \ - -t 50 \ - -s 200 -``` - -### Using Specific Flow and Collection -```bash -tg-invoke-graph-rag \ - -q "What research papers discuss climate change?" \ - -f research-flow \ - -C scientific-papers \ - -U researcher -``` - -### Large Graph Exploration -```bash -tg-invoke-graph-rag \ - -q "Explain the connections between quantum computing and cryptography" \ - -e 150 \ - -t 100 \ - -s 300 \ - -p 3 -``` - -## Graph Search Parameters Explained - -### Entity Limit (`-e, --entity-limit`) -Controls how many entities are retrieved from the knowledge graph that are relevant to the question. Higher values provide more context but may include less relevant information. - -### Triple Limit (`-t, --triple-limit`) -Limits the number of relationship triples (subject-predicate-object) retrieved. These triples define the relationships between entities. 
- -### Max Subgraph Size (`-s, --max-subgraph-size`) -Sets the maximum size of the knowledge subgraph used for answering. Larger subgraphs provide more complete context but require more processing. - -### Max Path Length (`-p, --max-path-length`) -Determines how many "hops" through the graph are considered when finding relationships. Higher values can discover more distant but potentially relevant connections. - -## Output Format - -The command returns a natural language answer based on the retrieved graph knowledge: - -``` -Neural networks are a fundamental component of deep learning architectures. -The knowledge graph shows that deep learning is a subset of machine learning -that specifically utilizes multi-layered neural networks. These networks consist -of interconnected nodes (neurons) organized in layers, where each layer processes -and transforms the input data. The relationship between neural networks and deep -learning is that neural networks provide the computational structure, while deep -learning represents the training methodologies and architectures that use these -networks to learn complex patterns from data. -``` - -## How Graph RAG Works - -1. **Query Analysis**: Analyzes the question to identify key entities and concepts -2. **Entity Retrieval**: Finds relevant entities in the knowledge graph -3. **Subgraph Extraction**: Retrieves connected entities and relationships -4. **Context Assembly**: Combines retrieved knowledge into coherent context -5. 
**Answer Generation**: Uses LLM with graph context to generate accurate answers - -## Comparison with Document RAG - -### Graph RAG Advantages -- **Structured Knowledge**: Leverages explicit relationships between concepts -- **Reasoning Capability**: Can infer answers from connected facts -- **Consistency**: Provides factually consistent answers based on structured data -- **Relationship Discovery**: Excellent for questions about connections and relationships - -### When to Use Graph RAG -- Questions about relationships between entities -- Queries requiring logical reasoning over facts -- When you need to understand connections in complex domains -- For factual questions with precise answers - -## Error Handling - -### Flow Not Available -```bash -Exception: Invalid flow -``` -**Solution**: Verify the flow exists and is running with `tg-show-flows`. - -### No Graph Data -```bash -Exception: No relevant knowledge found -``` -**Solution**: Ensure knowledge has been loaded into the graph using `tg-load-kg-core` or document processing. - -### Connection Errors -```bash -Exception: Connection refused -``` -**Solution**: Check the API URL and ensure TrustGraph is running. - -### Parameter Errors -```bash -Exception: Invalid parameter value -``` -**Solution**: Verify that numeric parameters are within valid ranges. - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-invoke-document-rag`](tg-invoke-document-rag.md) - Document-based RAG queries -- [`tg-invoke-agent`](tg-invoke-agent.md) - Interactive agent with multiple tools -- [`tg-load-kg-core`](tg-load-kg-core.md) - Load knowledge into graph -- [`tg-show-graph`](tg-show-graph.md) - Explore graph contents -- [`tg-show-flows`](tg-show-flows.md) - List available flows - -## API Integration - -This command uses the [Graph RAG API](../apis/api-graph-rag.md) to perform retrieval augmented generation using knowledge graph data. 
- -## Use Cases - -### Research and Academia -```bash -tg-invoke-graph-rag \ - -q "What are the key researchers working on quantum machine learning?" \ - -C academic-papers -``` - -### Business Intelligence -```bash -tg-invoke-graph-rag \ - -q "How do our products relate to market trends?" \ - -C business-data -``` - -### Technical Documentation -```bash -tg-invoke-graph-rag \ - -q "What are the dependencies between these software components?" \ - -C technical-docs -``` - -### Medical Knowledge -```bash -tg-invoke-graph-rag \ - -q "What are the known interactions between these medications?" \ - -C medical-knowledge -``` - -## Performance Tuning - -### For Broad Questions -Increase limits to get comprehensive answers: -```bash --e 100 -t 80 -s 250 -p 3 -``` - -### For Specific Questions -Use lower limits for faster, focused responses: -```bash --e 30 -t 20 -s 100 -p 2 -``` - -### For Deep Analysis -Allow longer paths and larger subgraphs: -```bash --e 150 -t 100 -s 400 -p 4 -``` - -## Best Practices - -1. **Parameter Tuning**: Start with defaults and adjust based on question complexity -2. **Question Clarity**: Ask specific questions for better graph retrieval -3. **Knowledge Quality**: Ensure high-quality knowledge is loaded in the graph -4. **Flow Selection**: Use flows with appropriate knowledge domains -5. **Collection Targeting**: Specify relevant collections for focused results \ No newline at end of file diff --git a/docs/cli/tg-invoke-llm.md b/docs/cli/tg-invoke-llm.md deleted file mode 100644 index 999a5320..00000000 --- a/docs/cli/tg-invoke-llm.md +++ /dev/null @@ -1,267 +0,0 @@ -# tg-invoke-llm - -Invokes the text completion service with custom system and user prompts. - -## Synopsis - -```bash -tg-invoke-llm "system prompt" "user prompt" [options] -``` - -## Description - -The `tg-invoke-llm` command provides direct access to the Large Language Model (LLM) text completion service. 
It allows you to specify both a system prompt (which sets the AI's behavior and context) and a user prompt (the actual query or task), giving you complete control over the LLM interaction. - -This is useful for custom AI tasks, experimentation with prompts, and direct LLM integration without the overhead of retrieval augmented generation or agent frameworks. - -## Options - -### Required Arguments - -- `system`: System prompt that defines the AI's role and behavior -- `prompt`: User prompt containing the actual query or task - -### Optional Arguments - -- `-u, --url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-f, --flow-id FLOW`: Flow ID to use (default: `default`) - -## Arguments - -The command requires exactly two positional arguments: - -1. **System Prompt**: Sets the AI's context, role, and behavior -2. **User Prompt**: The specific question, task, or content to process - -## Examples - -### Basic Question Answering -```bash -tg-invoke-llm "You are a helpful assistant." "What is the capital of France?" -``` - -### Code Generation -```bash -tg-invoke-llm \ - "You are an expert Python programmer." \ - "Write a function to calculate the Fibonacci sequence." -``` - -### Creative Writing -```bash -tg-invoke-llm \ - "You are a creative writer specializing in science fiction." \ - "Write a short story about time travel in 200 words." -``` - -### Technical Documentation -```bash -tg-invoke-llm \ - "You are a technical writer who creates clear, concise documentation." \ - "Explain how REST APIs work in simple terms." -``` - -### Data Analysis -```bash -tg-invoke-llm \ - "You are a data analyst expert at interpreting statistics." \ - "Explain what a p-value means and when it's significant." -``` - -### Using Specific Flow -```bash -tg-invoke-llm \ - "You are a medical expert." \ - "Explain the difference between Type 1 and Type 2 diabetes." 
\ - -f medical-flow -``` - -## System Prompt Design - -The system prompt is crucial for getting good results: - -### Role Definition -```bash -"You are a [role] with expertise in [domain]." -``` - -### Behavior Instructions -```bash -"You are helpful, accurate, and concise. Always provide examples." -``` - -### Output Format -```bash -"You are a technical writer. Always structure your responses with clear headings and bullet points." -``` - -### Constraints -```bash -"You are a helpful assistant. Keep responses under 100 words and always cite sources when possible." -``` - -## Output Format - -The command returns the LLM's response directly: - -``` -The capital of France is Paris. Paris has been the capital city of France since the late 10th century and is located in the north-central part of the country along the Seine River. It is the most populous city in France with over 2 million inhabitants in the city proper and over 12 million in the metropolitan area. -``` - -## Prompt Engineering Tips - -### Effective System Prompts -- **Be Specific**: Clearly define the AI's role and expertise -- **Set Tone**: Specify the desired communication style -- **Include Constraints**: Set limits on response length or format -- **Provide Context**: Give relevant background information - -### Effective User Prompts -- **Be Clear**: State exactly what you want -- **Provide Examples**: Show the desired output format -- **Add Context**: Include relevant background information -- **Specify Format**: Request specific output structure - -## Error Handling - -### Flow Not Available -```bash -Exception: Invalid flow -``` -**Solution**: Verify the flow exists and is running with `tg-show-flows`. - -### Connection Errors -```bash -Exception: Connection refused -``` -**Solution**: Check the API URL and ensure TrustGraph is running. - -### Prompt Errors -```bash -Exception: Invalid prompt format -``` -**Solution**: Ensure both system and user prompts are provided as separate arguments. 
- -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-invoke-agent`](tg-invoke-agent.md) - Interactive agent with tools and reasoning -- [`tg-invoke-graph-rag`](tg-invoke-graph-rag.md) - Graph-based retrieval augmented generation -- [`tg-invoke-document-rag`](tg-invoke-document-rag.md) - Document-based retrieval augmented generation -- [`tg-invoke-prompt`](tg-invoke-prompt.md) - Use predefined prompt templates - -## API Integration - -This command uses the [Text Completion API](../apis/api-text-completion.md) to perform direct LLM inference with custom prompts. - -## Use Cases - -### Development and Testing -```bash -# Test prompt variations -tg-invoke-llm "You are a code reviewer." "Review this Python function: def add(a, b): return a + b" - -# Experiment with different system prompts -tg-invoke-llm "You are a harsh critic." "What do you think of Python?" -tg-invoke-llm "You are an enthusiastic supporter." "What do you think of Python?" -``` - -### Content Generation -```bash -# Blog post writing -tg-invoke-llm \ - "You are a technical blogger who writes engaging, informative content." \ - "Write an introduction to machine learning for beginners." - -# Marketing copy -tg-invoke-llm \ - "You are a marketing copywriter focused on clear, compelling messaging." \ - "Write a product description for a cloud storage service." -``` - -### Educational Applications -```bash -# Concept explanation -tg-invoke-llm \ - "You are a teacher who explains complex topics in simple terms." \ - "Explain quantum computing to a high school student." - -# Study guides -tg-invoke-llm \ - "You are an educational content creator specializing in study materials." \ - "Create a study guide for photosynthesis." -``` - -### Business Applications -```bash -# Report summarization -tg-invoke-llm \ - "You are a business analyst who creates executive summaries." 
\ - "Summarize the key points from this quarterly report: [report text]" - -# Email drafting -tg-invoke-llm \ - "You are a professional communication expert." \ - "Draft a polite follow-up email for a job interview." -``` - -### Research and Analysis -```bash -# Literature review -tg-invoke-llm \ - "You are a research academic who analyzes scientific literature." \ - "What are the current trends in renewable energy research?" - -# Competitive analysis -tg-invoke-llm \ - "You are a market research analyst." \ - "Compare the features of different cloud computing platforms." -``` - -## Advanced Techniques - -### Multi-step Reasoning -```bash -# Chain of thought prompting -tg-invoke-llm \ - "You are a logical reasoner. Work through problems step by step." \ - "If a train travels 60 mph for 2 hours, then 80 mph for 1 hour, what's the average speed?" -``` - -### Format Control -```bash -# JSON output -tg-invoke-llm \ - "You are a data processor. Always respond with valid JSON." \ - "Convert this to JSON: Name: John, Age: 30, City: New York" - -# Structured responses -tg-invoke-llm \ - "You are a technical writer. Use markdown formatting with headers and lists." \ - "Explain the software development lifecycle." -``` - -### Domain Expertise -```bash -# Legal analysis -tg-invoke-llm \ - "You are a legal expert specializing in contract law." \ - "What are the key elements of a valid contract?" - -# Medical information -tg-invoke-llm \ - "You are a medical professional. Provide accurate, evidence-based information." \ - "What are the symptoms of Type 2 diabetes?" -``` - -## Best Practices - -1. **Clear System Prompts**: Define the AI's role and behavior explicitly -2. **Specific User Prompts**: Be precise about what you want -3. **Iterative Refinement**: Experiment with different prompt variations -4. **Output Validation**: Verify the quality and accuracy of responses -5. **Appropriate Flows**: Use flows configured for your specific domain -6. 
**Length Considerations**: Balance detail with conciseness in prompts \ No newline at end of file diff --git a/docs/cli/tg-invoke-mcp-tool.md b/docs/cli/tg-invoke-mcp-tool.md deleted file mode 100644 index 61a061d8..00000000 --- a/docs/cli/tg-invoke-mcp-tool.md +++ /dev/null @@ -1,448 +0,0 @@ -# tg-invoke-mcp-tool - -Invokes MCP (Model Context Protocol) tools through the TrustGraph API with parameter support. - -## Synopsis - -```bash -tg-invoke-mcp-tool [options] -n tool-name [-P parameters] -``` - -## Description - -The `tg-invoke-mcp-tool` command invokes MCP (Model Context Protocol) tools through the TrustGraph API. MCP tools are external services that provide standardized interfaces for AI model interactions within the TrustGraph ecosystem. - -MCP tools offer extensible functionality with consistent APIs, stateful interactions, and built-in security mechanisms. They can be used for various purposes including file operations, calculations, web requests, database queries, and custom integrations.
- -## Options - -### Required Arguments - -- `-n, --name TOOL_NAME`: MCP tool name to invoke - -### Optional Arguments - -- `-u, --url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-f, --flow-id ID`: Flow instance ID to use (default: `default`) -- `-P, --parameters JSON`: Tool parameters as JSON-encoded dictionary - -## Examples - -### Basic Tool Invocation -```bash -tg-invoke-mcp-tool -n weather -``` - -### Tool with Parameters -```bash -tg-invoke-mcp-tool -n calculator -P '{"expression": "2 + 2"}' -``` - -### File Operations -```bash -tg-invoke-mcp-tool -n file-reader -P '{"path": "/path/to/file.txt"}' -``` - -### Web Request Tool -```bash -tg-invoke-mcp-tool -n http-client -P '{"url": "https://api.example.com/data", "method": "GET"}' -``` - -### Database Query -```bash -tg-invoke-mcp-tool -n database -P '{"query": "SELECT * FROM users LIMIT 10", "database": "main"}' -``` - -### Custom Flow and API URL -```bash -tg-invoke-mcp-tool -u http://custom-api:8088/ -f my-flow -n weather -P '{"location": "London"}' -``` - -## Parameter Format - -### Simple Parameters -```bash -tg-invoke-mcp-tool -n calculator -P '{"operation": "add", "a": 10, "b": 5}' -``` - -### Complex Parameters -```bash -tg-invoke-mcp-tool -n data-processor -P '{ - "input_data": [1, 2, 3, 4, 5], - "operations": ["sum", "average", "max"], - "output_format": "json" -}' -``` - -### File Input Parameters -```bash -tg-invoke-mcp-tool -n text-analyzer -P "{\"text\": \"$(cat document.txt)\", \"analysis_type\": \"sentiment\"}" -``` - -### Multiple Parameters -```bash -tg-invoke-mcp-tool -n report-generator -P '{ - "template": "monthly-report", - "data_source": "sales_database", - "period": "2024-01", - "format": "pdf", - "recipients": ["admin@example.com"] -}' -``` - -## Common MCP Tools - -### File Operations -```bash -# Read file content -tg-invoke-mcp-tool -n file-reader -P '{"path": "/path/to/file.txt"}' - -# Write file content -tg-invoke-mcp-tool -n file-writer -P 
'{"path": "/path/to/output.txt", "content": "Hello World"}' - -# List directory contents -tg-invoke-mcp-tool -n directory-lister -P '{"path": "/home/user", "recursive": false}' -``` - -### Data Processing -```bash -# JSON processing -tg-invoke-mcp-tool -n json-processor -P '{"data": "{\"key\": \"value\"}", "operation": "validate"}' - -# CSV analysis -tg-invoke-mcp-tool -n csv-analyzer -P '{"file": "data.csv", "columns": ["name", "age"], "operation": "statistics"}' - -# Text transformation -tg-invoke-mcp-tool -n text-transformer -P '{"text": "Hello World", "operation": "uppercase"}' -``` - -### Web and API -```bash -# HTTP requests -tg-invoke-mcp-tool -n http-client -P '{"url": "https://api.github.com/users/octocat", "method": "GET"}' - -# Web scraping -tg-invoke-mcp-tool -n web-scraper -P '{"url": "https://example.com", "selector": "h1"}' - -# API testing -tg-invoke-mcp-tool -n api-tester -P '{"endpoint": "/api/v1/users", "method": "POST", "payload": {"name": "John"}}' -``` - -### Database Operations -```bash -# Query execution -tg-invoke-mcp-tool -n database -P '{"query": "SELECT COUNT(*) FROM users", "database": "production"}' - -# Schema inspection -tg-invoke-mcp-tool -n db-inspector -P '{"database": "main", "operation": "list_tables"}' - -# Data migration -tg-invoke-mcp-tool -n db-migrator -P '{"source": "old_db", "target": "new_db", "table": "users"}' -``` - -## Output Formats - -### String Response -```bash -tg-invoke-mcp-tool -n calculator -P '{"expression": "10 + 5"}' -# Output: "15" -``` - -### JSON Response -```bash -tg-invoke-mcp-tool -n weather -P '{"location": "New York"}' -# Output: -# { -# "location": "New York", -# "temperature": 22, -# "conditions": "sunny", -# "humidity": 45 -# } -``` - -### Complex Object Response -```bash -tg-invoke-mcp-tool -n data-analyzer -P '{"dataset": "sales.csv"}' -# Output: -# { -# "summary": { -# "total_records": 1000, -# "columns": ["date", "product", "amount"], -# "date_range": "2024-01-01 to 2024-12-31" -# }, -# 
"statistics": { -# "total_sales": 50000, -# "average_transaction": 50.0, -# "top_product": "Widget A" -# } -# } -``` - -## Error Handling - -### Tool Not Found -```bash -Exception: MCP tool 'nonexistent-tool' not found -``` -**Solution**: Check available tools with `tg-show-mcp-tools`. - -### Invalid Parameters -```bash -Exception: Invalid JSON in parameters: Expecting property name enclosed in double quotes -``` -**Solution**: Verify JSON parameter format and escape special characters. - -### Missing Required Parameters -```bash -Exception: Required parameter 'input_data' not provided -``` -**Solution**: Check tool documentation for required parameters. - -### Flow Not Found -```bash -Exception: Flow instance 'invalid-flow' not found -``` -**Solution**: Verify flow ID exists with `tg-show-flows`. - -### Tool Execution Error -```bash -Exception: Tool execution failed: Connection timeout -``` -**Solution**: Check network connectivity and tool service availability. - -## Advanced Usage - -### Batch Processing -```bash -# Process multiple files -for file in *.txt; do - echo "Processing $file..." - tg-invoke-mcp-tool -n text-analyzer -P "{\"file\": \"$file\", \"analysis\": \"sentiment\"}" -done -``` - -### Error Handling in Scripts -```bash -#!/bin/bash -# robust-tool-invoke.sh -tool_name="$1" -parameters="$2" - -if ! 
result=$(tg-invoke-mcp-tool -n "$tool_name" -P "$parameters" 2>&1); then - echo "Error invoking tool: $result" >&2 - exit 1 -fi - -echo "Success: $result" -``` - -### Pipeline Processing -```bash -# Chain multiple tools -data=$(tg-invoke-mcp-tool -n data-loader -P '{"source": "database"}') -processed=$(tg-invoke-mcp-tool -n data-processor -P "{\"data\": \"$data\", \"operation\": \"clean\"}") -tg-invoke-mcp-tool -n report-generator -P "{\"data\": \"$processed\", \"format\": \"pdf\"}" -``` - -### Configuration-Driven Invocation -```bash -# Use configuration file -config_file="tool-config.json" -tool_name=$(jq -r '.tool' "$config_file") -parameters=$(jq -c '.parameters' "$config_file") - -tg-invoke-mcp-tool -n "$tool_name" -P "$parameters" -``` - -### Interactive Tool Usage -```bash -#!/bin/bash -# interactive-mcp-tool.sh -echo "Available tools:" -tg-show-mcp-tools - -read -p "Enter tool name: " tool_name -read -p "Enter parameters (JSON): " parameters - -echo "Invoking tool..." -tg-invoke-mcp-tool -n "$tool_name" -P "$parameters" -``` - -### Parallel Tool Execution -```bash -# Execute multiple tools in parallel -tools=("weather" "calculator" "file-reader") -params=('{"location": "NYC"}' '{"expression": "2+2"}' '{"path": "file.txt"}') - -for i in "${!tools[@]}"; do - ( - echo "Executing ${tools[$i]}..." 
- tg-invoke-mcp-tool -n "${tools[$i]}" -P "${params[$i]}" > "result-${tools[$i]}.json" - ) & -done -wait -``` - -## Tool Management - -### List Available Tools -```bash -# Show all registered MCP tools -tg-show-mcp-tools -``` - -### Register New Tools -```bash -# Register a new MCP tool -tg-set-mcp-tool weather-service "http://weather-api:8080/mcp" "Weather data provider" -``` - -### Remove Tools -```bash -# Remove an MCP tool -tg-delete-mcp-tool weather-service -``` - -## Use Cases - -### Data Processing Workflows -```bash -# Extract, transform, and load data -raw_data=$(tg-invoke-mcp-tool -n data-extractor -P '{"source": "external_api"}') -clean_data=$(tg-invoke-mcp-tool -n data-cleaner -P "{\"data\": \"$raw_data\"}") -tg-invoke-mcp-tool -n data-loader -P "{\"data\": \"$clean_data\", \"target\": \"warehouse\"}" -``` - -### Automation Scripts -```bash -# Automated system monitoring -status=$(tg-invoke-mcp-tool -n system-monitor -P '{"checks": ["cpu", "memory", "disk"]}') -if echo "$status" | grep -q "warning"; then - tg-invoke-mcp-tool -n alert-system -P "{\"message\": \"System warning detected\", \"severity\": \"medium\"}" -fi -``` - -### Integration Testing -```bash -# Test API endpoints -endpoints=("/api/users" "/api/orders" "/api/products") -for endpoint in "${endpoints[@]}"; do - result=$(tg-invoke-mcp-tool -n api-tester -P "{\"endpoint\": \"$endpoint\", \"method\": \"GET\"}") - echo "Testing $endpoint: $result" -done -``` - -### Content Generation -```bash -# Generate documentation -code_analysis=$(tg-invoke-mcp-tool -n code-analyzer -P '{"directory": "./src", "language": "python"}') -tg-invoke-mcp-tool -n doc-generator -P "{\"analysis\": \"$code_analysis\", \"format\": \"markdown\"}" -``` - -## Performance Optimization - -### Caching Tool Results -```bash -# Cache expensive tool operations -cache_dir="mcp-cache" -mkdir -p "$cache_dir" - -invoke_with_cache() { - local tool="$1" - local params="$2" - local cache_key=$(echo "$tool-$params" | md5sum | cut -d' ' 
-f1) - local cache_file="$cache_dir/$cache_key.json" - - if [ -f "$cache_file" ]; then - echo "Cache hit for $tool" - cat "$cache_file" - else - echo "Cache miss, invoking $tool..." - tg-invoke-mcp-tool -n "$tool" -P "$params" | tee "$cache_file" - fi -} -``` - -### Asynchronous Processing -```bash -# Non-blocking tool execution -async_invoke() { - local tool="$1" - local params="$2" - local output_file="$3" - - tg-invoke-mcp-tool -n "$tool" -P "$params" > "$output_file" 2>&1 & - echo $! # Return process ID -} - -# Execute multiple tools asynchronously -pid1=$(async_invoke "data-processor" '{"file": "data1.csv"}' "result1.json") -pid2=$(async_invoke "data-processor" '{"file": "data2.csv"}' "result2.json") - -# Wait for completion -wait $pid1 $pid2 -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-show-mcp-tools`](tg-show-mcp-tools.md) - List available MCP tools -- [`tg-set-mcp-tool`](tg-set-mcp-tool.md) - Register MCP tools -- [`tg-delete-mcp-tool`](tg-delete-mcp-tool.md) - Remove MCP tools -- [`tg-show-flows`](tg-show-flows.md) - List available flow instances -- [`tg-invoke-prompt`](tg-invoke-prompt.md) - Invoke prompt templates - -## API Integration - -This command uses the TrustGraph API flow interface to execute MCP tools within the context of specified flows. MCP tools are external services that implement the Model Context Protocol for standardized AI tool interactions. - -## Best Practices - -1. **Parameter Validation**: Always validate JSON parameters before execution -2. **Error Handling**: Implement robust error handling for production use -3. **Tool Discovery**: Use `tg-show-mcp-tools` to discover available tools -4. **Resource Management**: Consider performance implications of long-running tools -5. **Security**: Avoid passing sensitive data in parameters; use secure tool configurations -6. **Documentation**: Document custom tool parameters and expected responses -7.
**Testing**: Test tool integrations thoroughly before production deployment - -## Troubleshooting - -### Tool Not Available -```bash -# Check tool registration -tg-show-mcp-tools | grep "tool-name" - -# Verify tool service is running -curl -f http://tool-service:8080/health -``` - -### Parameter Issues -```bash -# Validate JSON format -echo '{"key": "value"}' | jq . - -# Test with minimal parameters -tg-invoke-mcp-tool -n tool-name -P '{}' -``` - -### Flow Problems -```bash -# Check flow status -tg-show-flows | grep "flow-id" - -# Verify flow supports MCP tools -tg-get-flow-blueprint -n "flow-class" | jq '.interfaces.mcp_tool' -``` - -### Connection Issues -```bash -# Test API connectivity -curl -f http://localhost:8088/health - -# Check environment variables -echo $TRUSTGRAPH_URL -``` \ No newline at end of file diff --git a/docs/cli/tg-invoke-prompt.md b/docs/cli/tg-invoke-prompt.md deleted file mode 100644 index 1006be6f..00000000 --- a/docs/cli/tg-invoke-prompt.md +++ /dev/null @@ -1,430 +0,0 @@ -# tg-invoke-prompt - -Invokes the LLM prompt service using predefined prompt templates with variable substitution. - -## Synopsis - -```bash -tg-invoke-prompt [options] template-id [variable=value ...] -``` - -## Description - -The `tg-invoke-prompt` command invokes TrustGraph's LLM prompt service using predefined prompt templates. Templates contain placeholder variables in the format `{{variable}}` that are replaced with values provided on the command line. - -This provides a structured way to interact with language models using consistent, reusable prompt templates for specific tasks like question answering, text extraction, analysis, and more. 
- -## Options - -### Required Arguments - -- `template-id`: Prompt template identifier (e.g., `question`, `extract-definitions`, `summarize`) - -### Optional Arguments - -- `-u, --url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-f, --flow-id ID`: Flow instance ID to use (default: `default`) -- `variable=value`: Template variable assignments (can be specified multiple times) - -## Examples - -### Basic Question Answering -```bash -tg-invoke-prompt question text="What is artificial intelligence?" context="AI research field" -``` - -### Extract Definitions -```bash -tg-invoke-prompt extract-definitions \ - document="Machine learning is a subset of artificial intelligence..." \ - terms="machine learning,neural networks" -``` - -### Text Summarization -```bash -tg-invoke-prompt summarize \ - text="$(cat large-document.txt)" \ - max_length="200" \ - style="technical" -``` - -### Custom Flow and Variables -```bash -tg-invoke-prompt analysis \ - -f "research-flow" \ - data="$(cat research-data.json)" \ - focus="trends" \ - output_format="markdown" -``` - -## Variable Substitution - -Templates use `{{variable}}` placeholders that are replaced with command-line values: - -### Simple Variables -```bash -tg-invoke-prompt greeting name="Alice" time="morning" -# Template: "Good {{time}}, {{name}}!" -# Result: "Good morning, Alice!" -``` - -### Complex Variables -```bash -tg-invoke-prompt analyze \ - dataset="$(cat data.csv)" \ - columns="name,age,salary" \ - analysis_type="statistical_summary" -``` - -### Multi-line Variables -```bash -tg-invoke-prompt review \ - code="$(cat app.py)" \ - checklist="security,performance,maintainability" \ - severity="high" -``` - -## Common Template Types - -### Question Answering -```bash -# Direct question -tg-invoke-prompt question \ - text="What is the capital of France?" \ - context="geography" - -# Contextual question -tg-invoke-prompt question \ - text="How does this work?" 
\ - context="$(cat technical-manual.txt)" -``` - -### Text Processing -```bash -# Extract key information -tg-invoke-prompt extract-key-points \ - document="$(cat meeting-notes.txt)" \ - format="bullet_points" - -# Text classification -tg-invoke-prompt classify \ - text="Customer is very unhappy with service" \ - categories="positive,negative,neutral" -``` - -### Code Analysis -```bash -# Code review -tg-invoke-prompt code-review \ - code="$(cat script.py)" \ - language="python" \ - focus="security,performance" - -# Bug analysis -tg-invoke-prompt debug \ - code="$(cat buggy-code.js)" \ - error="TypeError: Cannot read property 'length' of undefined" -``` - -### Data Analysis -```bash -# Data insights -tg-invoke-prompt data-analysis \ - data="$(cat sales-data.json)" \ - metrics="revenue,growth,trends" \ - period="quarterly" -``` - -## Template Management - -### List Available Templates -```bash -# Show available prompt templates -tg-show-prompts -``` - -### Create Custom Templates -```bash -# Define a new template -tg-set-prompt analysis-template \ - "Analyze the following {{data_type}}: {{data}}. Focus on {{focus_areas}}. Output format: {{format}}" -``` - -### Template Variables -Common template variables: -- `{{text}}` - Input text to process -- `{{context}}` - Additional context information -- `{{format}}` - Output format specification -- `{{language}}` - Programming language for code analysis -- `{{style}}` - Writing or analysis style -- `{{length}}` - Length constraints for output - -## Output Formats - -### String Response -```bash -tg-invoke-prompt summarize text="Long document..." max_length="100" -# Output: "This document discusses..." 
-``` - -### JSON Response -```bash -tg-invoke-prompt extract-structured data="Name: John, Age: 30, City: NYC" -# Output: -# { -# "name": "John", -# "age": 30, -# "city": "NYC" -# } -``` - -## Error Handling - -### Missing Template -```bash -Exception: Template 'nonexistent-template' not found -``` -**Solution**: Check available templates with `tg-show-prompts`. - -### Missing Variables -```bash -Exception: Template variable 'required_var' not provided -``` -**Solution**: Provide all required variables as `variable=value` arguments. - -### Malformed Variables -```bash -Exception: Malformed variable: invalid-format -``` -**Solution**: Use `variable=value` format for all variable assignments. - -### Flow Not Found -```bash -Exception: Flow instance 'invalid-flow' not found -``` -**Solution**: Verify flow ID exists with `tg-show-flows`. - -## Advanced Usage - -### File Input Processing -```bash -# Process multiple files -for file in *.txt; do - echo "Processing $file..." - tg-invoke-prompt summarize \ - text="$(cat "$file")" \ - filename="$file" \ - max_length="150" -done -``` - -### Batch Processing -```bash -# Process data in batches -while IFS= read -r line; do - tg-invoke-prompt classify \ - text="$line" \ - categories="spam,ham,promotional" \ - confidence_threshold="0.8" -done < input-data.txt -``` - -### Pipeline Processing -```bash -# Chain multiple prompts -initial_analysis=$(tg-invoke-prompt analyze data="$(cat raw-data.json)") -summary=$(tg-invoke-prompt summarize text="$initial_analysis" style="executive") -echo "$summary" -``` - -### Interactive Processing -```bash -#!/bin/bash -# interactive-prompt.sh -template="$1" - -if [ -z "$template" ]; then - echo "Usage: $0 " - exit 1 -fi - -echo "Interactive prompt using template: $template" -echo "Enter variables (var=value), empty line to execute:" - -variables=() -while true; do - read -p "> " input - if [ -z "$input" ]; then - break - fi - variables+=("$input") -done - -echo "Executing prompt..." 
-tg-invoke-prompt "$template" "${variables[@]}" -``` - -### Configuration-Driven Processing -```bash -# Use configuration file for prompts -config_file="prompt-config.json" -template=$(jq -r '.template' "$config_file") -variables=$(jq -r '.variables | to_entries[] | "\(.key)=\(.value)"' "$config_file") - -tg-invoke-prompt "$template" $variables -``` - -## Performance Optimization - -### Caching Results -```bash -# Cache prompt results -cache_dir="prompt-cache" -mkdir -p "$cache_dir" - -invoke_with_cache() { - local template="$1" - shift - local args="$@" - local cache_key=$(echo "$template-$args" | md5sum | cut -d' ' -f1) - local cache_file="$cache_dir/$cache_key.txt" - - if [ -f "$cache_file" ]; then - echo "Cache hit" - cat "$cache_file" - else - echo "Cache miss, invoking prompt..." - tg-invoke-prompt "$template" "$@" | tee "$cache_file" - fi -} -``` - -### Parallel Processing -```bash -# Process multiple items in parallel -input_files=(file1.txt file2.txt file3.txt) -for file in "${input_files[@]}"; do - ( - echo "Processing $file..." 
- tg-invoke-prompt analyze \ - text="$(cat "$file")" \ - filename="$file" > "result-$file.json" - ) & -done -wait -``` - -## Use Cases - -### Document Processing -```bash -# Extract metadata from documents -tg-invoke-prompt extract-metadata \ - document="$(cat document.pdf)" \ - fields="title,author,date,keywords" - -# Generate document summaries -tg-invoke-prompt summarize \ - text="$(cat report.txt)" \ - audience="executives" \ - key_points="5" -``` - -### Code Analysis -```bash -# Security analysis -tg-invoke-prompt security-review \ - code="$(cat webapp.py)" \ - framework="flask" \ - focus="injection,authentication" - -# Performance optimization suggestions -tg-invoke-prompt optimize \ - code="$(cat slow-function.js)" \ - language="javascript" \ - target="performance" -``` - -### Data Analysis -```bash -# Generate insights from data -tg-invoke-prompt insights \ - data="$(cat metrics.json)" \ - timeframe="monthly" \ - focus="trends,anomalies" - -# Create data visualizations -tg-invoke-prompt visualize \ - data="$(cat sales-data.csv)" \ - chart_type="line" \ - metrics="revenue,growth" -``` - -### Content Generation -```bash -# Generate marketing copy -tg-invoke-prompt marketing \ - product="AI Assistant" \ - audience="developers" \ - tone="professional,friendly" - -# Create technical documentation -tg-invoke-prompt document \ - code="$(cat api.py)" \ - format="markdown" \ - sections="overview,examples,parameters" -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-show-prompts`](tg-show-prompts.md) - List available prompt templates -- [`tg-set-prompt`](tg-set-prompt.md) - Create/update prompt templates -- [`tg-invoke-document-rag`](tg-invoke-document-rag.md) - Document-based question answering -- [`tg-show-flows`](tg-show-flows.md) - List available flow instances - -## API Integration - -This command uses the prompt service API to process templates and generate responses using configured language models. 
- -## Best Practices - -1. **Template Reuse**: Create reusable templates for common tasks -2. **Variable Validation**: Validate required variables before execution -3. **Error Handling**: Implement proper error handling for production use -4. **Caching**: Cache results for repeated operations -5. **Documentation**: Document custom templates and their expected variables -6. **Security**: Avoid embedding sensitive data in templates -7. **Performance**: Use appropriate flow instances for different workloads - -## Troubleshooting - -### Template Not Found -```bash -# Check available templates -tg-show-prompts - -# Verify template name spelling -tg-show-prompts | grep "template-name" -``` - -### Variable Errors -```bash -# Check template definition for required variables -tg-show-prompts | grep -A 10 "template-name" - -# Validate variable format -echo "variable=value" | grep "=" -``` - -### Flow Issues -```bash -# Check flow status -tg-show-flows | grep "flow-id" - -# Verify flow has prompt service -tg-get-flow-blueprint -n "flow-class" | jq '.interfaces.prompt' -``` \ No newline at end of file diff --git a/docs/cli/tg-load-doc-embeds.md b/docs/cli/tg-load-doc-embeds.md deleted file mode 100644 index 4309faf2..00000000 --- a/docs/cli/tg-load-doc-embeds.md +++ /dev/null @@ -1,568 +0,0 @@ -# tg-load-doc-embeds - -Loads document embeddings from MessagePack format into TrustGraph processing pipelines. - -## Synopsis - -```bash -tg-load-doc-embeds -i INPUT_FILE [options] -``` - -## Description - -The `tg-load-doc-embeds` command loads document embeddings from MessagePack files into a running TrustGraph system. This is typically used to restore previously saved document embeddings or to load embeddings generated by external systems. - -The command reads document embedding data in MessagePack format and streams it to TrustGraph's document embeddings import API via WebSocket connections. 
- -## Options - -### Required Arguments - -- `-i, --input-file FILE`: Input MessagePack file containing document embeddings - -### Optional Arguments - -- `-u, --url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_API` or `http://localhost:8088/`) -- `-f, --flow-id ID`: Flow instance ID to use (default: `default`) -- `--format FORMAT`: Input format - `msgpack` or `json` (default: `msgpack`) -- `--user USER`: Override user ID from input data -- `--collection COLLECTION`: Override collection ID from input data - -## Examples - -### Basic Loading -```bash -tg-load-doc-embeds -i document-embeddings.msgpack -``` - -### Load with Custom Flow -```bash -tg-load-doc-embeds \ - -i embeddings.msgpack \ - -f "document-processing-flow" -``` - -### Override User and Collection -```bash -tg-load-doc-embeds \ - -i embeddings.msgpack \ - --user "research-team" \ - --collection "research-docs" -``` - -### Load from JSON Format -```bash -tg-load-doc-embeds \ - -i embeddings.json \ - --format json -``` - -### Production Loading -```bash -tg-load-doc-embeds \ - -i production-embeddings.msgpack \ - -u https://trustgraph-api.company.com/ \ - -f "production-flow" \ - --user "system" \ - --collection "production-docs" -``` - -## Input Data Format - -### MessagePack Structure -Document embeddings are stored as MessagePack records with this structure: - -```json -["de", { - "m": { - "i": "document-id", - "m": [{"metadata": "objects"}], - "u": "user-id", - "c": "collection-id" - }, - "c": [{ - "c": "text chunk content", - "v": [0.1, 0.2, 0.3, ...] 
- }] -}] -``` - -### Components -- **Document Metadata** (`m`): - - `i`: Document ID - - `m`: Document metadata objects - - `u`: User ID - - `c`: Collection ID -- **Chunks** (`c`): Array of text chunks with embeddings: - - `c`: Text content of the chunk - - `v`: Vector embedding array - -## Use Cases - -### Backup Restoration -```bash -# Restore document embeddings from backup -restore_embeddings() { - local backup_file="$1" - local target_collection="$2" - - echo "Restoring document embeddings from: $backup_file" - - if [ ! -f "$backup_file" ]; then - echo "Backup file not found: $backup_file" - return 1 - fi - - # Verify backup file - if tg-dump-msgpack -i "$backup_file" --summary | grep -q "Vector dimension:"; then - echo "✓ Backup file contains embeddings" - else - echo "✗ Backup file does not contain valid embeddings" - return 1 - fi - - # Load embeddings - tg-load-doc-embeds \ - -i "$backup_file" \ - --collection "$target_collection" - - echo "Embedding restoration complete" -} - -# Restore from backup -restore_embeddings "backup-20231215.msgpack" "restored-docs" -``` - -### Data Migration -```bash -# Migrate embeddings between environments -migrate_embeddings() { - local source_file="$1" - local target_env="$2" - local target_user="$3" - - echo "Migrating embeddings to: $target_env" - - # Load to target environment - tg-load-doc-embeds \ - -i "$source_file" \ - -u "https://$target_env/api/" \ - --user "$target_user" \ - --collection "migrated-docs" - - echo "Migration complete" -} - -# Migrate to production -migrate_embeddings "dev-embeddings.msgpack" "prod.company.com" "migration-user" -``` - -### Batch Processing -```bash -# Load multiple embedding files -batch_load_embeddings() { - local input_dir="$1" - local collection="$2" - - echo "Batch loading embeddings from: $input_dir" - - for file in "$input_dir"/*.msgpack; do - if [ -f "$file" ]; then - echo "Loading: $(basename "$file")" - - tg-load-doc-embeds \ - -i "$file" \ - --collection "$collection" - - 
if [ $? -eq 0 ]; then - echo "✓ Loaded: $(basename "$file")" - else - echo "✗ Failed: $(basename "$file")" - fi - fi - done - - echo "Batch loading complete" -} - -# Load all embeddings -batch_load_embeddings "embeddings/" "batch-processed" -``` - -### Incremental Loading -```bash -# Load new embeddings incrementally -incremental_load() { - local embeddings_dir="$1" - local processed_log="processed_embeddings.log" - - # Create log if it doesn't exist - touch "$processed_log" - - for file in "$embeddings_dir"/*.msgpack; do - if [ -f "$file" ]; then - # Check if already processed - if grep -q "$(basename "$file")" "$processed_log"; then - echo "Skipping already processed: $(basename "$file")" - continue - fi - - echo "Processing new file: $(basename "$file")" - - if tg-load-doc-embeds -i "$file"; then - echo "$(date): $(basename "$file")" >> "$processed_log" - echo "✓ Processed: $(basename "$file")" - else - echo "✗ Failed: $(basename "$file")" - fi - fi - done -} - -# Run incremental loading -incremental_load "embeddings/" -``` - -## Advanced Usage - -### Parallel Loading -```bash -# Load multiple files in parallel -parallel_load_embeddings() { - local files=("$@") - local max_parallel=3 - local current_jobs=0 - - for file in "${files[@]}"; do - # Wait if max parallel jobs reached - while [ $current_jobs -ge $max_parallel ]; do - wait -n # Wait for any job to complete - current_jobs=$((current_jobs - 1)) - done - - # Start loading in background - ( - echo "Loading: $file" - tg-load-doc-embeds -i "$file" - echo "Completed: $file" - ) & - - current_jobs=$((current_jobs + 1)) - done - - # Wait for all remaining jobs - wait - echo "All parallel loading completed" -} - -# Load files in parallel -embedding_files=(embeddings1.msgpack embeddings2.msgpack embeddings3.msgpack) -parallel_load_embeddings "${embedding_files[@]}" -``` - -### Validation and Loading -```bash -# Validate before loading -validate_and_load() { - local file="$1" - local collection="$2" - - echo 
"Validating embedding file: $file" - - # Check file exists and is readable - if [ ! -r "$file" ]; then - echo "Error: Cannot read file $file" - return 1 - fi - - # Validate MessagePack structure - if ! tg-dump-msgpack -i "$file" --summary > /dev/null 2>&1; then - echo "Error: Invalid MessagePack format" - return 1 - fi - - # Check for document embeddings - if ! tg-dump-msgpack -i "$file" | grep -q '^\["de"'; then - echo "Error: No document embeddings found" - return 1 - fi - - # Get embedding statistics - summary=$(tg-dump-msgpack -i "$file" --summary) - vector_dim=$(echo "$summary" | grep "Vector dimension:" | awk '{print $3}') - - if [ -n "$vector_dim" ]; then - echo "✓ Found embeddings with dimension: $vector_dim" - else - echo "Warning: Could not determine vector dimension" - fi - - # Load embeddings - echo "Loading validated embeddings..." - tg-load-doc-embeds -i "$file" --collection "$collection" - - echo "Loading complete" -} - -# Validate and load -validate_and_load "embeddings.msgpack" "validated-docs" -``` - -### Progress Monitoring -```bash -# Monitor loading progress -monitor_loading() { - local file="$1" - local log_file="loading_progress.log" - - # Start loading in background - tg-load-doc-embeds -i "$file" > "$log_file" 2>&1 & - local load_pid=$! - - echo "Monitoring loading progress (PID: $load_pid)..." - - # Monitor progress - while kill -0 $load_pid 2>/dev/null; do - if [ -f "$log_file" ]; then - # Extract progress from log - embeddings_count=$(grep -o "Document embeddings:.*[0-9]" "$log_file" | tail -1 | awk '{print $3}') - if [ -n "$embeddings_count" ]; then - echo "Progress: $embeddings_count embeddings loaded" - fi - fi - sleep 5 - done - - # Check final status - wait $load_pid - if [ $? 
-eq 0 ]; then - echo "✓ Loading completed successfully" - else - echo "✗ Loading failed" - cat "$log_file" - fi - - rm "$log_file" -} - -# Monitor loading -monitor_loading "large-embeddings.msgpack" -``` - -### Data Transformation -```bash -# Transform embeddings during loading -transform_and_load() { - local input_file="$1" - local output_file="transformed-$(basename "$input_file")" - local new_user="$2" - local new_collection="$3" - - echo "Transforming embeddings: user=$new_user, collection=$new_collection" - - # This would require a transformation script - # For now, we'll show the concept - - # Load with override parameters - tg-load-doc-embeds \ - -i "$input_file" \ - --user "$new_user" \ - --collection "$new_collection" - - echo "Transformation and loading complete" -} - -# Transform during loading -transform_and_load "original.msgpack" "new-user" "new-collection" -``` - -## Performance Optimization - -### Memory Management -```bash -# Monitor memory usage during loading -monitor_memory_usage() { - local file="$1" - - echo "Starting memory-monitored loading..." - - # Start loading in background - tg-load-doc-embeds -i "$file" & - local load_pid=$! 
- - # Monitor memory usage - while kill -0 $load_pid 2>/dev/null; do - memory_usage=$(ps -p $load_pid -o rss= 2>/dev/null | awk '{print $1/1024}') - if [ -n "$memory_usage" ]; then - echo "Memory usage: ${memory_usage}MB" - fi - sleep 10 - done - - wait $load_pid - echo "Loading completed" -} -``` - -### Chunked Loading -```bash -# Load large files in chunks -chunked_load() { - local large_file="$1" - local chunk_size=1000 # Records per chunk - - echo "Loading large file in chunks: $large_file" - - # Split the MessagePack file (this would need special tooling) - # For demonstration, assuming we have pre-split files - - for chunk in "${large_file%.msgpack}"_chunk_*.msgpack; do - if [ -f "$chunk" ]; then - echo "Loading chunk: $(basename "$chunk")" - tg-load-doc-embeds -i "$chunk" - - # Add delay between chunks to reduce system load - sleep 2 - fi - done - - echo "Chunked loading complete" -} -``` - -## Error Handling - -### File Not Found -```bash -Exception: [Errno 2] No such file or directory -``` -**Solution**: Verify file path and ensure the MessagePack file exists. - -### Invalid Format -```bash -Exception: Unpack failed -``` -**Solution**: Verify the file is a valid MessagePack file with document embeddings. - -### WebSocket Connection Issues -```bash -Exception: Connection failed -``` -**Solution**: Check API URL and ensure TrustGraph is running with WebSocket support. - -### Memory Errors -```bash -MemoryError: Unable to allocate memory -``` -**Solution**: Process large files in smaller chunks or increase available memory. - -### Flow Not Found -```bash -Exception: Flow not found -``` -**Solution**: Verify the flow ID exists with `tg-show-flows`. - -## Integration with Other Commands - -### Complete Workflow -```bash -# Complete document processing workflow -process_documents_workflow() { - local pdf_dir="$1" - local embeddings_file="embeddings.msgpack" - - echo "Starting complete document workflow..." - - # 1. 
Load PDFs - for pdf in "$pdf_dir"/*.pdf; do - tg-load-pdf "$pdf" - done - - # 2. Wait for processing - sleep 30 - - # 3. Save embeddings - tg-save-doc-embeds -o "$embeddings_file" - - # 4. Process embeddings (example: load to different collection) - tg-load-doc-embeds -i "$embeddings_file" --collection "processed-docs" - - echo "Complete workflow finished" -} -``` - -### Backup and Restore -```bash -# Complete backup and restore cycle -backup_restore_cycle() { - local backup_file="embeddings-backup.msgpack" - - echo "Creating embeddings backup..." - tg-save-doc-embeds -o "$backup_file" - - echo "Simulating data loss..." - # (In real scenario, this might be system failure) - - echo "Restoring from backup..." - tg-load-doc-embeds -i "$backup_file" --collection "restored" - - echo "Backup/restore cycle complete" -} -``` - -## Environment Variables - -- `TRUSTGRAPH_API`: Default API URL - -## Related Commands - -- [`tg-save-doc-embeds`](tg-save-doc-embeds.md) - Save document embeddings to MessagePack -- [`tg-dump-msgpack`](tg-dump-msgpack.md) - Analyze MessagePack files -- [`tg-load-pdf`](tg-load-pdf.md) - Load PDF documents for processing -- [`tg-show-flows`](tg-show-flows.md) - List available flows - -## API Integration - -This command uses TrustGraph's WebSocket API for document embeddings import, specifically the `/api/v1/flow/{flow-id}/import/document-embeddings` endpoint. - -## Best Practices - -1. **Validation**: Always validate MessagePack files before loading -2. **Backups**: Keep backups of original embedding files -3. **Monitoring**: Monitor memory usage and loading progress -4. **Chunking**: Process large files in manageable chunks -5. **Error Handling**: Implement robust error handling and retry logic -6. **Documentation**: Document the source and format of embedding files -7. 
**Testing**: Test loading procedures in non-production environments - -## Troubleshooting - -### Loading Stalls -```bash -# Check WebSocket connection -netstat -an | grep :8088 - -# Check system resources -free -h -df -h -``` - -### Incomplete Loading -```bash -# Compare input vs loaded data -input_count=$(tg-dump-msgpack -i input.msgpack | grep '^\["de"' | wc -l) -echo "Input embeddings: $input_count" - -# Check loaded data (would need query command) -# loaded_count=$(tg-query-embeddings --count) -# echo "Loaded embeddings: $loaded_count" -``` - -### Performance Issues -```bash -# Monitor network usage -iftop - -# Check TrustGraph service logs -docker logs trustgraph-service -``` \ No newline at end of file diff --git a/docs/cli/tg-load-kg-core.md b/docs/cli/tg-load-kg-core.md deleted file mode 100644 index d83c8dd6..00000000 --- a/docs/cli/tg-load-kg-core.md +++ /dev/null @@ -1,313 +0,0 @@ -# tg-load-kg-core - -Loads a stored knowledge core into a processing flow for active use. - -## Synopsis - -```bash -tg-load-kg-core --id CORE_ID [options] -``` - -## Description - -The `tg-load-kg-core` command loads a previously stored knowledge core into an active processing flow, making the knowledge available for queries, reasoning, and other AI operations. This is different from storing knowledge cores - this command makes stored knowledge active and accessible within a specific flow context. - -Once loaded, the knowledge core's RDF triples and graph embeddings become available for Graph RAG queries, agent reasoning, and other knowledge-based operations within the specified flow. 
- -## Options - -### Required Arguments - -- `--id, --identifier CORE_ID`: Identifier of the knowledge core to load - -### Optional Arguments - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-U, --user USER`: User identifier (default: `trustgraph`) -- `-f, --flow-id FLOW`: Flow ID to load knowledge into (default: `default`) -- `-c, --collection COLLECTION`: Collection identifier (default: `default`) - -## Examples - -### Load Knowledge Core into Default Flow -```bash -tg-load-kg-core --id "research-knowledge-v1" -``` - -### Load into Specific Flow -```bash -tg-load-kg-core \ - --id "medical-knowledge" \ - --flow-id "medical-analysis" \ - --user researcher -``` - -### Load with Custom Collection -```bash -tg-load-kg-core \ - --id "legal-documents" \ - --flow-id "legal-flow" \ - --collection "law-firm-data" -``` - -### Using Custom API URL -```bash -tg-load-kg-core \ - --id "production-knowledge" \ - --flow-id "prod-flow" \ - -u http://production:8088/ -``` - -## Prerequisites - -### Knowledge Core Must Exist -The knowledge core must be stored in the system: - -```bash -# Check available knowledge cores -tg-show-kg-cores - -# Store knowledge core if needed -tg-put-kg-core --id "my-knowledge" -i knowledge.msgpack -``` - -### Flow Must Be Running -The target flow must be active: - -```bash -# Check running flows -tg-show-flows - -# Start flow if needed -tg-start-flow -n "my-class" -i "my-flow" -d "Knowledge processing flow" -``` - -## Loading Process - -1. **Validation**: Verifies knowledge core exists and flow is running -2. **Knowledge Retrieval**: Retrieves RDF triples and graph embeddings -3. **Flow Integration**: Makes knowledge available within flow context -4. **Index Building**: Creates searchable indexes for efficient querying -5. 
**Service Activation**: Enables knowledge-based services in the flow - -## What Gets Loaded - -### RDF Triples -- Subject-predicate-object relationships -- Entity definitions and properties -- Factual knowledge and assertions -- Metadata and provenance information - -### Graph Embeddings -- Vector representations of entities -- Semantic similarity data -- Neural network-compatible formats -- Machine learning-ready representations - -## Knowledge Availability - -Once loaded, knowledge becomes available through: - -### Graph RAG Queries -```bash -tg-invoke-graph-rag \ - -q "What information is available about AI research?" \ - -f my-flow -``` - -### Agent Interactions -```bash -tg-invoke-agent \ - -q "Tell me about the loaded knowledge" \ - -f my-flow -``` - -### Direct Triple Queries -```bash -tg-show-graph -f my-flow -``` - -## Output - -Successful loading typically produces no output, but knowledge becomes queryable: - -```bash -# Load knowledge (no output expected) -tg-load-kg-core --id "research-knowledge" - -# Verify loading by querying -tg-show-graph | head -10 -``` - -## Error Handling - -### Knowledge Core Not Found -```bash -Exception: Knowledge core 'invalid-core' not found -``` -**Solution**: Check available cores with `tg-show-kg-cores` and verify the core ID. - -### Flow Not Found -```bash -Exception: Flow 'invalid-flow' not found -``` -**Solution**: Verify the flow exists and is running with `tg-show-flows`. - -### Permission Errors -```bash -Exception: Access denied to knowledge core -``` -**Solution**: Verify user permissions for the specified knowledge core. - -### Connection Errors -```bash -Exception: Connection refused -``` -**Solution**: Check the API URL and ensure TrustGraph is running. - -### Resource Errors -```bash -Exception: Insufficient memory to load knowledge core -``` -**Solution**: Check system resources or try loading smaller knowledge cores. - -## Knowledge Core Management - -### Loading Workflow -```bash -# 1. 
Check available knowledge -tg-show-kg-cores - -# 2. Ensure flow is running -tg-show-flows - -# 3. Load knowledge into flow -tg-load-kg-core --id "my-knowledge" --flow-id "my-flow" - -# 4. Verify knowledge is accessible -tg-invoke-graph-rag -q "What knowledge is loaded?" -f my-flow -``` - -### Multiple Knowledge Cores -```bash -# Load multiple cores for comprehensive knowledge -tg-load-kg-core --id "core-1" --flow-id "research-flow" -tg-load-kg-core --id "core-2" --flow-id "research-flow" -tg-load-kg-core --id "core-3" --flow-id "research-flow" -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-show-kg-cores`](tg-show-kg-cores.md) - List available knowledge cores -- [`tg-put-kg-core`](tg-put-kg-core.md) - Store knowledge core in system -- [`tg-unload-kg-core`](tg-unload-kg-core.md) - Remove knowledge from flow -- [`tg-show-graph`](tg-show-graph.md) - View loaded knowledge triples -- [`tg-invoke-graph-rag`](tg-invoke-graph-rag.md) - Query loaded knowledge - -## API Integration - -This command uses the [Knowledge API](../apis/api-knowledge.md) with the `load-kg-core` operation to make stored knowledge active within flows. - -## Use Cases - -### Research Analysis -```bash -# Load research knowledge for analysis -tg-load-kg-core \ - --id "research-papers-2024" \ - --flow-id "research-analysis" \ - --collection "academic-research" - -# Query the research knowledge -tg-invoke-graph-rag \ - -q "What are the main research trends in AI?" 
\ - -f research-analysis -``` - -### Domain-Specific Processing -```bash -# Load medical knowledge for healthcare analysis -tg-load-kg-core \ - --id "medical-terminology" \ - --flow-id "healthcare-nlp" \ - --user medical-team -``` - -### Multi-Domain Knowledge -```bash -# Load knowledge from multiple domains -tg-load-kg-core --id "technical-specs" --flow-id "analysis-flow" -tg-load-kg-core --id "business-data" --flow-id "analysis-flow" -tg-load-kg-core --id "market-research" --flow-id "analysis-flow" -``` - -### Development and Testing -```bash -# Load test knowledge for development -tg-load-kg-core \ - --id "test-knowledge" \ - --flow-id "dev-flow" \ - --user developer -``` - -### Production Processing -```bash -# Load production knowledge -tg-load-kg-core \ - --id "production-kb-v2.1" \ - --flow-id "production-flow" \ - --collection "live-data" -``` - -## Performance Considerations - -### Loading Time -- Large knowledge cores may take time to load -- Loading includes indexing for efficient querying -- Multiple cores can be loaded incrementally - -### Memory Usage -- Knowledge cores consume memory proportional to their size -- Monitor system resources when loading large cores -- Consider flow capacity when loading multiple cores - -### Query Performance -- Loaded knowledge enables faster query responses -- Pre-built indexes improve search performance -- Multiple cores may impact query speed - -## Best Practices - -1. **Pre-Loading**: Load knowledge cores before intensive querying -2. **Resource Planning**: Monitor memory usage with large knowledge cores -3. **Flow Management**: Use dedicated flows for specific knowledge domains -4. **Version Control**: Load specific knowledge core versions for reproducibility -5. **Testing**: Verify knowledge loading with simple queries -6. 
**Documentation**: Document which knowledge cores are loaded in which flows - -## Knowledge Loading Strategy - -### Single Domain -```bash -# Load focused knowledge for specific tasks -tg-load-kg-core --id "specialized-domain" --flow-id "domain-flow" -``` - -### Multi-Domain -```bash -# Load comprehensive knowledge for broad analysis -tg-load-kg-core --id "general-knowledge" --flow-id "general-flow" -tg-load-kg-core --id "domain-specific" --flow-id "general-flow" -``` - -### Incremental Loading -```bash -# Load knowledge incrementally as needed -tg-load-kg-core --id "base-knowledge" --flow-id "analysis-flow" -# ... perform some analysis ... -tg-load-kg-core --id "additional-knowledge" --flow-id "analysis-flow" -``` \ No newline at end of file diff --git a/docs/cli/tg-load-pdf.md b/docs/cli/tg-load-pdf.md deleted file mode 100644 index d6990bd2..00000000 --- a/docs/cli/tg-load-pdf.md +++ /dev/null @@ -1,480 +0,0 @@ -# tg-load-pdf - -Loads PDF documents into TrustGraph for processing and analysis. - -## Synopsis - -```bash -tg-load-pdf [options] file1.pdf [file2.pdf ...] -``` - -## Description - -The `tg-load-pdf` command loads PDF documents into TrustGraph by directing them to the PDF decoder service. The command extracts content, generates document metadata, and makes the documents available for processing by other TrustGraph services. - -Each PDF is assigned a unique identifier based on its content hash, and comprehensive metadata can be attached including copyright information, publication details, and keywords. - -**Note**: Consider using `tg-add-library-document` followed by `tg-start-library-processing` for more comprehensive document management. 
- -## Options - -### Required Arguments - -- `files`: One or more PDF files to load - -### Optional Arguments - -- `-u, --url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-f, --flow-id ID`: Flow instance ID to use (default: `default`) -- `-U, --user USER`: User ID for document ownership (default: `trustgraph`) -- `-C, --collection COLLECTION`: Collection to assign document (default: `default`) - -### Document Metadata - -- `--name NAME`: Document name/title -- `--description DESCRIPTION`: Document description -- `--identifier ID`: Custom document identifier -- `--document-url URL`: Source URL for the document -- `--keyword KEYWORD`: Document keywords (can be specified multiple times) - -### Copyright Information - -- `--copyright-notice NOTICE`: Copyright notice text -- `--copyright-holder HOLDER`: Copyright holder name -- `--copyright-year YEAR`: Copyright year -- `--license LICENSE`: Copyright license - -### Publication Details - -- `--publication-organization ORG`: Publishing organization -- `--publication-description DESC`: Publication description -- `--publication-date DATE`: Publication date - -## Examples - -### Basic PDF Loading -```bash -tg-load-pdf document.pdf -``` - -### Multiple Files -```bash -tg-load-pdf report1.pdf report2.pdf manual.pdf -``` - -### With Basic Metadata -```bash -tg-load-pdf \ - --name "Technical Manual" \ - --description "System administration guide" \ - --keyword "technical" --keyword "manual" \ - technical-manual.pdf -``` - -### Complete Metadata -```bash -tg-load-pdf \ - --name "Annual Report 2023" \ - --description "Company annual financial report" \ - --copyright-holder "Acme Corporation" \ - --copyright-year "2023" \ - --license "All Rights Reserved" \ - --publication-organization "Acme Corporation" \ - --publication-date "2023-12-31" \ - --keyword "financial" --keyword "annual" --keyword "report" \ - annual-report-2023.pdf -``` - -### Custom Flow and Collection -```bash -tg-load-pdf \ 
- -f "document-processing-flow" \ - -U "finance-team" \ - -C "financial-documents" \ - --name "Budget Analysis" \ - budget-2024.pdf -``` - -## Document Processing - -### Content Extraction -The PDF loader: -1. Calculates SHA256 hash for unique document ID -2. Extracts text content from PDF -3. Preserves document structure and formatting metadata -4. Generates searchable text index - -### Metadata Generation -Document metadata includes: -- **Document ID**: SHA256 hash-based unique identifier -- **Content Hash**: For duplicate detection -- **File Information**: Size, format, creation date -- **Custom Metadata**: User-provided attributes - -### Integration with Processing Pipeline -```bash -# Load PDF and start processing -tg-load-pdf research-paper.pdf --name "AI Research Paper" - -# Check processing status -tg-show-flows | grep "document-processing" - -# Query loaded content -tg-invoke-document-rag -q "What is the main conclusion?" -C "default" -``` - -## Error Handling - -### File Not Found -```bash -Exception: [Errno 2] No such file or directory: 'missing.pdf' -``` -**Solution**: Verify file path and ensure PDF exists. - -### Invalid PDF Format -```bash -Exception: PDF parsing failed: Invalid PDF structure -``` -**Solution**: Verify PDF is not corrupted and is a valid PDF file. - -### Permission Errors -```bash -Exception: [Errno 13] Permission denied: 'protected.pdf' -``` -**Solution**: Check file permissions and ensure read access. - -### Flow Not Found -```bash -Exception: Flow instance 'invalid-flow' not found -``` -**Solution**: Verify flow ID exists with `tg-show-flows`. - -### API Connection Issues -```bash -Exception: Connection refused -``` -**Solution**: Check API URL and ensure TrustGraph is running. - -## Advanced Usage - -### Batch Processing -```bash -# Process all PDFs in directory -for pdf in *.pdf; do - echo "Loading $pdf..." 
- tg-load-pdf \ - --name "$(basename "$pdf" .pdf)" \ - --collection "research-papers" \ - "$pdf" -done -``` - -### Organized Loading -```bash -# Load with structured metadata -categories=("technical" "financial" "legal") -for category in "${categories[@]}"; do - for pdf in "$category"/*.pdf; do - if [ -f "$pdf" ]; then - tg-load-pdf \ - --collection "$category-documents" \ - --keyword "$category" \ - --name "$(basename "$pdf" .pdf)" \ - "$pdf" - fi - done -done -``` - -### CSV-Driven Loading -```bash -# Load PDFs with metadata from CSV -# Format: filename,title,description,keywords -while IFS=',' read -r filename title description keywords; do - if [ -f "$filename" ]; then - echo "Loading $filename..." - - # Convert comma-separated keywords to multiple --keyword args - keyword_args="" - IFS='|' read -ra KEYWORDS <<< "$keywords" - for kw in "${KEYWORDS[@]}"; do - keyword_args="$keyword_args --keyword \"$kw\"" - done - - eval "tg-load-pdf \ - --name \"$title\" \ - --description \"$description\" \ - $keyword_args \ - \"$filename\"" - fi -done < documents.csv -``` - -### Publication Processing -```bash -# Load academic papers with publication details -load_academic_paper() { - local file="$1" - local title="$2" - local authors="$3" - local journal="$4" - local year="$5" - - tg-load-pdf \ - --name "$title" \ - --description "Academic paper: $title" \ - --copyright-holder "$authors" \ - --copyright-year "$year" \ - --publication-organization "$journal" \ - --publication-date "$year-01-01" \ - --keyword "academic" --keyword "research" \ - "$file" -} - -# Usage -load_academic_paper "ai-paper.pdf" "AI in Healthcare" "Smith et al." 
"AI Journal" "2023" -``` - -## Monitoring and Validation - -### Load Status Checking -```bash -# Check document loading progress -check_load_status() { - local file="$1" - local expected_name="$2" - - echo "Checking load status for: $file" - - # Check if document appears in library - if tg-show-library-documents | grep -q "$expected_name"; then - echo "✓ Document loaded successfully" - else - echo "✗ Document not found in library" - return 1 - fi -} - -# Monitor batch loading -for pdf in *.pdf; do - name=$(basename "$pdf" .pdf) - check_load_status "$pdf" "$name" -done -``` - -### Content Verification -```bash -# Verify PDF content is accessible -verify_pdf_content() { - local pdf_name="$1" - local test_query="$2" - - echo "Verifying content for: $pdf_name" - - # Try to query the document - result=$(tg-invoke-document-rag -q "$test_query" -C "default" 2>/dev/null) - - if [ $? -eq 0 ] && [ -n "$result" ]; then - echo "✓ Content accessible via RAG" - else - echo "✗ Content not accessible" - return 1 - fi -} - -# Verify loaded documents -verify_pdf_content "Technical Manual" "What is the installation process?" -``` - -## Performance Optimization - -### Parallel Loading -```bash -# Load multiple PDFs in parallel -pdf_files=(document1.pdf document2.pdf document3.pdf) -for pdf in "${pdf_files[@]}"; do - ( - echo "Loading $pdf in background..." 
- tg-load-pdf \ - --name "$(basename "$pdf" .pdf)" \ - --collection "batch-$(date +%Y%m%d)" \ - "$pdf" - ) & -done -wait -echo "All PDFs loaded" -``` - -### Size-Based Processing -```bash -# Process files based on size -for pdf in *.pdf; do - size=$(stat -c%s "$pdf") - if [ $size -lt 10485760 ]; then # < 10MB - echo "Processing small file: $pdf" - tg-load-pdf --collection "small-docs" "$pdf" - else - echo "Processing large file: $pdf" - tg-load-pdf --collection "large-docs" "$pdf" - fi -done -``` - -## Document Organization - -### Collection Management -```bash -# Organize by document type -organize_by_type() { - local pdf="$1" - local filename=$(basename "$pdf" .pdf) - - case "$filename" in - *manual*|*guide*) collection="manuals" ;; - *report*|*analysis*) collection="reports" ;; - *spec*|*specification*) collection="specifications" ;; - *legal*|*contract*) collection="legal" ;; - *) collection="general" ;; - esac - - tg-load-pdf \ - --collection "$collection" \ - --name "$filename" \ - "$pdf" -} - -# Process all PDFs -for pdf in *.pdf; do - organize_by_type "$pdf" -done -``` - -### Metadata Standardization -```bash -# Apply consistent metadata standards -standardize_metadata() { - local pdf="$1" - local dept="$2" - local year="$3" - - local name=$(basename "$pdf" .pdf) - local collection="$dept-$(date +%Y)" - - tg-load-pdf \ - --name "$name" \ - --description "$dept document from $year" \ - --copyright-holder "Company Name" \ - --copyright-year "$year" \ - --collection "$collection" \ - --keyword "$dept" --keyword "$year" \ - "$pdf" -} - -# Usage -standardize_metadata "finance-report.pdf" "finance" "2023" -``` - -## Integration with Other Services - -### Library Integration -```bash -# Alternative approach using library services -load_via_library() { - local pdf="$1" - local name="$2" - - # Add to library first - tg-add-library-document \ - --name "$name" \ - --file "$pdf" \ - --collection "documents" - - # Start processing - tg-start-library-processing \ - 
--collection "documents" -} -``` - -### Workflow Integration -```bash -# Complete document workflow -process_document_workflow() { - local pdf="$1" - local name="$2" - - echo "Starting document workflow for: $name" - - # 1. Load PDF - tg-load-pdf --name "$name" "$pdf" - - # 2. Wait for processing - sleep 5 - - # 3. Verify availability - if tg-show-library-documents | grep -q "$name"; then - echo "Document available in library" - - # 4. Test RAG functionality - tg-invoke-document-rag -q "What is this document about?" - - # 5. Extract key information - tg-invoke-prompt extract-key-points \ - text="Document: $name" \ - format="bullet_points" - else - echo "Document processing failed" - fi -} -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-add-library-document`](tg-add-library-document.md) - Add documents to library -- [`tg-start-library-processing`](tg-start-library-processing.md) - Process library documents -- [`tg-show-library-documents`](tg-show-library-documents.md) - List library documents -- [`tg-invoke-document-rag`](tg-invoke-document-rag.md) - Query document content -- [`tg-show-flows`](tg-show-flows.md) - Monitor processing flows - -## API Integration - -This command uses the document loading API to process PDF files and make them available for text extraction, search, and analysis. - -## Best Practices - -1. **Metadata Completeness**: Provide comprehensive metadata for better organization -2. **Collection Organization**: Use logical collections for document categorization -3. **Error Handling**: Implement robust error handling for batch operations -4. **Performance**: Consider file sizes and processing capacity -5. **Monitoring**: Verify successful loading and processing -6. **Security**: Ensure sensitive documents are properly protected -7. 
**Backup**: Maintain backups of source PDFs - -## Troubleshooting - -### PDF Processing Issues -```bash -# Check PDF validity -file document.pdf -pdfinfo document.pdf - -# Try alternative PDF processors -qpdf --check document.pdf -``` - -### Memory Issues -```bash -# For large PDFs, monitor memory usage -free -h -# Consider processing large files separately -``` - -### Content Extraction Problems -```bash -# Verify PDF contains extractable text -pdftotext document.pdf test-output.txt -cat test-output.txt | head -20 -``` \ No newline at end of file diff --git a/docs/cli/tg-load-sample-documents.md b/docs/cli/tg-load-sample-documents.md deleted file mode 100644 index 44227865..00000000 --- a/docs/cli/tg-load-sample-documents.md +++ /dev/null @@ -1,567 +0,0 @@ -# tg-load-sample-documents - -Loads predefined sample documents into TrustGraph library for testing and demonstration purposes. - -## Synopsis - -```bash -tg-load-sample-documents [options] -``` - -## Description - -The `tg-load-sample-documents` command loads a curated set of sample documents into TrustGraph's document library. These documents include academic papers, government reports, and reference materials that demonstrate TrustGraph's capabilities and provide data for testing and evaluation. - -The command downloads documents from public sources and adds them to the library with comprehensive metadata including RDF triples for semantic relationships. 
- -## Options - -### Optional Arguments - -- `-u, --url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-U, --user USER`: User ID for document ownership (default: `trustgraph`) - -## Examples - -### Basic Loading -```bash -tg-load-sample-documents -``` - -### Load with Custom User -```bash -tg-load-sample-documents -U "demo-user" -``` - -### Load to Custom Environment -```bash -tg-load-sample-documents -u http://demo.trustgraph.ai:8088/ -``` - -## Sample Documents - -The command loads the following sample documents: - -### 1. NASA Challenger Report -- **Title**: Report of the Presidential Commission on the Space Shuttle Challenger Accident, Volume 1 -- **Topics**: Safety engineering, space shuttle, NASA -- **Format**: PDF -- **Source**: NASA Technical Reports Server -- **Use Case**: Demonstrates technical document processing and safety analysis - -### 2. Old Icelandic Dictionary -- **Title**: A Concise Dictionary of Old Icelandic -- **Topics**: Language, linguistics, Old Norse, grammar -- **Format**: PDF -- **Publication**: 1910, Clarendon Press -- **Use Case**: Historical document processing and linguistic analysis - -### 3. US Intelligence Threat Assessment -- **Title**: Annual Threat Assessment of the U.S. Intelligence Community - March 2025 -- **Topics**: National security, cyberthreats, geopolitics -- **Format**: PDF -- **Source**: Director of National Intelligence -- **Use Case**: Current affairs analysis and security research - -### 4. Intelligence and State Policy -- **Title**: The Role of Intelligence and State Policies in International Security -- **Topics**: Intelligence, international security, state policy -- **Format**: PDF (sample) -- **Publication**: Cambridge Scholars Publishing, 2021 -- **Use Case**: Academic research and policy analysis - -### 5. 
Globalization and Intelligence -- **Title**: Beyond the Vigilant State: Globalisation and Intelligence -- **Topics**: Intelligence, globalization, security studies -- **Format**: PDF -- **Author**: Richard J. Aldrich -- **Use Case**: Academic paper analysis and research - -## Use Cases - -### Demo Environment Setup -```bash -# Set up demonstration environment -setup_demo_environment() { - echo "Setting up TrustGraph demo environment..." - - # Initialize system - tg-init-trustgraph - - # Load sample documents - echo "Loading sample documents..." - tg-load-sample-documents -U "demo" - - # Wait for processing - echo "Waiting for document processing..." - sleep 60 - - # Start document processing - echo "Starting document processing..." - tg-show-library-documents -U "demo" | \ - grep "| id" | \ - awk '{print $3}' | \ - while read doc_id; do - proc_id="demo_proc_$(date +%s)_${doc_id}" - tg-start-library-processing -d "$doc_id" --id "$proc_id" -U "demo" - done - - echo "Demo environment ready!" - echo "Try: tg-invoke-document-rag -q 'What caused the Challenger accident?' -U demo" -} -``` - -### Testing Data Pipeline -```bash -# Test complete document processing pipeline -test_document_pipeline() { - echo "Testing document processing pipeline..." - - # Load sample documents - tg-load-sample-documents -U "test" - - # List loaded documents - echo "Loaded documents:" - tg-show-library-documents -U "test" - - # Start processing for each document - tg-show-library-documents -U "test" | \ - grep "| id" | \ - awk '{print $3}' | \ - while read doc_id; do - echo "Processing document: $doc_id" - proc_id="test_$(date +%s)_${doc_id}" - tg-start-library-processing -d "$doc_id" --id "$proc_id" -U "test" - done - - # Wait for processing - echo "Processing documents... (this may take several minutes)" - sleep 300 - - # Test document queries - echo "Testing document queries..." - - test_queries=( - "What is the Challenger accident?" - "What is Old Icelandic?" 
- "What are the main cybersecurity threats?" - "What is intelligence policy?" - ) - - for query in "${test_queries[@]}"; do - echo "Query: $query" - tg-invoke-document-rag -q "$query" -U "test" | head -5 - echo "---" - done - - echo "Pipeline test complete!" -} -``` - -### Educational Environment -```bash -# Set up educational/training environment -setup_educational_environment() { - local class_name="$1" - - echo "Setting up educational environment for: $class_name" - - # Create user for the class - class_user=$(echo "$class_name" | tr '[:upper:]' '[:lower:]' | tr ' ' '-') - - # Load sample documents for the class - tg-load-sample-documents -U "$class_user" - - # Process documents - echo "Processing documents for educational use..." - tg-show-library-documents -U "$class_user" | \ - grep "| id" | \ - awk '{print $3}' | \ - while read doc_id; do - proc_id="edu_$(date +%s)_${doc_id}" - tg-start-library-processing \ - -d "$doc_id" \ - --id "$proc_id" \ - -U "$class_user" \ - --collection "education" - done - - echo "Educational environment ready for: $class_name" - echo "User: $class_user" - echo "Collection: education" -} - -# Set up for different classes -setup_educational_environment "AI Research Methods" -setup_educational_environment "Security Studies" -``` - -### Benchmarking and Performance Testing -```bash -# Benchmark document processing performance -benchmark_processing() { - echo "Starting document processing benchmark..." 
- - # Load sample documents - start_time=$(date +%s) - tg-load-sample-documents -U "benchmark" - load_time=$(date +%s) - - echo "Document loading time: $((load_time - start_time))s" - - # Count documents - doc_count=$(tg-show-library-documents -U "benchmark" | grep -c "| id") - echo "Documents loaded: $doc_count" - - # Start processing - processing_ids=() - tg-show-library-documents -U "benchmark" | \ - grep "| id" | \ - awk '{print $3}' | \ - while read doc_id; do - proc_id="bench_$(date +%s)_${doc_id}" - processing_ids+=("$proc_id") - tg-start-library-processing -d "$doc_id" --id "$proc_id" -U "benchmark" - done - - processing_start=$(date +%s) - - # Monitor processing completion - echo "Monitoring processing completion..." - while true; do - active_processing=$(tg-show-flows | grep -c "bench_" || echo "0") - - if [ "$active_processing" -eq 0 ]; then - break - fi - - echo "Active processing jobs: $active_processing" - sleep 30 - done - - processing_end=$(date +%s) - - echo "Processing completion time: $((processing_end - processing_start))s" - echo "Total benchmark time: $((processing_end - start_time))s" - - # Test query performance - echo "Testing query performance..." - query_start=$(date +%s) - - for i in {1..10}; do - tg-invoke-document-rag \ - -q "What are the main topics in these documents?" \ - -U "benchmark" > /dev/null - done - - query_end=$(date +%s) - echo "Average query time: $(echo "scale=2; ($query_end - $query_start) / 10" | bc)s" -} -``` - -## Advanced Usage - -### Selective Document Loading -```bash -# Load only specific types of documents -load_by_category() { - local category="$1" - - case "$category" in - "government") - echo "Loading government documents..." - # This would require modifying the script to load selectively - # For now, we load all and filter by tags later - tg-load-sample-documents -U "gov-docs" - ;; - "academic") - echo "Loading academic documents..." 
- tg-load-sample-documents -U "academic-docs" - ;; - "historical") - echo "Loading historical documents..." - tg-load-sample-documents -U "historical-docs" - ;; - *) - echo "Loading all sample documents..." - tg-load-sample-documents - ;; - esac -} - -# Load by category -load_by_category "government" -load_by_category "academic" -``` - -### Multi-Environment Loading -```bash -# Load sample documents to multiple environments -multi_environment_setup() { - local environments=("dev" "staging" "demo") - - for env in "${environments[@]}"; do - echo "Setting up $env environment..." - - tg-load-sample-documents \ - -u "http://$env.trustgraph.company.com:8088/" \ - -U "sample-data" - - echo "✓ $env environment loaded" - done - - echo "All environments loaded with sample documents" -} -``` - -### Custom Document Sets -```bash -# Create custom document loading scripts based on the sample -create_custom_loader() { - local domain="$1" - - cat > "load-${domain}-documents.py" << 'EOF' -#!/usr/bin/env python3 -""" -Custom document loader for specific domain -Based on tg-load-sample-documents -""" - -import argparse -import os -from trustgraph.api import Api - -# Define your own document set here -documents = [ - { - "id": "https://example.com/doc/custom-1", - "title": "Custom Document 1", - "url": "https://example.com/docs/custom1.pdf", - # Add your document definitions... - } -] - -# Rest of the implementation similar to tg-load-sample-documents -EOF - - echo "Custom loader created: load-${domain}-documents.py" -} - -# Create custom loaders for different domains -create_custom_loader "medical" -create_custom_loader "legal" -create_custom_loader "technical" -``` - -## Document Analysis - -### Content Analysis -```bash -# Analyze loaded sample documents -analyze_sample_documents() { - echo "Analyzing sample documents..." 
- - # Get document statistics - total_docs=$(tg-show-library-documents | grep -c "| id") - echo "Total documents: $total_docs" - - # Analyze by type - echo "Document types:" - tg-show-library-documents | \ - grep "| kind" | \ - awk '{print $3}' | \ - sort | uniq -c - - # Analyze tags - echo "Popular tags:" - tg-show-library-documents | \ - grep "| tags" | \ - sed 's/.*| tags.*| \(.*\) |.*/\1/' | \ - tr ',' '\n' | \ - sed 's/^ *//;s/ *$//' | \ - sort | uniq -c | sort -nr | head -10 - - # Document sizes (would need additional API) - echo "Document analysis complete" -} -``` - -### Query Testing -```bash -# Test sample documents with various queries -test_sample_queries() { - echo "Testing sample document queries..." - - # Define test queries for different domains - queries=( - "What caused the Challenger space shuttle accident?" - "What is Old Norse language?" - "What are current cybersecurity threats?" - "How does globalization affect intelligence services?" - "What are the main security challenges in international relations?" - ) - - for query in "${queries[@]}"; do - echo "Testing query: $query" - echo "====================" - - result=$(tg-invoke-document-rag -q "$query" 2>/dev/null) - - if [ $? -eq 0 ]; then - echo "$result" | head -3 - echo "✓ Query successful" - else - echo "✗ Query failed" - fi - - echo "" - done -} -``` - -## Error Handling - -### Network Issues -```bash -Exception: Connection failed during download -``` -**Solution**: Check internet connectivity and retry. Documents are cached locally after first download. - -### Insufficient Storage -```bash -Exception: No space left on device -``` -**Solution**: Free up disk space. Sample documents total approximately 50-100MB. - -### API Connection Issues -```bash -Exception: Connection refused -``` -**Solution**: Verify TrustGraph API is running and accessible. 
- -### Processing Failures -```bash -Exception: Document processing failed -``` -**Solution**: Check TrustGraph service logs and ensure all components are running. - -## Monitoring and Validation - -### Loading Progress -```bash -# Monitor sample document loading -monitor_sample_loading() { - echo "Starting sample document loading with monitoring..." - - # Start loading in background - tg-load-sample-documents & - load_pid=$! - - # Monitor progress - while kill -0 $load_pid 2>/dev/null; do - doc_count=$(tg-show-library-documents 2>/dev/null | grep -c "| id" || echo "0") - echo "Documents loaded so far: $doc_count" - sleep 10 - done - - wait $load_pid - - if [ $? -eq 0 ]; then - final_count=$(tg-show-library-documents | grep -c "| id") - echo "✓ Loading completed successfully" - echo "Total documents loaded: $final_count" - else - echo "✗ Loading failed" - fi -} -``` - -### Validation -```bash -# Validate sample document loading -validate_sample_loading() { - echo "Validating sample document loading..." 
- - # Expected document count (based on current sample set) - expected_docs=5 - - # Check actual count - actual_docs=$(tg-show-library-documents | grep -c "| id") - - if [ "$actual_docs" -eq "$expected_docs" ]; then - echo "✓ Document count correct: $actual_docs" - else - echo "⚠ Document count mismatch: expected $expected_docs, got $actual_docs" - fi - - # Check for expected documents - expected_titles=( - "Challenger" - "Icelandic" - "Intelligence" - "Threat Assessment" - "Vigilant State" - ) - - for title in "${expected_titles[@]}"; do - if tg-show-library-documents | grep -q "$title"; then - echo "✓ Found document containing: $title" - else - echo "✗ Missing document containing: $title" - fi - done - - echo "Validation complete" -} -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-show-library-documents`](tg-show-library-documents.md) - List loaded documents -- [`tg-start-library-processing`](tg-start-library-processing.md) - Process loaded documents -- [`tg-invoke-document-rag`](tg-invoke-document-rag.md) - Query processed documents -- [`tg-load-pdf`](tg-load-pdf.md) - Load individual PDF documents - -## API Integration - -This command uses the [Library API](../apis/api-librarian.md) to add sample documents to TrustGraph's document repository. - -## Best Practices - -1. **Demo Preparation**: Use for setting up demonstration environments -2. **Testing**: Ideal for testing document processing pipelines -3. **Education**: Excellent for training and educational purposes -4. **Development**: Use in development environments for consistent test data -5. **Benchmarking**: Suitable for performance testing and optimization -6. 
**Documentation**: Great for documenting TrustGraph capabilities - -## Troubleshooting - -### Download Failures -```bash -# Check document URLs are accessible -curl -I "https://ntrs.nasa.gov/api/citations/19860015255/downloads/19860015255.pdf" - -# Check local cache -ls -la doc-cache/ -``` - -### Processing Issues -```bash -# Check document processing status -tg-show-library-processing - -# Verify documents are in library -tg-show-library-documents | grep -E "(Challenger|Icelandic|Intelligence)" -``` - -### Performance Problems -```bash -# Monitor system resources during loading -top -df -h -``` \ No newline at end of file diff --git a/docs/cli/tg-load-text.md b/docs/cli/tg-load-text.md deleted file mode 100644 index 765cb80a..00000000 --- a/docs/cli/tg-load-text.md +++ /dev/null @@ -1,211 +0,0 @@ -# tg-load-text - -Loads text documents into TrustGraph processing pipelines with rich metadata support. - -## Synopsis - -```bash -tg-load-text [options] file1 [file2 ...] -``` - -## Description - -The `tg-load-text` command loads text documents into TrustGraph for processing. It creates a SHA256 hash-based document ID and supports comprehensive metadata including copyright information, publication details, and keywords. - -**Note**: Consider using `tg-add-library-document` followed by `tg-start-library-processing` for better document management and processing control. 
- -## Options - -### Connection & Flow -- `-u, --url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-f, --flow-id FLOW`: Flow ID for processing (default: `default`) -- `-U, --user USER`: User identifier (default: `trustgraph`) -- `-C, --collection COLLECTION`: Collection identifier (default: `default`) - -### Document Metadata -- `--name NAME`: Document name/title -- `--description DESCRIPTION`: Document description -- `--document-url URL`: Document source URL - -### Copyright Information -- `--copyright-notice NOTICE`: Copyright notice text -- `--copyright-holder HOLDER`: Copyright holder name -- `--copyright-year YEAR`: Copyright year -- `--license LICENSE`: Copyright license - -### Publication Information -- `--publication-organization ORG`: Publishing organization -- `--publication-description DESC`: Publication description -- `--publication-date DATE`: Publication date - -### Keywords -- `--keyword KEYWORD [KEYWORD ...]`: Document keywords (can specify multiple) - -## Arguments - -- `file1 [file2 ...]`: One or more text files to load - -## Examples - -### Basic Document Loading -```bash -tg-load-text document.txt -``` - -### Loading with Metadata -```bash -tg-load-text \ - --name "Research Paper on AI" \ - --description "Comprehensive study of machine learning algorithms" \ - --keyword "AI" "machine learning" "research" \ - research-paper.txt -``` - -### Complete Metadata Example -```bash -tg-load-text \ - --name "TrustGraph Documentation" \ - --description "Complete user guide for TrustGraph system" \ - --copyright-holder "TrustGraph Project" \ - --copyright-year "2024" \ - --license "MIT" \ - --publication-organization "TrustGraph Foundation" \ - --publication-date "2024-01-15" \ - --keyword "documentation" "guide" "tutorial" \ - --flow-id research-flow \ - trustgraph-guide.txt -``` - -### Multiple Files -```bash -tg-load-text chapter1.txt chapter2.txt chapter3.txt -``` - -### Custom Flow and Collection -```bash 
-tg-load-text \ - --flow-id medical-research \ - --user researcher \ - --collection medical-papers \ - medical-study.txt -``` - -## Output - -For each file processed, the command outputs: - -### Success -``` -document.txt: Loaded successfully. -``` - -### Failure -``` -document.txt: Failed: Connection refused -``` - -## Document Processing - -1. **File Reading**: Reads the text file content -2. **Hash Generation**: Creates SHA256 hash for unique document ID -3. **URI Creation**: Converts hash to document URI format -4. **Metadata Assembly**: Combines all metadata into RDF triples -5. **API Submission**: Sends to TrustGraph via Text Load API - -## Document ID Generation - -Documents are assigned IDs based on their content hash: -- SHA256 hash of file content -- Converted to TrustGraph document URI format -- Example: `http://trustgraph.ai/d/abc123...` - -## Metadata Format - -The metadata is stored as RDF triples including: - -### Standard Properties -- `dc:title`: Document name -- `dc:description`: Document description -- `dc:creator`: Copyright holder -- `dc:date`: Publication date -- `dc:rights`: Copyright notice -- `dc:license`: License information - -### Keywords -- `dc:subject`: Each keyword as separate triple - -### Organization Information -- `foaf:Organization`: Publication organization details - -## Error Handling - -### File Errors -```bash -document.txt: Failed: No such file or directory -``` -**Solution**: Verify the file path exists and is readable. - -### Connection Errors -```bash -document.txt: Failed: Connection refused -``` -**Solution**: Check the API URL and ensure TrustGraph is running. - -### Flow Errors -```bash -document.txt: Failed: Invalid flow -``` -**Solution**: Verify the flow exists and is running using `tg-show-flows`. 
- -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-add-library-document`](tg-add-library-document.md) - Add documents to library (recommended) -- [`tg-load-pdf`](tg-load-pdf.md) - Load PDF documents -- [`tg-show-library-documents`](tg-show-library-documents.md) - List loaded documents -- [`tg-start-library-processing`](tg-start-library-processing.md) - Start document processing - -## API Integration - -This command uses the [Text Load API](../apis/api-text-load.md) to submit documents for processing. The text content is base64-encoded for transmission. - -## Use Cases - -### Academic Research -```bash -tg-load-text \ - --name "Climate Change Impact Study" \ - --publication-organization "University Research Center" \ - --keyword "climate" "research" "environment" \ - climate-study.txt -``` - -### Corporate Documentation -```bash -tg-load-text \ - --name "Product Manual" \ - --copyright-holder "Acme Corp" \ - --license "Proprietary" \ - --keyword "manual" "product" "guide" \ - product-manual.txt -``` - -### Technical Documentation -```bash -tg-load-text \ - --name "API Reference" \ - --description "Complete API documentation" \ - --keyword "API" "reference" "technical" \ - api-docs.txt -``` - -## Best Practices - -1. **Use Descriptive Names**: Provide clear document names and descriptions -2. **Add Keywords**: Include relevant keywords for better searchability -3. **Complete Metadata**: Fill in copyright and publication information -4. **Batch Processing**: Load multiple related files together -5. **Use Collections**: Organize documents by topic or project using collections \ No newline at end of file diff --git a/docs/cli/tg-load-turtle.md b/docs/cli/tg-load-turtle.md deleted file mode 100644 index be1a7d42..00000000 --- a/docs/cli/tg-load-turtle.md +++ /dev/null @@ -1,505 +0,0 @@ -# tg-load-turtle - -Loads RDF triples from Turtle files into the TrustGraph knowledge graph. 
- -## Synopsis - -```bash -tg-load-turtle -i DOCUMENT_ID [options] file1.ttl [file2.ttl ...] -``` - -## Description - -The `tg-load-turtle` command loads RDF triples from Turtle (TTL) format files into TrustGraph's knowledge graph. It parses Turtle files, converts them to TrustGraph's internal triple format, and imports them using WebSocket connections for efficient batch processing. - -The command supports retry logic and automatic reconnection to handle network interruptions during large data imports. - -## Options - -### Required Arguments - -- `-i, --document-id ID`: Document ID to associate with the triples -- `files`: One or more Turtle files to load - -### Optional Arguments - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `ws://localhost:8088/`) -- `-f, --flow-id ID`: Flow instance ID to use (default: `default`) -- `-U, --user USER`: User ID for triple ownership (default: `trustgraph`) -- `-C, --collection COLLECTION`: Collection to assign triples (default: `default`) - -## Examples - -### Basic Turtle Loading -```bash -tg-load-turtle -i "doc123" knowledge-base.ttl -``` - -### Multiple Files -```bash -tg-load-turtle -i "ontology-v1" \ - schema.ttl \ - instances.ttl \ - relationships.ttl -``` - -### Custom Flow and Collection -```bash -tg-load-turtle \ - -i "research-data" \ - -f "knowledge-import-flow" \ - -U "research-team" \ - -C "research-kg" \ - research-triples.ttl -``` - -### Load with Custom API URL -```bash -tg-load-turtle \ - -i "production-data" \ - -u "ws://production:8088/" \ - production-ontology.ttl -``` - -## Turtle Format Support - -### Basic Triples -```turtle -@prefix ex: . -@prefix rdf: . -@prefix rdfs: . - -ex:Person rdf:type rdfs:Class . -ex:john rdf:type ex:Person . -ex:john ex:name "John Doe" . -ex:john ex:age "30"^^xsd:integer . -``` - -### Complex Structures -```turtle -@prefix org: . -@prefix foaf: . 
- -org:TechCorp rdf:type foaf:Organization ; - foaf:name "Technology Corporation" ; - org:hasEmployee org:john, org:jane ; - org:foundedYear "2010"^^xsd:gYear . - -org:john foaf:name "John Smith" ; - foaf:mbox ; - org:position "Software Engineer" . -``` - -### Ontology Loading -```turtle -@prefix owl: . -@prefix dc: . - - rdf:type owl:Ontology ; - dc:title "Example Ontology" ; - dc:creator "Knowledge Team" . - -ex:Vehicle rdf:type owl:Class ; - rdfs:label "Vehicle" ; - rdfs:comment "A means of transportation" . - -ex:Car rdfs:subClassOf ex:Vehicle . -ex:Truck rdfs:subClassOf ex:Vehicle . -``` - -## Data Processing - -### Triple Conversion -The loader converts Turtle triples to TrustGraph format: -- **URIs**: Converted to URI references with `is_uri=true` -- **Literals**: Converted to literal values with `is_uri=false` -- **Datatypes**: Preserved in literal values - -### Batch Processing -- Triples are sent individually via WebSocket -- Each triple includes document metadata -- Automatic retry on connection failures -- Progress tracking for large files - -### Error Handling -- Invalid Turtle syntax causes parsing errors -- Network interruptions trigger automatic retry -- Malformed triples are skipped with warnings - -## Use Cases - -### Ontology Import -```bash -# Load domain ontology -tg-load-turtle -i "healthcare-ontology" \ - -C "ontologies" \ - healthcare-schema.ttl - -# Load instance data -tg-load-turtle -i "patient-data" \ - -C "healthcare-data" \ - patient-records.ttl -``` - -### Knowledge Base Migration -```bash -# Migrate from external knowledge base -tg-load-turtle -i "migration-$(date +%Y%m%d)" \ - -C "migrated-data" \ - exported-knowledge.ttl -``` - -### Research Data Loading -```bash -# Load research datasets -datasets=("publications" "authors" "citations") -for dataset in "${datasets[@]}"; do - tg-load-turtle -i "research-$dataset" \ - -C "research-data" \ - "$dataset.ttl" -done -``` - -### Structured Data Import -```bash -# Load structured data from 
various sources -tg-load-turtle -i "products" -C "catalog" product-catalog.ttl -tg-load-turtle -i "customers" -C "crm" customer-data.ttl -tg-load-turtle -i "orders" -C "transactions" order-history.ttl -``` - -## Advanced Usage - -### Batch Processing Multiple Files -```bash -# Process all Turtle files in directory -for ttl in *.ttl; do - doc_id=$(basename "$ttl" .ttl) - echo "Loading $ttl as document $doc_id..." - - tg-load-turtle -i "$doc_id" \ - -C "bulk-import-$(date +%Y%m%d)" \ - "$ttl" -done -``` - -### Parallel Loading -```bash -# Load multiple files in parallel -ttl_files=(schema.ttl instances.ttl relationships.ttl) -for ttl in "${ttl_files[@]}"; do - ( - doc_id=$(basename "$ttl" .ttl) - echo "Loading $ttl in background..." - tg-load-turtle -i "parallel-$doc_id" \ - -C "parallel-import" \ - "$ttl" - ) & -done -wait -echo "All files loaded" -``` - -### Size-Based Processing -```bash -# Handle large files differently -for ttl in *.ttl; do - size=$(stat -c%s "$ttl") - doc_id=$(basename "$ttl" .ttl) - - if [ $size -lt 10485760 ]; then # < 10MB - echo "Processing small file: $ttl" - tg-load-turtle -i "$doc_id" -C "small-files" "$ttl" - else - echo "Processing large file: $ttl" - # Use dedicated collection for large files - tg-load-turtle -i "$doc_id" -C "large-files" "$ttl" - fi -done -``` - -### Validation and Loading -```bash -# Validate before loading -validate_and_load() { - local ttl_file="$1" - local doc_id="$2" - - echo "Validating $ttl_file..." - - # Check Turtle syntax - if rapper -q -i turtle "$ttl_file" > /dev/null 2>&1; then - echo "✓ Valid Turtle syntax" - - # Count triples - triple_count=$(rapper -i turtle -c "$ttl_file" 2>/dev/null) - echo " Triples: $triple_count" - - # Load if valid - echo "Loading $ttl_file..." 
- tg-load-turtle -i "$doc_id" -C "validated-data" "$ttl_file" - else - echo "✗ Invalid Turtle syntax in $ttl_file" - return 1 - fi -} - -# Validate and load all files -for ttl in *.ttl; do - doc_id=$(basename "$ttl" .ttl) - validate_and_load "$ttl" "$doc_id" -done -``` - -## Error Handling - -### Invalid Turtle Syntax -```bash -Exception: Turtle parsing failed -``` -**Solution**: Validate Turtle syntax with tools like `rapper` or `rdflib`. - -### Document ID Required -```bash -Exception: Document ID is required -``` -**Solution**: Provide document ID with `-i` option. - -### WebSocket Connection Issues -```bash -Exception: WebSocket connection failed -``` -**Solution**: Check API URL and ensure TrustGraph WebSocket service is running. - -### File Not Found -```bash -Exception: [Errno 2] No such file or directory -``` -**Solution**: Verify file paths and ensure Turtle files exist. - -### Flow Not Found -```bash -Exception: Flow instance not found -``` -**Solution**: Verify flow ID with `tg-show-flows`. 
- -## Monitoring and Verification - -### Load Progress Tracking -```bash -# Monitor loading progress -monitor_load() { - local ttl_file="$1" - local doc_id="$2" - - echo "Starting load: $ttl_file" - start_time=$(date +%s) - - tg-load-turtle -i "$doc_id" -C "monitored" "$ttl_file" - - end_time=$(date +%s) - duration=$((end_time - start_time)) - - echo "Load completed in ${duration}s" - - # Verify data is accessible - if tg-triples-query -s "http://example.org/test" > /dev/null 2>&1; then - echo "✓ Data accessible via query" - else - echo "✗ Data not accessible" - fi -} -``` - -### Data Verification -```bash -# Verify loaded triples -verify_triples() { - local collection="$1" - local expected_count="$2" - - echo "Verifying triples in collection: $collection" - - # Query for triples - actual_count=$(tg-triples-query -C "$collection" | wc -l) - - if [ "$actual_count" -ge "$expected_count" ]; then - echo "✓ Expected triples found ($actual_count >= $expected_count)" - else - echo "✗ Missing triples ($actual_count < $expected_count)" - return 1 - fi -} -``` - -### Content Analysis -```bash -# Analyze loaded content -analyze_turtle_content() { - local ttl_file="$1" - - echo "Analyzing content: $ttl_file" - - # Extract prefixes - echo "Prefixes:" - grep "^@prefix" "$ttl_file" | head -5 - - # Count statements - statement_count=$(grep -c "\." "$ttl_file") - echo "Statements: $statement_count" - - # Extract subjects - echo "Sample subjects:" - grep -o "^[^[:space:]]*" "$ttl_file" | grep -v "^@" | sort | uniq | head -5 -} -``` - -## Performance Optimization - -### Connection Pooling -```bash -# Reuse WebSocket connections for multiple files -load_batch_optimized() { - local collection="$1" - shift - local files=("$@") - - echo "Loading ${#files[@]} files to collection: $collection" - - # Process files in batches to reuse connections - for ((i=0; i<${#files[@]}; i+=5)); do - batch=("${files[@]:$i:5}") - - echo "Processing batch $((i/5 + 1))..." 
- for ttl in "${batch[@]}"; do - doc_id=$(basename "$ttl" .ttl) - tg-load-turtle -i "$doc_id" -C "$collection" "$ttl" & - done - wait - done -} -``` - -### Memory Management -```bash -# Handle large files with memory monitoring -load_with_memory_check() { - local ttl_file="$1" - local doc_id="$2" - - # Check available memory - available=$(free -m | awk 'NR==2{print $7}') - if [ "$available" -lt 1000 ]; then - echo "Warning: Low memory ($available MB). Consider splitting file." - fi - - # Monitor memory during load - tg-load-turtle -i "$doc_id" -C "memory-monitored" "$ttl_file" & - load_pid=$! - - while kill -0 $load_pid 2>/dev/null; do - memory_usage=$(ps -p $load_pid -o rss= | awk '{print $1/1024}') - echo "Memory usage: ${memory_usage}MB" - sleep 5 - done -} -``` - -## Data Preparation - -### Turtle File Preparation -```bash -# Clean and prepare Turtle files -prepare_turtle() { - local input_file="$1" - local output_file="$2" - - echo "Preparing $input_file -> $output_file" - - # Remove comments and empty lines - sed '/^#/d; /^$/d' "$input_file" > "$output_file" - - # Validate output - if rapper -q -i turtle "$output_file" > /dev/null 2>&1; then - echo "✓ Prepared file is valid" - else - echo "✗ Prepared file is invalid" - return 1 - fi -} -``` - -### Data Splitting -```bash -# Split large Turtle files -split_turtle() { - local input_file="$1" - local lines_per_file="$2" - - echo "Splitting $input_file into chunks of $lines_per_file lines" - - # Split file - split -l "$lines_per_file" "$input_file" "$(basename "$input_file" .ttl)_part_" - - # Add .ttl extension to parts - for part in $(basename "$input_file" .ttl)_part_*; do - mv "$part" "$part.ttl" - done -} -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL (WebSocket format) - -## Related Commands - -- [`tg-triples-query`](tg-triples-query.md) - Query loaded triples -- [`tg-graph-to-turtle`](tg-graph-to-turtle.md) - Export graph to Turtle format -- [`tg-show-flows`](tg-show-flows.md) - 
Monitor processing flows -- [`tg-load-pdf`](tg-load-pdf.md) - Load document content - -## API Integration - -This command uses TrustGraph's WebSocket-based triple import API for efficient batch loading of RDF data. - -## Best Practices - -1. **Validation**: Always validate Turtle syntax before loading -2. **Document IDs**: Use meaningful, unique document identifiers -3. **Collections**: Organize triples into logical collections -4. **Error Handling**: Implement retry logic for network issues -5. **Performance**: Consider file sizes and system resources -6. **Monitoring**: Track loading progress and verify results -7. **Backup**: Maintain backups of source Turtle files - -## Troubleshooting - -### WebSocket Connection Issues -```bash -# Test WebSocket connectivity -wscat -c ws://localhost:8088/api/v1/flow/default/import/triples - -# Check WebSocket service status -tg-show-flows | grep -i websocket -``` - -### Parsing Errors -```bash -# Validate Turtle syntax -rapper -i turtle -q file.ttl - -# Check for common issues -grep -n "^[[:space:]]*@prefix" file.ttl # Check prefixes -grep -n "\.$" file.ttl | head -5 # Check statement terminators -``` - -### Memory Issues -```bash -# Monitor memory usage -free -h -ps aux | grep tg-load-turtle - -# Split large files if needed -split -l 10000 large-file.ttl chunk_ -``` \ No newline at end of file diff --git a/docs/cli/tg-put-flow-blueprint.md b/docs/cli/tg-put-flow-blueprint.md deleted file mode 100644 index 059d4efa..00000000 --- a/docs/cli/tg-put-flow-blueprint.md +++ /dev/null @@ -1,406 +0,0 @@ -# tg-put-flow-blueprint - -Uploads or updates a flow blueprint definition in TrustGraph. - -## Synopsis - -```bash -tg-put-flow-blueprint -n CLASS_NAME -c CONFIG_JSON [options] -``` - -## Description - -The `tg-put-flow-blueprint` command creates or updates a flow blueprint definition in TrustGraph. Flow blueprintes are templates that define processing pipeline configurations, service interfaces, and resource requirements. 
These classes are used by `tg-start-flow` to create running flow instances. - -Flow blueprintes define the structure and capabilities of processing flows, including which services are available and how they connect to Pulsar queues. - -## Options - -### Required Arguments - -- `-n, --blueprint-name CLASS_NAME`: Name for the flow blueprint -- `-c, --config CONFIG_JSON`: Flow blueprint configuration as raw JSON string - -### Optional Arguments - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) - -## Examples - -### Basic Flow Blueprint Creation -```bash -tg-put-flow-blueprint \ - -n "simple-processing" \ - -c '{"description": "Simple text processing flow", "interfaces": {"text-completion": {"request": "non-persistent://tg/request/text-completion:simple", "response": "non-persistent://tg/response/text-completion:simple"}}}' -``` - -### Document Processing Flow Blueprint -```bash -tg-put-flow-blueprint \ - -n "document-analysis" \ - -c '{ - "description": "Document analysis and RAG processing", - "interfaces": { - "document-rag": { - "request": "non-persistent://tg/request/document-rag:doc-analysis", - "response": "non-persistent://tg/response/document-rag:doc-analysis" - }, - "text-load": "persistent://tg/flow/text-document-load:doc-analysis", - "document-load": "persistent://tg/flow/document-load:doc-analysis" - } - }' -``` - -### Loading from File -```bash -# Create configuration file -cat > research-flow.json << 'EOF' -{ - "description": "Research analysis flow with multiple AI services", - "interfaces": { - "agent": { - "request": "non-persistent://tg/request/agent:research", - "response": "non-persistent://tg/response/agent:research" - }, - "graph-rag": { - "request": "non-persistent://tg/request/graph-rag:research", - "response": "non-persistent://tg/response/graph-rag:research" - }, - "document-rag": { - "request": "non-persistent://tg/request/document-rag:research", - "response": 
"non-persistent://tg/response/document-rag:research" - }, - "embeddings": { - "request": "non-persistent://tg/request/embeddings:research", - "response": "non-persistent://tg/response/embeddings:research" - }, - "text-load": "persistent://tg/flow/text-document-load:research", - "triples-store": "persistent://tg/flow/triples-store:research" - } -} -EOF - -# Upload the flow blueprint -tg-put-flow-blueprint -n "research-analysis" -c "$(cat research-flow.json)" -``` - -### Update Existing Flow Blueprint -```bash -# Modify existing flow blueprint by adding new service -tg-put-flow-blueprint \ - -n "existing-flow" \ - -c '{ - "description": "Updated flow with new capabilities", - "interfaces": { - "text-completion": { - "request": "non-persistent://tg/request/text-completion:updated", - "response": "non-persistent://tg/response/text-completion:updated" - }, - "prompt": { - "request": "non-persistent://tg/request/prompt:updated", - "response": "non-persistent://tg/response/prompt:updated" - } - } - }' -``` - -## Flow Blueprint Configuration Format - -### Required Fields - -#### Description -```json -{ - "description": "Human-readable description of the flow blueprint" -} -``` - -#### Interfaces -```json -{ - "interfaces": { - "service-name": "queue-definition-or-object" - } -} -``` - -### Interface Types - -#### Request/Response Services -Services that accept requests and return responses: - -```json -{ - "service-name": { - "request": "pulsar-queue-url", - "response": "pulsar-queue-url" - } -} -``` - -Examples: -- `agent` -- `graph-rag` -- `document-rag` -- `text-completion` -- `prompt` -- `embeddings` -- `graph-embeddings` -- `triples` - -#### Fire-and-Forget Services -Services that accept data without returning responses: - -```json -{ - "service-name": "pulsar-queue-url" -} -``` - -Examples: -- `text-load` -- `document-load` -- `triples-store` -- `graph-embeddings-store` -- `document-embeddings-store` -- `entity-contexts-load` - -### Queue Naming Conventions - -#### 
Request/Response Queues -``` -non-persistent://tg/request/{service}:{flow-identifier} -non-persistent://tg/response/{service}:{flow-identifier} -``` - -#### Fire-and-Forget Queues -``` -persistent://tg/flow/{service}:{flow-identifier} -``` - -## Complete Example - -### Comprehensive Flow Blueprint -```bash -tg-put-flow-blueprint \ - -n "full-processing-pipeline" \ - -c '{ - "description": "Complete document processing and analysis pipeline", - "interfaces": { - "agent": { - "request": "non-persistent://tg/request/agent:full-pipeline", - "response": "non-persistent://tg/response/agent:full-pipeline" - }, - "graph-rag": { - "request": "non-persistent://tg/request/graph-rag:full-pipeline", - "response": "non-persistent://tg/response/graph-rag:full-pipeline" - }, - "document-rag": { - "request": "non-persistent://tg/request/document-rag:full-pipeline", - "response": "non-persistent://tg/response/document-rag:full-pipeline" - }, - "text-completion": { - "request": "non-persistent://tg/request/text-completion:full-pipeline", - "response": "non-persistent://tg/response/text-completion:full-pipeline" - }, - "prompt": { - "request": "non-persistent://tg/request/prompt:full-pipeline", - "response": "non-persistent://tg/response/prompt:full-pipeline" - }, - "embeddings": { - "request": "non-persistent://tg/request/embeddings:full-pipeline", - "response": "non-persistent://tg/response/embeddings:full-pipeline" - }, - "graph-embeddings": { - "request": "non-persistent://tg/request/graph-embeddings:full-pipeline", - "response": "non-persistent://tg/response/graph-embeddings:full-pipeline" - }, - "triples": { - "request": "non-persistent://tg/request/triples:full-pipeline", - "response": "non-persistent://tg/response/triples:full-pipeline" - }, - "text-load": "persistent://tg/flow/text-document-load:full-pipeline", - "document-load": "persistent://tg/flow/document-load:full-pipeline", - "triples-store": "persistent://tg/flow/triples-store:full-pipeline", - 
"graph-embeddings-store": "persistent://tg/flow/graph-embeddings-store:full-pipeline", - "document-embeddings-store": "persistent://tg/flow/document-embeddings-store:full-pipeline", - "entity-contexts-load": "persistent://tg/flow/entity-contexts-load:full-pipeline" - } - }' -``` - -## Output - -Successful upload typically produces no output: - -```bash -# Upload flow blueprint (no output expected) -tg-put-flow-blueprint -n "my-flow" -c '{"description": "test", "interfaces": {}}' - -# Verify upload -tg-show-flow-blueprints | grep "my-flow" -``` - -## Error Handling - -### Invalid JSON Format -```bash -Exception: Invalid JSON in config parameter -``` -**Solution**: Validate JSON syntax using tools like `jq` or online JSON validators. - -### Missing Required Fields -```bash -Exception: Missing required field 'description' -``` -**Solution**: Ensure configuration includes all required fields (description, interfaces). - -### Invalid Queue Names -```bash -Exception: Invalid queue URL format -``` -**Solution**: Verify queue URLs follow the correct Pulsar format with proper tenant/namespace. - -### Connection Errors -```bash -Exception: Connection refused -``` -**Solution**: Check the API URL and ensure TrustGraph is running. - -## Validation - -### JSON Syntax Check -```bash -# Validate JSON before uploading -config='{"description": "test flow", "interfaces": {}}' -echo "$config" | jq . > /dev/null && echo "Valid JSON" || echo "Invalid JSON" -``` - -### Flow Blueprint Verification -```bash -# After uploading, verify the flow blueprint exists -tg-show-flow-blueprints | grep "my-flow-class" - -# Get the flow blueprint definition to verify content -tg-get-flow-blueprint -n "my-flow-class" -``` - -## Flow Blueprint Lifecycle - -### Development Workflow -```bash -# 1. Create flow blueprint -tg-put-flow-blueprint -n "dev-flow" -c "$dev_config" - -# 2. Test with flow instance -tg-start-flow -n "dev-flow" -i "test-instance" -d "Testing" - -# 3. 
Update flow blueprint as needed -tg-put-flow-blueprint -n "dev-flow" -c "$updated_config" - -# 4. Restart flow instance with updates -tg-stop-flow -i "test-instance" -tg-start-flow -n "dev-flow" -i "test-instance" -d "Testing updated" -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-get-flow-blueprint`](tg-get-flow-blueprint.md) - Retrieve flow blueprint definitions -- [`tg-show-flow-blueprints`](tg-show-flow-blueprints.md) - List available flow blueprints -- [`tg-delete-flow-blueprint`](tg-delete-flow-blueprint.md) - Remove flow blueprint definitions -- [`tg-start-flow`](tg-start-flow.md) - Create flow instances from classes - -## API Integration - -This command uses the [Flow API](../apis/api-flow.md) with the `put-class` operation to store flow blueprint definitions. - -## Use Cases - -### Custom Processing Pipelines -```bash -# Create specialized medical analysis flow -tg-put-flow-blueprint -n "medical-nlp" -c "$medical_config" -``` - -### Development Environments -```bash -# Create lightweight development flow -tg-put-flow-blueprint -n "dev-minimal" -c "$minimal_config" -``` - -### Production Deployments -```bash -# Create robust production flow with all services -tg-put-flow-blueprint -n "production-full" -c "$production_config" -``` - -### Domain-Specific Workflows -```bash -# Create legal document analysis flow -tg-put-flow-blueprint -n "legal-analysis" -c "$legal_config" -``` - -## Best Practices - -1. **Descriptive Names**: Use clear, descriptive flow blueprint names -2. **Comprehensive Descriptions**: Include detailed descriptions of flow capabilities -3. **Consistent Naming**: Follow consistent queue naming conventions -4. **Version Control**: Store flow blueprint configurations in version control -5. **Testing**: Test flow blueprints thoroughly before production use -6. 
**Documentation**: Document flow blueprint purposes and requirements - -## Template Examples - -### Minimal Flow Blueprint -```json -{ - "description": "Minimal text processing flow", - "interfaces": { - "text-completion": { - "request": "non-persistent://tg/request/text-completion:minimal", - "response": "non-persistent://tg/response/text-completion:minimal" - } - } -} -``` - -### RAG-Focused Flow Blueprint -```json -{ - "description": "Retrieval Augmented Generation flow", - "interfaces": { - "graph-rag": { - "request": "non-persistent://tg/request/graph-rag:rag-flow", - "response": "non-persistent://tg/response/graph-rag:rag-flow" - }, - "document-rag": { - "request": "non-persistent://tg/request/document-rag:rag-flow", - "response": "non-persistent://tg/response/document-rag:rag-flow" - }, - "embeddings": { - "request": "non-persistent://tg/request/embeddings:rag-flow", - "response": "non-persistent://tg/response/embeddings:rag-flow" - } - } -} -``` - -### Document Processing Flow Blueprint -```json -{ - "description": "Document ingestion and processing flow", - "interfaces": { - "text-load": "persistent://tg/flow/text-document-load:doc-proc", - "document-load": "persistent://tg/flow/document-load:doc-proc", - "triples-store": "persistent://tg/flow/triples-store:doc-proc", - "embeddings": { - "request": "non-persistent://tg/request/embeddings:doc-proc", - "response": "non-persistent://tg/response/embeddings:doc-proc" - } - } -} -``` \ No newline at end of file diff --git a/docs/cli/tg-put-kg-core.md b/docs/cli/tg-put-kg-core.md deleted file mode 100644 index a14871a2..00000000 --- a/docs/cli/tg-put-kg-core.md +++ /dev/null @@ -1,241 +0,0 @@ -# tg-put-kg-core - -Stores a knowledge core in the TrustGraph system from MessagePack format. 
- -## Synopsis - -```bash -tg-put-kg-core --id CORE_ID -i INPUT_FILE [options] -``` - -## Description - -The `tg-put-kg-core` command loads a knowledge core from a MessagePack-formatted file and stores it in the TrustGraph knowledge system. Knowledge cores contain RDF triples and graph embeddings that represent structured knowledge and can be loaded into flows for processing. - -This command processes MessagePack files containing both triples (RDF knowledge) and graph embeddings (vector representations) and stores them via WebSocket connection to the Knowledge API. - -## Options - -### Required Arguments - -- `--id, --identifier CORE_ID`: Unique identifier for the knowledge core -- `-i, --input INPUT_FILE`: Path to MessagePack input file - -### Optional Arguments - -- `-u, --url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `ws://localhost:8088/`) -- `-U, --user USER`: User identifier (default: `trustgraph`) - -## Examples - -### Store Knowledge Core -```bash -tg-put-kg-core --id "research-core-v1" -i knowledge.msgpack -``` - -### With Custom User -```bash -tg-put-kg-core \ - --id "medical-knowledge" \ - -i medical-data.msgpack \ - -U researcher -``` - -### Using Custom API URL -```bash -tg-put-kg-core \ - --id "production-core" \ - -i prod-knowledge.msgpack \ - -u ws://production:8088/ -``` - -## Input File Format - -The input file must be in MessagePack format containing structured knowledge data: - -### MessagePack Structure -The file contains tuples with type indicators: - -#### Triple Data (`"t"`) -```python -("t", { - "m": { # metadata - "i": "core-id", - "m": [], # metadata triples - "u": "user", - "c": "collection" - }, - "t": [ # triples array - { - "s": {"value": "subject", "is_uri": true}, - "p": {"value": "predicate", "is_uri": true}, - "o": {"value": "object", "is_uri": false} - } - ] -}) -``` - -#### Graph Embeddings Data (`"ge"`) -```python -("ge", { - "m": { # metadata - "i": "core-id", - "m": [], # metadata triples - "u": "user", - "c": 
"collection" - }, - "e": [ # entities array - { - "e": {"value": "entity", "is_uri": true}, - "v": [[0.1, 0.2, 0.3]] # vectors - } - ] -}) -``` - -## Processing Flow - -1. **File Reading**: Opens MessagePack file for binary reading -2. **Message Unpacking**: Unpacks MessagePack tuples sequentially -3. **Type Processing**: Handles both triples (`"t"`) and graph embeddings (`"ge"`) -4. **WebSocket Transmission**: Sends each message via WebSocket to Knowledge API -5. **Response Handling**: Waits for confirmation of each message -6. **Progress Reporting**: Shows count of processed messages - -## Output - -The command reports the number of messages processed: - -```bash -Put: 150 triple, 75 GE messages. -``` - -Where: -- **triple**: Number of triple data messages processed -- **GE**: Number of graph embedding messages processed - -## Error Handling - -### File Not Found -```bash -Exception: No such file or directory: 'missing.msgpack' -``` -**Solution**: Verify the input file path exists and is readable. - -### Invalid MessagePack Format -```bash -Exception: Unpacked unexpected message type 'x' -``` -**Solution**: Ensure the input file is properly formatted MessagePack with correct type indicators. - -### Connection Errors -```bash -Exception: Connection refused -``` -**Solution**: Verify the API URL and ensure TrustGraph is running. - -### Knowledge API Errors -```bash -Exception: Knowledge core operation failed -``` -**Solution**: Check that the Knowledge API is available and the core ID is valid. 
- -## File Creation - -MessagePack files can be created using: - -### Python Example -```python -import msgpack - -# Create triples data -triples_msg = ("t", { - "m": {"i": "core-id", "m": [], "u": "user", "c": "default"}, - "t": [ - { - "s": {"value": "Person1", "is_uri": True}, - "p": {"value": "hasName", "is_uri": True}, - "o": {"value": "John Doe", "is_uri": False} - } - ] -}) - -# Create embeddings data -embeddings_msg = ("ge", { - "m": {"i": "core-id", "m": [], "u": "user", "c": "default"}, - "e": [ - { - "e": {"value": "Person1", "is_uri": True}, - "v": [[0.1, 0.2, 0.3, 0.4]] - } - ] -}) - -# Write to file -with open("knowledge.msgpack", "wb") as f: - msgpack.pack(triples_msg, f) - msgpack.pack(embeddings_msg, f) -``` - -### Export from Existing Core -```bash -# Export existing core to MessagePack -tg-get-kg-core --id "existing-core" -o exported.msgpack - -# Import to new core -tg-put-kg-core --id "new-core" -i exported.msgpack -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL (automatically converted to WebSocket format) - -## Related Commands - -- [`tg-get-kg-core`](tg-get-kg-core.md) - Retrieve knowledge core -- [`tg-load-kg-core`](tg-load-kg-core.md) - Load knowledge core into flow -- [`tg-show-kg-cores`](tg-show-kg-cores.md) - List available knowledge cores -- [`tg-delete-kg-core`](tg-delete-kg-core.md) - Remove knowledge core -- [`tg-dump-msgpack`](tg-dump-msgpack.md) - Debug MessagePack files - -## API Integration - -This command uses the [Knowledge API](../apis/api-knowledge.md) via WebSocket connection with `put-kg-core` operations to store knowledge data. 
- -## Use Cases - -### Knowledge Import -```bash -# Import knowledge from external systems -tg-put-kg-core --id "external-kb" -i imported-knowledge.msgpack -``` - -### Data Migration -```bash -# Migrate knowledge between environments -tg-get-kg-core --id "prod-core" -o backup.msgpack -tg-put-kg-core --id "dev-core" -i backup.msgpack -``` - -### Knowledge Versioning -```bash -# Store versioned knowledge cores -tg-put-kg-core --id "research-v2.0" -i research-updated.msgpack -``` - -### Batch Knowledge Loading -```bash -# Load multiple knowledge domains -tg-put-kg-core --id "medical-core" -i medical.msgpack -tg-put-kg-core --id "legal-core" -i legal.msgpack -tg-put-kg-core --id "technical-core" -i technical.msgpack -``` - -## Best Practices - -1. **Unique IDs**: Use descriptive, unique identifiers for knowledge cores -2. **Versioning**: Include version information in core IDs -3. **Validation**: Verify MessagePack files before importing -4. **Backup**: Keep backup copies of important knowledge cores -5. **Documentation**: Document knowledge core contents and sources -6. **Testing**: Test imports with small datasets first \ No newline at end of file diff --git a/docs/cli/tg-remove-library-document.md b/docs/cli/tg-remove-library-document.md deleted file mode 100644 index f3095e85..00000000 --- a/docs/cli/tg-remove-library-document.md +++ /dev/null @@ -1,530 +0,0 @@ -# tg-remove-library-document - -Removes a document from the TrustGraph document library. - -## Synopsis - -```bash -tg-remove-library-document --id DOCUMENT_ID [options] -``` - -## Description - -The `tg-remove-library-document` command permanently removes a document from TrustGraph's document library. This operation deletes the document metadata, content, and any associated processing records. - -**⚠️ Warning**: This operation is permanent and cannot be undone. Ensure you have backups if the document data is important. 
- -## Options - -### Required Arguments - -- `--identifier, --id ID`: Document ID to remove - -### Optional Arguments - -- `-u, --url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-U, --user USER`: User ID (default: `trustgraph`) - -## Examples - -### Remove Single Document -```bash -tg-remove-library-document --id "doc_123456789" -``` - -### Remove with Custom User -```bash -tg-remove-library-document --id "doc_987654321" -U "research-team" -``` - -### Remove with Custom API URL -```bash -tg-remove-library-document --id "doc_555" -u http://staging:8088/ -``` - -## Prerequisites - -### Document Must Exist -Verify the document exists before attempting removal: - -```bash -# List documents to find the ID -tg-show-library-documents - -# Search for specific document -tg-show-library-documents | grep "doc_123456789" -``` - -### Check for Active Processing -Before removing a document, check if it's currently being processed: - -```bash -# Check for active processing jobs -tg-show-flows | grep "processing" - -# Stop any active processing first -# tg-stop-library-processing --id "processing_id" -``` - -## Use Cases - -### Cleanup Old Documents -```bash -# Remove outdated documents -old_docs=("doc_old1" "doc_old2" "doc_deprecated") -for doc_id in "${old_docs[@]}"; do - echo "Removing $doc_id..." 
- tg-remove-library-document --id "$doc_id" -done -``` - -### Remove Test Documents -```bash -# Remove test documents after development -tg-show-library-documents | \ - grep "test\|demo\|sample" | \ - grep "| id" | \ - awk '{print $3}' | \ - while read doc_id; do - echo "Removing test document: $doc_id" - tg-remove-library-document --id "$doc_id" - done -``` - -### User-Specific Cleanup -```bash -# Remove all documents for a specific user -cleanup_user_documents() { - local user="$1" - - echo "Removing all documents for user: $user" - - # Get document IDs for the user - tg-show-library-documents -U "$user" | \ - grep "| id" | \ - awk '{print $3}' | \ - while read doc_id; do - echo "Removing document: $doc_id" - tg-remove-library-document --id "$doc_id" -U "$user" - done -} - -# Usage -cleanup_user_documents "temp-user" -``` - -### Conditional Removal -```bash -# Remove documents based on criteria -remove_by_criteria() { - local criteria="$1" - - echo "Removing documents matching criteria: $criteria" - - tg-show-library-documents | \ - grep -B5 -A5 "$criteria" | \ - grep "| id" | \ - awk '{print $3}' | \ - while read doc_id; do - # Confirm before removal - echo -n "Remove document $doc_id? 
(y/N): " - read confirm - if [[ "$confirm" =~ ^[Yy]$ ]]; then - tg-remove-library-document --id "$doc_id" - echo "Removed: $doc_id" - else - echo "Skipped: $doc_id" - fi - done -} - -# Remove documents containing "draft" in title -remove_by_criteria "draft" -``` - -## Safety Procedures - -### Backup Before Removal -```bash -# Create backup of document metadata before removal -backup_document() { - local doc_id="$1" - local backup_dir="document_backups/$(date +%Y%m%d)" - - mkdir -p "$backup_dir" - - echo "Backing up document: $doc_id" - - # Get document metadata - tg-show-library-documents | \ - grep -A10 -B2 "$doc_id" > "$backup_dir/$doc_id.metadata" - - # Note: Actual document content backup would require additional API - echo "Backup saved: $backup_dir/$doc_id.metadata" -} - -# Backup then remove -safe_remove() { - local doc_id="$1" - - backup_document "$doc_id" - - echo "Removing document: $doc_id" - tg-remove-library-document --id "$doc_id" - - echo "Document removed: $doc_id" -} - -# Usage -safe_remove "doc_123456789" -``` - -### Verification Script -```bash -#!/bin/bash -# safe-remove-document.sh -doc_id="$1" -user="${2:-trustgraph}" - -if [ -z "$doc_id" ]; then - echo "Usage: $0 [user]" - exit 1 -fi - -echo "Safety checks for removing document: $doc_id" - -# Check if document exists -if ! tg-show-library-documents -U "$user" | grep -q "$doc_id"; then - echo "ERROR: Document '$doc_id' not found for user '$user'" - exit 1 -fi - -# Show document details -echo "Document details:" -tg-show-library-documents -U "$user" | grep -A10 -B2 "$doc_id" - -# Check for active processing -echo "Checking for active processing..." -active_processing=$(tg-show-flows | grep -c "processing.*$doc_id" || echo "0") -if [ "$active_processing" -gt 0 ]; then - echo "WARNING: Document has $active_processing active processing jobs" - echo "Consider stopping processing first" -fi - -# Confirm removal -echo "" -read -p "Are you sure you want to remove this document? 
(y/N): " confirm - -if [ "$confirm" = "y" ] || [ "$confirm" = "Y" ]; then - echo "Removing document..." - tg-remove-library-document --id "$doc_id" -U "$user" - - # Verify removal - if ! tg-show-library-documents -U "$user" | grep -q "$doc_id"; then - echo "Document removed successfully" - else - echo "ERROR: Document still exists after removal" - exit 1 - fi -else - echo "Removal cancelled" -fi -``` - -### Bulk Removal with Confirmation -```bash -# Remove multiple documents with individual confirmation -bulk_remove_with_confirmation() { - local doc_list="$1" - - if [ ! -f "$doc_list" ]; then - echo "Usage: $0 " - return 1 - fi - - echo "Bulk removal with confirmation" - echo "Document list: $doc_list" - echo "==============================" - - while IFS= read -r doc_id; do - if [ -n "$doc_id" ]; then - # Show document info - echo -e "\nDocument ID: $doc_id" - tg-show-library-documents | grep -A5 -B1 "$doc_id" | grep -E "title|note|tags" - - # Confirm removal - echo -n "Remove this document? 
(y/N/q): " - read confirm - - case "$confirm" in - y|Y) - tg-remove-library-document --id "$doc_id" - echo "Removed: $doc_id" - ;; - q|Q) - echo "Quitting bulk removal" - break - ;; - *) - echo "Skipped: $doc_id" - ;; - esac - fi - done < "$doc_list" -} - -# Create list of documents to remove -echo -e "doc_123\ndoc_456\ndoc_789" > remove_list.txt -bulk_remove_with_confirmation "remove_list.txt" -``` - -## Advanced Usage - -### Age-Based Removal -```bash -# Remove documents older than specified days -remove_old_documents() { - local days_old="$1" - local dry_run="${2:-false}" - - if [ -z "$days_old" ]; then - echo "Usage: remove_old_documents [dry_run]" - return 1 - fi - - cutoff_date=$(date -d "$days_old days ago" +"%Y-%m-%d") - echo "Removing documents older than $cutoff_date" - - tg-show-library-documents | \ - awk -v cutoff="$cutoff_date" -v dry="$dry_run" ' - /^\| id/ { id = $3 } - /^\| time/ { - if ($3 < cutoff) { - if (dry == "true") { - print "Would remove: " id " (date: " $3 ")" - } else { - system("tg-remove-library-document --id " id) - print "Removed: " id " (date: " $3 ")" - } - } - }' -} - -# Dry run first -remove_old_documents 90 true - -# Actually remove -remove_old_documents 90 false -``` - -### Size-Based Cleanup -```bash -# Remove documents based on collection size limits -cleanup_by_collection_size() { - local max_docs="$1" - - echo "Maintaining maximum $max_docs documents per user" - - # Get unique users - users=$(tg-show-library-documents | grep "| id" | awk '{print $3}' | sort | uniq) - - for user in $users; do - echo "Checking user: $user" - - # Count documents for user - doc_count=$(tg-show-library-documents -U "$user" | grep -c "| id") - - if [ "$doc_count" -gt "$max_docs" ]; then - excess=$((doc_count - max_docs)) - echo "User $user has $doc_count documents (removing $excess oldest)" - - # Get oldest documents (by time) - tg-show-library-documents -U "$user" | \ - awk ' - /^\| id/ { id = $3 } - /^\| time/ { print $3 " " id } - ' | \ - sort 
| \ - head -n "$excess" | \ - while read date doc_id; do - echo "Removing old document: $doc_id ($date)" - tg-remove-library-document --id "$doc_id" -U "$user" - done - else - echo "User $user has $doc_count documents (within limit)" - fi - done -} - -# Maintain maximum 100 documents per user -cleanup_by_collection_size 100 -``` - -### Pattern-Based Removal -```bash -# Remove documents matching specific patterns -remove_by_pattern() { - local pattern="$1" - local field="${2:-title}" - - echo "Removing documents with '$pattern' in $field" - - tg-show-library-documents | \ - awk -v pattern="$pattern" -v field="$field" ' - /^\| id/ { id = $3 } - /^\| title/ && field=="title" { if ($0 ~ pattern) print id } - /^\| note/ && field=="note" { if ($0 ~ pattern) print id } - /^\| tags/ && field=="tags" { if ($0 ~ pattern) print id } - ' | \ - while read doc_id; do - echo "Removing document: $doc_id" - tg-remove-library-document --id "$doc_id" - done -} - -# Remove all test documents -remove_by_pattern "test" "title" -remove_by_pattern "temp" "tags" -``` - -## Error Handling - -### Document Not Found -```bash -Exception: Document not found -``` -**Solution**: Verify document ID exists with `tg-show-library-documents`. - -### Permission Errors -```bash -Exception: Access denied -``` -**Solution**: Check user permissions and document ownership. - -### Active Processing -```bash -Exception: Cannot remove document with active processing -``` -**Solution**: Stop processing with `tg-stop-library-processing` before removal. - -### API Connection Issues -```bash -Exception: Connection refused -``` -**Solution**: Check API URL and ensure TrustGraph is running. 
- -## Monitoring and Logging - -### Removal Logging -```bash -# Log all removals -logged_remove() { - local doc_id="$1" - local log_file="document_removals.log" - - timestamp=$(date) - echo "[$timestamp] Removing document: $doc_id" >> "$log_file" - - # Get document info before removal - tg-show-library-documents | \ - grep -A5 -B1 "$doc_id" >> "$log_file" - - # Remove document - if tg-remove-library-document --id "$doc_id"; then - echo "[$timestamp] Successfully removed: $doc_id" >> "$log_file" - else - echo "[$timestamp] Failed to remove: $doc_id" >> "$log_file" - fi - - echo "---" >> "$log_file" -} - -# Usage -logged_remove "doc_123456789" -``` - -### Audit Trail -```bash -# Create audit trail for removals -create_removal_audit() { - local doc_id="$1" - local reason="$2" - local audit_file="removal_audit.csv" - - # Create header if file doesn't exist - if [ ! -f "$audit_file" ]; then - echo "timestamp,document_id,user,reason,status" > "$audit_file" - fi - - timestamp=$(date '+%Y-%m-%d %H:%M:%S') - user=$(whoami) - - # Attempt removal - if tg-remove-library-document --id "$doc_id"; then - status="success" - else - status="failed" - fi - - # Log to audit file - echo "$timestamp,$doc_id,$user,$reason,$status" >> "$audit_file" -} - -# Usage -create_removal_audit "doc_123" "Outdated content" -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-show-library-documents`](tg-show-library-documents.md) - List library documents -- [`tg-add-library-document`](tg-add-library-document.md) - Add documents to library -- [`tg-start-library-processing`](tg-start-library-processing.md) - Start document processing -- [`tg-stop-library-processing`](tg-stop-library-processing.md) - Stop document processing - -## API Integration - -This command uses the [Library API](../apis/api-librarian.md) to remove documents from the document repository. - -## Best Practices - -1. 
**Always Backup**: Create backups before removing important documents -2. **Verification**: Verify document existence before removal attempts -3. **Processing Check**: Ensure no active processing before removal -4. **Audit Trail**: Maintain logs of all removal operations -5. **Confirmation**: Use interactive confirmation for bulk operations -6. **Testing**: Test removal procedures in non-production environments -7. **Access Control**: Ensure appropriate permissions for removal operations - -## Troubleshooting - -### Document Still Exists After Removal -```bash -# Verify removal -tg-show-library-documents | grep "document-id" - -# Check for caching issues -# Wait a moment and try again - -# Verify API connectivity -curl -s "$TRUSTGRAPH_URL/api/v1/library/documents" > /dev/null -``` - -### Permission Issues -```bash -# Check user permissions -tg-show-library-documents -U "your-user" | grep "document-id" - -# Verify user ownership of document -``` - -### Cannot Remove Due to References -```bash -# Check for document references in processing jobs -tg-show-flows | grep "document-id" - -# Stop any referencing processes first -``` \ No newline at end of file diff --git a/docs/cli/tg-save-doc-embeds.md b/docs/cli/tg-save-doc-embeds.md deleted file mode 100644 index cdbd7882..00000000 --- a/docs/cli/tg-save-doc-embeds.md +++ /dev/null @@ -1,609 +0,0 @@ -# tg-save-doc-embeds - -Saves document embeddings from TrustGraph processing streams to MessagePack format files. - -## Synopsis - -```bash -tg-save-doc-embeds -o OUTPUT_FILE [options] -``` - -## Description - -The `tg-save-doc-embeds` command connects to TrustGraph's document embeddings export stream and saves the embeddings to a file in MessagePack format. This is useful for creating backups of document embeddings, exporting data for analysis, or preparing data for migration between systems. - -The command should typically be started before document processing begins to capture all embeddings as they are generated. 
- -## Options - -### Required Arguments - -- `-o, --output-file FILE`: Output file for saved embeddings - -### Optional Arguments - -- `-u, --url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_API` or `http://localhost:8088/`) -- `-f, --flow-id ID`: Flow instance ID to monitor (default: `default`) -- `--format FORMAT`: Output format - `msgpack` or `json` (default: `msgpack`) -- `--user USER`: Filter by user ID (default: no filter) -- `--collection COLLECTION`: Filter by collection ID (default: no filter) - -## Examples - -### Basic Document Embeddings Export -```bash -tg-save-doc-embeds -o document-embeddings.msgpack -``` - -### Export from Specific Flow -```bash -tg-save-doc-embeds \ - -o research-embeddings.msgpack \ - -f "research-processing-flow" -``` - -### Filter by User and Collection -```bash -tg-save-doc-embeds \ - -o filtered-embeddings.msgpack \ - --user "research-team" \ - --collection "research-docs" -``` - -### Export to JSON Format -```bash -tg-save-doc-embeds \ - -o embeddings.json \ - --format json -``` - -### Production Backup -```bash -tg-save-doc-embeds \ - -o "backup-$(date +%Y%m%d-%H%M%S).msgpack" \ - -u https://production-api.company.com/ \ - -f "production-flow" -``` - -## Output Format - -### MessagePack Structure -Document embeddings are saved as MessagePack records: - -```json -["de", { - "m": { - "i": "document-id", - "m": [{"metadata": "objects"}], - "u": "user-id", - "c": "collection-id" - }, - "c": [{ - "c": "text chunk content", - "v": [0.1, 0.2, 0.3, ...] 
- }] -}] -``` - -### Components -- **Record Type**: `"de"` indicates document embeddings -- **Metadata** (`m`): Document information and context -- **Chunks** (`c`): Text chunks with their vector embeddings - -## Use Cases - -### Backup Creation -```bash -# Create regular backups of document embeddings -create_embeddings_backup() { - local backup_dir="embeddings-backups" - local timestamp=$(date +%Y%m%d_%H%M%S) - local backup_file="$backup_dir/embeddings-$timestamp.msgpack" - - mkdir -p "$backup_dir" - - echo "Creating embeddings backup: $backup_file" - - # Start backup process - tg-save-doc-embeds -o "$backup_file" & - save_pid=$! - - echo "Backup process started (PID: $save_pid)" - echo "To stop: kill $save_pid" - echo "Backup file: $backup_file" - - # Optionally wait for a specific duration - # sleep 3600 # Run for 1 hour - # kill $save_pid -} - -# Create backup -create_embeddings_backup -``` - -### Data Migration Preparation -```bash -# Prepare embeddings for migration -prepare_migration_data() { - local source_env="$1" - local collection="$2" - local migration_file="migration-$(date +%Y%m%d).msgpack" - - echo "Preparing migration data from: $source_env" - echo "Collection: $collection" - - # Export embeddings from source - tg-save-doc-embeds \ - -o "$migration_file" \ - -u "http://$source_env:8088/" \ - --collection "$collection" & - - export_pid=$! - - # Let it run for specified time to capture data - echo "Capturing embeddings for migration..." 
- echo "Process PID: $export_pid" - - # In practice, you'd run this for the duration needed - # sleep 1800 # 30 minutes - # kill $export_pid - - echo "Migration data will be saved to: $migration_file" -} - -# Prepare migration from dev to production -prepare_migration_data "dev-server" "processed-docs" -``` - -### Continuous Export -```bash -# Continuous embeddings export with rotation -continuous_export() { - local output_dir="continuous-exports" - local rotation_hours=24 - local file_prefix="embeddings" - - mkdir -p "$output_dir" - - while true; do - timestamp=$(date +%Y%m%d_%H%M%S) - output_file="$output_dir/${file_prefix}-${timestamp}.msgpack" - - echo "Starting export to: $output_file" - - # Start export for specified duration - timeout ${rotation_hours}h tg-save-doc-embeds -o "$output_file" - - # Compress completed file - gzip "$output_file" - - echo "Export completed and compressed: ${output_file}.gz" - - # Optional: clean up old files - find "$output_dir" -name "*.msgpack.gz" -mtime +30 -delete - - # Brief pause before next rotation - sleep 60 - done -} - -# Start continuous export (run in background) -continuous_export & -``` - -### Analysis and Research -```bash -# Export embeddings for research analysis -export_for_research() { - local research_topic="$1" - local output_file="research-${research_topic}-$(date +%Y%m%d).msgpack" - - echo "Exporting embeddings for research: $research_topic" - - # Start export with filtering - tg-save-doc-embeds \ - -o "$output_file" \ - --collection "$research_topic" & - - export_pid=$! - - echo "Research export started (PID: $export_pid)" - echo "Output: $output_file" - - # Create analysis script - cat > "analyze-${research_topic}.sh" << EOF -#!/bin/bash -# Analysis script for $research_topic embeddings - -echo "Analyzing $research_topic embeddings..." 
- -# Basic statistics -echo "=== Basic Statistics ===" -tg-dump-msgpack -i "$output_file" --summary - -# Detailed analysis -echo "=== Detailed Analysis ===" -tg-dump-msgpack -i "$output_file" | head -10 - -echo "Analysis complete for $research_topic" -EOF - - chmod +x "analyze-${research_topic}.sh" - echo "Analysis script created: analyze-${research_topic}.sh" -} - -# Export for different research topics -export_for_research "cybersecurity" -export_for_research "climate-change" -``` - -## Advanced Usage - -### Selective Export -```bash -# Export embeddings with multiple filters -selective_export() { - local users=("user1" "user2" "user3") - local collections=("docs1" "docs2") - - for user in "${users[@]}"; do - for collection in "${collections[@]}"; do - output_file="embeddings-${user}-${collection}.msgpack" - - echo "Exporting for user: $user, collection: $collection" - - tg-save-doc-embeds \ - -o "$output_file" \ - --user "$user" \ - --collection "$collection" & - - # Store PID for later management - echo $! > "${output_file}.pid" - done - done - - echo "All selective exports started" -} -``` - -### Monitoring and Statistics -```bash -# Monitor export progress with statistics -monitor_export() { - local output_file="$1" - local pid_file="${output_file}.pid" - - if [ ! -f "$pid_file" ]; then - echo "PID file not found: $pid_file" - return 1 - fi - - local export_pid=$(cat "$pid_file") - - echo "Monitoring export (PID: $export_pid)..." - echo "Output file: $output_file" - - while kill -0 "$export_pid" 2>/dev/null; do - if [ -f "$output_file" ]; then - file_size=$(stat -c%s "$output_file" 2>/dev/null || echo "0") - human_size=$(numfmt --to=iec-i --suffix=B "$file_size") - - # Try to count embeddings - embedding_count=$(tg-dump-msgpack -i "$output_file" 2>/dev/null | grep -c '^\["de"' || echo "0") - - echo "File size: $human_size, Embeddings: $embedding_count" - else - echo "Output file not yet created..." 
- fi - - sleep 30 - done - - echo "Export process completed" - rm "$pid_file" -} - -# Start export and monitor -tg-save-doc-embeds -o "monitored-export.msgpack" & -echo $! > "monitored-export.msgpack.pid" -monitor_export "monitored-export.msgpack" -``` - -### Export Validation -```bash -# Validate exported embeddings -validate_export() { - local export_file="$1" - - echo "Validating export file: $export_file" - - # Check file exists and has content - if [ ! -s "$export_file" ]; then - echo "✗ Export file is empty or missing" - return 1 - fi - - # Check MessagePack format - if tg-dump-msgpack -i "$export_file" --summary > /dev/null 2>&1; then - echo "✓ Valid MessagePack format" - else - echo "✗ Invalid MessagePack format" - return 1 - fi - - # Check for document embeddings - embedding_count=$(tg-dump-msgpack -i "$export_file" | grep -c '^\["de"' || echo "0") - - if [ "$embedding_count" -gt 0 ]; then - echo "✓ Contains $embedding_count document embeddings" - else - echo "✗ No document embeddings found" - return 1 - fi - - # Get vector dimension information - summary=$(tg-dump-msgpack -i "$export_file" --summary) - if echo "$summary" | grep -q "Vector dimension:"; then - dimension=$(echo "$summary" | grep "Vector dimension:" | awk '{print $3}') - echo "✓ Vector dimension: $dimension" - else - echo "⚠ Could not determine vector dimension" - fi - - echo "Validation completed successfully" -} -``` - -### Export Scheduling -```bash -# Scheduled export with cron-like functionality -schedule_export() { - local schedule="$1" # e.g., "daily", "hourly", "weekly" - local output_prefix="$2" - - case "$schedule" in - "hourly") - interval=3600 - ;; - "daily") - interval=86400 - ;; - "weekly") - interval=604800 - ;; - *) - echo "Invalid schedule: $schedule" - return 1 - ;; - esac - - echo "Starting $schedule exports with prefix: $output_prefix" - - while true; do - timestamp=$(date +%Y%m%d_%H%M%S) - output_file="${output_prefix}-${timestamp}.msgpack" - - echo "Starting scheduled 
export: $output_file" - - # Run export for the scheduled interval - timeout ${interval}s tg-save-doc-embeds -o "$output_file" - - # Validate and compress - if validate_export "$output_file"; then - gzip "$output_file" - echo "✓ Export completed and compressed: ${output_file}.gz" - else - echo "✗ Export validation failed: $output_file" - mv "$output_file" "${output_file}.failed" - fi - - # Brief pause before next cycle - sleep 60 - done -} - -# Start daily scheduled exports -schedule_export "daily" "daily-embeddings" & -``` - -## Performance Considerations - -### Memory Management -```bash -# Monitor memory usage during export -monitor_memory_export() { - local output_file="$1" - - # Start export - tg-save-doc-embeds -o "$output_file" & - export_pid=$! - - echo "Monitoring memory usage for export (PID: $export_pid)..." - - while kill -0 "$export_pid" 2>/dev/null; do - memory_usage=$(ps -p "$export_pid" -o rss= 2>/dev/null | awk '{print $1/1024}') - - if [ -n "$memory_usage" ]; then - echo "Memory usage: ${memory_usage}MB" - fi - - sleep 10 - done - - echo "Export completed" -} -``` - -### Network Optimization -```bash -# Optimize for network conditions -network_optimized_export() { - local output_file="$1" - local api_url="$2" - - echo "Starting network-optimized export..." - - # Use compression and buffering - tg-save-doc-embeds \ - -o "$output_file" \ - -u "$api_url" \ - --format msgpack & # MessagePack is more compact than JSON - - export_pid=$! - - # Monitor network usage - echo "Monitoring export (PID: $export_pid)..." - - while kill -0 "$export_pid" 2>/dev/null; do - # Monitor network connections - connections=$(netstat -an | grep ":8088" | wc -l) - echo "Active connections: $connections" - sleep 30 - done -} -``` - -## Error Handling - -### Connection Issues -```bash -Exception: WebSocket connection failed -``` -**Solution**: Check API URL and ensure TrustGraph WebSocket service is running. 
- -### Disk Space Issues -```bash -Exception: No space left on device -``` -**Solution**: Free up disk space or use a different output location. - -### Permission Errors -```bash -Exception: Permission denied -``` -**Solution**: Check write permissions for the output file location. - -### Memory Issues -```bash -MemoryError: Unable to allocate memory -``` -**Solution**: Monitor memory usage and consider using smaller export windows. - -## Integration with Other Commands - -### Complete Backup Workflow -```bash -# Complete backup and restore workflow -backup_restore_workflow() { - local backup_file="embeddings-backup.msgpack" - - echo "=== Backup Phase ===" - - # Create backup - tg-save-doc-embeds -o "$backup_file" & - backup_pid=$! - - # Let it run for a while - sleep 300 # 5 minutes - kill $backup_pid - - echo "Backup created: $backup_file" - - # Validate backup - validate_export "$backup_file" - - echo "=== Restore Phase ===" - - # Restore from backup (to different collection) - tg-load-doc-embeds -i "$backup_file" --collection "restored" - - echo "Backup and restore workflow completed" -} -``` - -### Analysis Pipeline -```bash -# Export and analyze embeddings -export_analyze_pipeline() { - local topic="$1" - local export_file="analysis-${topic}.msgpack" - - echo "Starting export and analysis pipeline for: $topic" - - # Export embeddings - tg-save-doc-embeds \ - -o "$export_file" \ - --collection "$topic" & - - export_pid=$! - - # Run for analysis duration - sleep 600 # 10 minutes - kill $export_pid - - # Analyze exported data - echo "Analyzing exported embeddings..." 
- tg-dump-msgpack -i "$export_file" --summary - - # Count embeddings by user - echo "Embeddings by user:" - tg-dump-msgpack -i "$export_file" | \ - jq -r '.[1].m.u' | \ - sort | uniq -c - - echo "Analysis pipeline completed" -} -``` - -## Environment Variables - -- `TRUSTGRAPH_API`: Default API URL - -## Related Commands - -- [`tg-load-doc-embeds`](tg-load-doc-embeds.md) - Load document embeddings from files -- [`tg-dump-msgpack`](tg-dump-msgpack.md) - Analyze MessagePack files -- [`tg-show-flows`](tg-show-flows.md) - List available flows for monitoring - -## API Integration - -This command uses TrustGraph's WebSocket API for document embeddings export, specifically the `/api/v1/flow/{flow-id}/export/document-embeddings` endpoint. - -## Best Practices - -1. **Start Early**: Begin export before processing starts to capture all data -2. **Monitoring**: Monitor export progress and file sizes -3. **Validation**: Always validate exported files -4. **Compression**: Use compression for long-term storage -5. **Rotation**: Implement file rotation for continuous exports -6. **Backup**: Keep multiple backup copies in different locations -7. 
**Documentation**: Document export schedules and procedures - -## Troubleshooting - -### No Data Captured -```bash -# Check if processing is generating embeddings -tg-show-flows | grep processing - -# Verify WebSocket connection -netstat -an | grep :8088 -``` - -### Large File Issues -```bash -# Monitor file growth -watch -n 5 'ls -lh *.msgpack' - -# Check available disk space -df -h -``` - -### Process Management -```bash -# List running export processes -ps aux | grep tg-save-doc-embeds - -# Kill stuck processes -pkill -f tg-save-doc-embeds -``` \ No newline at end of file diff --git a/docs/cli/tg-set-mcp-tool.md b/docs/cli/tg-set-mcp-tool.md deleted file mode 100644 index 90f137a0..00000000 --- a/docs/cli/tg-set-mcp-tool.md +++ /dev/null @@ -1,379 +0,0 @@ -# tg-set-mcp-tool - -## Synopsis - -``` -tg-set-mcp-tool [OPTIONS] --id ID --tool-url URL [--auth-token TOKEN] -``` - -## Description - -The `tg-set-mcp-tool` command configures and registers MCP (Model Control Protocol) tools in the TrustGraph system. It allows defining MCP tool configurations with id, URL, and optional authentication token. Tools are stored in the 'mcp' configuration group for discovery and execution. - -This command is useful for: -- Registering MCP tool endpoints for agent use -- Configuring external MCP server connections -- Managing MCP tool registry for agent workflows -- Integrating third-party MCP tools into TrustGraph - -The command stores MCP tool configurations in the 'mcp' configuration group, separate from regular agent tools. 
- -## Options - -- `-u, --api-url URL` - - TrustGraph API URL for configuration storage - - Default: `http://localhost:8088/` (or `TRUSTGRAPH_URL` environment variable) - - Should point to a running TrustGraph API instance - -- `-i, --id ID` - - **Required.** MCP tool identifier - - Used to reference the MCP tool in configurations - - Must be unique within the MCP tool registry - -- `-r, --remote-name NAME` - - **Optional.** Remote MCP tool name used by the MCP server - - If not specified, defaults to the value of `--id` - - Use when the MCP server expects a different tool name - -- `--tool-url URL` - - **Required.** MCP tool URL endpoint - - Should point to the MCP server endpoint providing the tool functionality - - Must be a valid URL accessible by the TrustGraph system - -- `--auth-token TOKEN` - - **Optional.** Bearer token for authentication - - Used to authenticate with secured MCP endpoints - - Token is sent as `Authorization: Bearer {TOKEN}` header - - Stored in plaintext in configuration (see Security Considerations) - -- `-h, --help` - - Show help message and exit - -## Examples - -### Basic MCP Tool Registration - -Register a weather service MCP tool: -```bash -tg-set-mcp-tool --id weather --tool-url "http://localhost:3000/weather" -``` - -### Calculator MCP Tool - -Register a calculator MCP tool: -```bash -tg-set-mcp-tool --id calculator --tool-url "http://mcp-tools.example.com/calc" -``` - -### Remote MCP Service - -Register a remote MCP service: -```bash -tg-set-mcp-tool --id document-processor \ - --tool-url "https://api.example.com/mcp/documents" -``` - -### Secured MCP Tool with Authentication - -Register an MCP tool that requires bearer token authentication: -```bash -tg-set-mcp-tool --id secure-tool \ - --tool-url "https://api.example.com/mcp" \ - --auth-token "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..." 
-``` - -### MCP Tool with Remote Name - -Register an MCP tool where the server uses a different name: -```bash -tg-set-mcp-tool --id my-weather \ - --remote-name weather_v2 \ - --tool-url "http://weather-server:3000/api" -``` - -### Custom API URL - -Register MCP tool with custom TrustGraph API: -```bash -tg-set-mcp-tool -u http://trustgraph.example.com:8088/ \ - --id custom-mcp --tool-url "http://custom.mcp.com/api" -``` - -### Local Development Setup - -Register MCP tools for local development: -```bash -tg-set-mcp-tool --id dev-tool --tool-url "http://localhost:8080/mcp" -``` - -### Production Setup with Authentication - -Register authenticated MCP tools for production: -```bash -# Using environment variable for token -export MCP_AUTH_TOKEN="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..." -tg-set-mcp-tool --id prod-tool \ - --tool-url "https://prod-mcp.example.com/api" \ - --auth-token "$MCP_AUTH_TOKEN" -``` - -## MCP Tool Configuration - -MCP tools are configured with the following metadata: - -- **id**: Unique identifier for the tool (configuration key) -- **remote-name**: Name used by the MCP server (optional, defaults to id) -- **url**: Endpoint URL for the MCP server -- **auth-token**: Bearer token for authentication (optional) - -The configuration is stored as JSON in the 'mcp' configuration group: - -**Basic configuration:** -```json -{ - "remote-name": "weather", - "url": "http://localhost:3000/weather" -} -``` - -**Configuration with authentication:** -```json -{ - "remote-name": "secure-tool", - "url": "https://api.example.com/mcp", - "auth-token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..." 
-} -``` - -## Advanced Usage - -### Updating Existing MCP Tools - -Update an existing MCP tool configuration: -```bash -# Update MCP tool URL -tg-set-mcp-tool --id weather --tool-url "http://new-weather-server:3000/api" - -# Add authentication to existing tool -tg-set-mcp-tool --id weather \ - --tool-url "http://weather-server:3000/api" \ - --auth-token "new-token-here" - -# Remove authentication (by setting tool without auth-token) -tg-set-mcp-tool --id weather --tool-url "http://weather-server:3000/api" -``` - -### Batch MCP Tool Registration - -Register multiple MCP tools in a script: -```bash -#!/bin/bash -# Register a suite of MCP tools -tg-set-mcp-tool --id search --tool-url "http://search-mcp:3000/api" -tg-set-mcp-tool --id translate --tool-url "http://translate-mcp:3000/api" -tg-set-mcp-tool --id summarize --tool-url "http://summarize-mcp:3000/api" - -# Register secured tools with authentication -tg-set-mcp-tool --id secure-search \ - --tool-url "https://secure-search:3000/api" \ - --auth-token "$SEARCH_TOKEN" -tg-set-mcp-tool --id secure-translate \ - --tool-url "https://secure-translate:3000/api" \ - --auth-token "$TRANSLATE_TOKEN" -``` - -### Environment-Specific Configuration - -Configure MCP tools for different environments: -```bash -# Development environment (no auth) -export TRUSTGRAPH_URL="http://dev.trustgraph.com:8088/" -tg-set-mcp-tool --id dev-mcp --tool-url "http://dev.mcp.com/api" - -# Production environment (with auth) -export TRUSTGRAPH_URL="http://prod.trustgraph.com:8088/" -export PROD_MCP_TOKEN="eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9..." 
-tg-set-mcp-tool --id prod-mcp \ - --tool-url "https://prod.mcp.com/api" \ - --auth-token "$PROD_MCP_TOKEN" -``` - -### MCP Tool Validation - -Verify MCP tool registration: -```bash -# Register MCP tool and verify -tg-set-mcp-tool --id test-mcp --tool-url "http://test.mcp.com/api" - -# Check if MCP tool was registered and view auth status -tg-show-mcp-tools -``` - -## Error Handling - -The command handles various error conditions: - -- **Missing required arguments**: Both name and tool-url must be provided -- **Invalid URLs**: Tool URLs must be valid and accessible -- **API connection errors**: If the TrustGraph API is unavailable -- **Configuration errors**: If MCP tool data cannot be stored - -Common error scenarios: -```bash -# Missing required field -tg-set-mcp-tool --id tool1 -# Output: Exception: Must specify --tool-url for MCP tool - -# Missing id -tg-set-mcp-tool --tool-url "http://example.com/mcp" -# Output: Exception: Must specify --id for MCP tool - -# Invalid API URL -tg-set-mcp-tool -u "invalid-url" --id tool1 --tool-url "http://mcp.com" -# Output: Exception: [API connection error] -``` - -## Integration with Other Commands - -### With MCP Tool Management - -View registered MCP tools: -```bash -# Register MCP tool -tg-set-mcp-tool --id new-mcp --tool-url "http://new.mcp.com/api" - -# View all MCP tools (shows auth status) -tg-show-mcp-tools -``` - -### With Agent Workflows - -Use MCP tools in agent workflows: -```bash -# Register MCP tool with authentication -tg-set-mcp-tool --id weather \ - --tool-url "https://weather.mcp.com/api" \ - --auth-token "$WEATHER_TOKEN" - -# Invoke MCP tool directly (auth handled automatically) -tg-invoke-mcp-tool --name weather --parameters '{"location": "London"}' -``` - -### With Configuration Management - -MCP tools integrate with configuration management: -```bash -# Register MCP tool -tg-set-mcp-tool --id config-mcp --tool-url "http://config.mcp.com/api" - -# View all MCP tool configurations -tg-show-mcp-tools -``` - 
-## Best Practices - -1. **Clear Naming**: Use descriptive, unique MCP tool identifiers -2. **Reliable URLs**: Ensure MCP endpoints are stable and accessible -3. **Use HTTPS**: Always use HTTPS URLs when authentication is required -4. **Secure Tokens**: Store auth tokens in environment variables, not in scripts -5. **Token Rotation**: Regularly rotate authentication tokens -6. **Health Checks**: Verify MCP endpoints are operational before registration -7. **Documentation**: Document MCP tool capabilities and usage -8. **Error Handling**: Implement proper error handling for MCP endpoints -9. **Monitoring**: Monitor MCP tool availability and performance -10. **Access Control**: Restrict access to configuration system containing tokens - -## Troubleshooting - -### MCP Tool Not Appearing - -If a registered MCP tool doesn't appear in listings: -1. Verify the MCP tool was registered successfully -2. Check MCP tool registry with `tg-show-mcp-tools` -3. Ensure the API URL is correct -4. Verify TrustGraph API is running - -### MCP Tool Registration Errors - -If MCP tool registration fails: -1. Check all required arguments are provided -2. Verify the tool URL is accessible -3. Ensure the MCP endpoint is operational -4. Check API connectivity -5. Review error messages for specific issues - -### MCP Tool Connectivity Issues - -If MCP tools aren't working as expected: -1. Verify MCP endpoint is accessible from TrustGraph -2. Check MCP server logs for errors -3. Ensure MCP protocol compatibility -4. Review network connectivity and firewall rules -5. 
Test MCP endpoint directly - -## MCP Protocol - -The Model Control Protocol (MCP) is a standardized interface for AI model tools: - -- **Standardized API**: Consistent interface across different tools -- **Extensible**: Support for complex tool interactions -- **Stateful**: Can maintain state across multiple interactions -- **Secure**: Built-in security and authentication mechanisms - -## Security Considerations - -When registering MCP tools: - -1. **URL Validation**: Ensure URLs are legitimate and secure -2. **Network Security**: Always use HTTPS for authenticated endpoints -3. **Token Storage**: Auth tokens are stored in plaintext in the configuration system - - Ensure proper access control on the configuration storage - - Use short-lived tokens when possible - - Implement token rotation policies -4. **Token Transmission**: Use HTTPS to prevent token interception -5. **Access Control**: Implement proper authentication for MCP endpoints -6. **Token Exposure**: - - Use environment variables to pass tokens to the command - - Don't hardcode tokens in scripts or commit them to version control - - The `tg-show-mcp-tools` command masks token values for security -7. **Input Validation**: Validate all inputs to MCP tools -8. **Error Handling**: Don't expose sensitive information in error messages -9. **Least Privilege**: Grant tokens minimum required permissions -10. 
**Audit Logging**: Monitor configuration changes for security events - -### Authentication Best Practices - -When using the `--auth-token` parameter: - -- **Store tokens securely**: Use environment variables or secrets management systems -- **Use HTTPS**: Always use HTTPS URLs when providing authentication tokens -- **Rotate regularly**: Implement a token rotation schedule -- **Monitor usage**: Track which services are accessing authenticated endpoints -- **Revoke on compromise**: Have a process to quickly revoke and rotate compromised tokens - -Example secure workflow: -```bash -# Store token in environment variable (not in script) -export MCP_TOKEN=$(cat /secure/path/to/token) - -# Use HTTPS for authenticated endpoints -tg-set-mcp-tool --id secure-service \ - --tool-url "https://secure.example.com/mcp" \ - --auth-token "$MCP_TOKEN" - -# Clear token from environment after use -unset MCP_TOKEN -``` - -## Related Commands - -- [`tg-show-mcp-tools`](tg-show-mcp-tools.md) - Display registered MCP tools -- [`tg-delete-mcp-tool`](tg-delete-mcp-tool.md) - Remove MCP tool configurations -- [`tg-invoke-mcp-tool`](tg-invoke-mcp-tool.md) - Execute MCP tools -- [`tg-set-tool`](tg-set-tool.md) - Configure regular agent tools - -## See Also - -- MCP Protocol Documentation -- TrustGraph MCP Integration Guide -- Agent Tool Configuration Guide \ No newline at end of file diff --git a/docs/cli/tg-set-prompt.md b/docs/cli/tg-set-prompt.md deleted file mode 100644 index a230bf7b..00000000 --- a/docs/cli/tg-set-prompt.md +++ /dev/null @@ -1,442 +0,0 @@ -# tg-set-prompt - -Sets prompt templates and system prompts for TrustGraph LLM services. - -## Synopsis - -```bash -# Set a prompt template -tg-set-prompt --id TEMPLATE_ID --prompt TEMPLATE [options] - -# Set system prompt -tg-set-prompt --system SYSTEM_PROMPT -``` - -## Description - -The `tg-set-prompt` command configures prompt templates and system prompts used by TrustGraph's LLM services. 
Prompt templates contain placeholders like `{{variable}}` that are replaced with actual values when invoked. System prompts provide global context for all LLM interactions. - -Templates are stored in TrustGraph's configuration system and can be used with `tg-invoke-prompt` for consistent AI interactions. - -## Options - -### Prompt Template Mode - -- `--id ID`: Unique identifier for the prompt template (required for templates) -- `--prompt TEMPLATE`: Prompt template text with `{{variable}}` placeholders (required for templates) -- `--response TYPE`: Response format - `text` or `json` (default: `text`) -- `--schema SCHEMA`: JSON schema for structured responses (required when response is `json`) - -### System Prompt Mode - -- `--system PROMPT`: System prompt text (cannot be used with other options) - -### Common Options - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) - -## Examples - -### Basic Prompt Template -```bash -tg-set-prompt \ - --id "greeting" \ - --prompt "Hello {{name}}, welcome to {{place}}!" -``` - -### Question-Answer Template -```bash -tg-set-prompt \ - --id "question" \ - --prompt "Answer this question based on the context: {{question}}\n\nContext: {{context}}" -``` - -### JSON Response Template -```bash -tg-set-prompt \ - --id "extract-info" \ - --prompt "Extract key information from: {{text}}" \ - --response "json" \ - --schema '{"type": "object", "properties": {"name": {"type": "string"}, "age": {"type": "number"}}}' -``` - -### Analysis Template -```bash -tg-set-prompt \ - --id "analyze" \ - --prompt "Analyze the following {{data_type}} and provide insights about {{focus_area}}:\n\n{{data}}\n\nFormat the response as {{format}}." -``` - -### System Prompt -```bash -tg-set-prompt \ - --system "You are a helpful AI assistant. Always provide accurate, concise responses. When uncertain, clearly state your limitations." 
-``` - -## Template Variables - -### Variable Syntax -Templates use `{{variable}}` syntax for placeholders: -```bash -# Template -"Hello {{name}}, today is {{day}}" - -# Usage -tg-invoke-prompt greeting name="Alice" day="Monday" -# Result: "Hello Alice, today is Monday" -``` - -### Common Variables -- `{{text}}` - Input text for processing -- `{{question}}` - Question to answer -- `{{context}}` - Background context -- `{{data}}` - Data to analyze -- `{{format}}` - Output format specification - -## Response Types - -### Text Response (Default) -```bash -tg-set-prompt \ - --id "summarize" \ - --prompt "Summarize this text in {{max_words}} words: {{text}}" -``` - -### JSON Response -```bash -tg-set-prompt \ - --id "classify" \ - --prompt "Classify this text: {{text}}" \ - --response "json" \ - --schema '{ - "type": "object", - "properties": { - "category": {"type": "string"}, - "confidence": {"type": "number", "minimum": 0, "maximum": 1} - }, - "required": ["category", "confidence"] - }' -``` - -## Use Cases - -### Document Processing Templates -```bash -# Document summarization -tg-set-prompt \ - --id "document-summary" \ - --prompt "Provide a {{length}} summary of this document:\n\n{{document}}\n\nFocus on: {{focus_areas}}" - -# Key point extraction -tg-set-prompt \ - --id "extract-key-points" \ - --prompt "Extract the main points from: {{text}}\n\nReturn as a bulleted list." - -# Document classification -tg-set-prompt \ - --id "classify-document" \ - --prompt "Classify this document into one of these categories: {{categories}}\n\nDocument: {{text}}" \ - --response "json" \ - --schema '{"type": "object", "properties": {"category": {"type": "string"}, "confidence": {"type": "number"}}}' -``` - -### Code Analysis Templates -```bash -# Code review -tg-set-prompt \ - --id "code-review" \ - --prompt "Review this {{language}} code for {{focus}} issues:\n\n{{code}}\n\nProvide specific recommendations." 
- -# Bug detection -tg-set-prompt \ - --id "find-bugs" \ - --prompt "Analyze this code for potential bugs:\n\n{{code}}\n\nError context: {{error}}" - -# Code explanation -tg-set-prompt \ - --id "explain-code" \ - --prompt "Explain how this {{language}} code works:\n\n{{code}}\n\nTarget audience: {{audience}}" -``` - -### Data Analysis Templates -```bash -# Data insights -tg-set-prompt \ - --id "data-insights" \ - --prompt "Analyze this {{data_type}} data and provide insights:\n\n{{data}}\n\nFocus on: {{metrics}}" - -# Trend analysis -tg-set-prompt \ - --id "trend-analysis" \ - --prompt "Identify trends in this data over {{timeframe}}:\n\n{{data}}" \ - --response "json" \ - --schema '{"type": "object", "properties": {"trends": {"type": "array", "items": {"type": "string"}}}}' -``` - -### Content Generation Templates -```bash -# Marketing copy -tg-set-prompt \ - --id "marketing-copy" \ - --prompt "Create {{tone}} marketing copy for {{product}} targeting {{audience}}. Key features: {{features}}" - -# Technical documentation -tg-set-prompt \ - --id "tech-docs" \ - --prompt "Generate technical documentation for:\n\n{{code}}\n\nInclude: {{sections}}" -``` - -## Advanced Usage - -### Multi-Step Templates -```bash -# Research template -tg-set-prompt \ - --id "research" \ - --prompt "Research question: {{question}} - -Available sources: {{sources}} - -Please: -1. Analyze the question -2. Review relevant sources -3. Synthesize findings -4. Provide conclusions - -Format: {{output_format}}" -``` - -### Conditional Templates -```bash -# Adaptive response template -tg-set-prompt \ - --id "adaptive-response" \ - --prompt "Task: {{task}} -Context: {{context}} -Expertise level: {{level}} - -If expertise level is 'beginner', provide simple explanations. -If expertise level is 'advanced', include technical details. -If task involves code, include examples. 
- -Response:" -``` - -### Structured Analysis Template -```bash -tg-set-prompt \ - --id "structured-analysis" \ - --prompt "Analyze: {{subject}} -Criteria: {{criteria}} -Data: {{data}} - -Provide analysis in this structure: -- Overview -- Key Findings -- Recommendations -- Next Steps" \ - --response "json" \ - --schema '{ - "type": "object", - "properties": { - "overview": {"type": "string"}, - "key_findings": {"type": "array", "items": {"type": "string"}}, - "recommendations": {"type": "array", "items": {"type": "string"}}, - "next_steps": {"type": "array", "items": {"type": "string"}} - } - }' -``` - -### Template Management -```bash -# Create template collection for specific domain -domain="customer-support" -templates=( - "greeting:Hello! I'm here to help with {{issue_type}}. What can I assist you with?" - "escalation:I understand your frustration with {{issue}}. Let me escalate this to {{department}}." - "resolution:Great! I've resolved your {{issue}}. Is there anything else I can help with?" -) - -for template in "${templates[@]}"; do - IFS=':' read -r id prompt <<< "$template" - tg-set-prompt --id "${domain}-${id}" --prompt "$prompt" -done -``` - -## System Prompt Configuration - -### General Purpose System Prompt -```bash -tg-set-prompt --system "You are a knowledgeable AI assistant. Provide accurate, helpful responses. When you don't know something, say so clearly. Always consider the context and be concise unless detail is specifically requested." -``` - -### Domain-Specific System Prompt -```bash -tg-set-prompt --system "You are a technical documentation assistant specializing in software development. Focus on clarity, accuracy, and practical examples. Always include code snippets when relevant and explain complex concepts step-by-step." -``` - -### Role-Based System Prompt -```bash -tg-set-prompt --system "You are a data analyst AI. When analyzing data, always consider statistical significance, potential biases, and limitations. 
Present findings objectively and suggest actionable insights." -``` - -## Error Handling - -### Missing Required Fields -```bash -Exception: Must specify --id for prompt -``` -**Solution**: Provide both `--id` and `--prompt` for template creation. - -### Invalid Response Type -```bash -Exception: Response must be one of: text json -``` -**Solution**: Use only `text` or `json` for the `--response` option. - -### Invalid JSON Schema -```bash -Exception: JSON schema must be valid JSON -``` -**Solution**: Validate JSON schema syntax before using `--schema`. - -### Conflicting Options -```bash -Exception: Can't use --system with other args -``` -**Solution**: Use `--system` alone, or use template options without `--system`. - -## Template Testing - -### Test Template Creation -```bash -# Create and test a simple template -tg-set-prompt \ - --id "test-template" \ - --prompt "Test template with {{variable1}} and {{variable2}}" - -# Test the template -tg-invoke-prompt test-template variable1="hello" variable2="world" -``` - -### Validate JSON Templates -```bash -# Create JSON template -tg-set-prompt \ - --id "json-test" \ - --prompt "Extract data from: {{text}}" \ - --response "json" \ - --schema '{"type": "object", "properties": {"result": {"type": "string"}}}' - -# Test JSON response -tg-invoke-prompt json-test text="Sample text for testing" -``` - -### Template Iteration -```bash -# Version 1 -tg-set-prompt \ - --id "analysis-v1" \ - --prompt "Analyze: {{data}}" - -# Version 2 (improved) -tg-set-prompt \ - --id "analysis-v2" \ - --prompt "Analyze the following {{data_type}} and provide insights about {{focus}}:\n\n{{data}}\n\nConsider: {{considerations}}" - -# Version 3 (structured) -tg-set-prompt \ - --id "analysis-v3" \ - --prompt "Analyze: {{data}}" \ - --response "json" \ - --schema '{"type": "object", "properties": {"summary": {"type": "string"}, "insights": {"type": "array"}}}' -``` - -## Best Practices - -### Template Design -```bash -# Good: Clear, specific 
prompts -tg-set-prompt \ - --id "good-summary" \ - --prompt "Summarize this {{document_type}} in {{word_count}} words, focusing on {{key_aspects}}:\n\n{{content}}" - -# Better: Include context and constraints -tg-set-prompt \ - --id "better-summary" \ - --prompt "Task: Summarize the following {{document_type}} -Length: {{word_count}} words maximum -Focus: {{key_aspects}} -Audience: {{target_audience}} - -Document: -{{content}} - -Summary:" -``` - -### Variable Naming -```bash -# Use descriptive variable names -tg-set-prompt \ - --id "descriptive-vars" \ - --prompt "Analyze {{data_source}} data from {{time_period}} for {{business_metric}} trends" - -# Group related variables -tg-set-prompt \ - --id "grouped-vars" \ - --prompt "Compare {{baseline_data}} vs {{comparison_data}} using {{analysis_method}}" -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-show-prompts`](tg-show-prompts.md) - Display configured prompts -- [`tg-invoke-prompt`](tg-invoke-prompt.md) - Use prompt templates -- [`tg-invoke-document-rag`](tg-invoke-document-rag.md) - Document-based AI queries - -## API Integration - -This command uses the [Config API](../apis/api-config.md) to store prompt templates and system prompts in TrustGraph's configuration system. - -## Best Practices - -1. **Clear Templates**: Write clear, specific prompt templates -2. **Variable Names**: Use descriptive variable names -3. **Response Types**: Choose appropriate response types for your use case -4. **Schema Validation**: Always validate JSON schemas before setting -5. **Version Control**: Consider versioning important templates -6. **Testing**: Test templates thoroughly with various inputs -7. 
**Documentation**: Document template variables and expected usage - -## Troubleshooting - -### Template Not Working -```bash -# Check template exists -tg-show-prompts | grep "template-id" - -# Verify variable names match -tg-invoke-prompt template-id var1="test" var2="test" -``` - -### JSON Schema Errors -```bash -# Validate schema separately -echo '{"type": "object"}' | jq . - -# Test with simple schema first -tg-set-prompt --id "test" --prompt "test" --response "json" --schema '{"type": "string"}' -``` - -### System Prompt Issues -```bash -# Check current system prompt -tg-show-prompts | grep -A5 "System prompt" - -# Reset if needed -tg-set-prompt --system "Default system prompt" -``` \ No newline at end of file diff --git a/docs/cli/tg-set-token-costs.md b/docs/cli/tg-set-token-costs.md deleted file mode 100644 index a8c591d0..00000000 --- a/docs/cli/tg-set-token-costs.md +++ /dev/null @@ -1,464 +0,0 @@ -# tg-set-token-costs - -Sets token cost configuration for language models in TrustGraph. - -## Synopsis - -```bash -tg-set-token-costs --model MODEL_ID -i INPUT_COST -o OUTPUT_COST [options] -``` - -## Description - -The `tg-set-token-costs` command configures the token pricing for language models used by TrustGraph. This information is used for cost tracking, billing, and resource management across AI operations. - -Token costs are specified in dollars per million tokens and are stored in TrustGraph's configuration system for use by cost monitoring and reporting tools. 
- -## Options - -### Required Arguments - -- `--model MODEL_ID`: Language model identifier -- `-i, --input-costs COST`: Input token cost in $ per 1M tokens -- `-o, --output-costs COST`: Output token cost in $ per 1M tokens - -### Optional Arguments - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) - -## Examples - -### Set Costs for GPT-4 -```bash -tg-set-token-costs \ - --model "gpt-4" \ - -i 30.0 \ - -o 60.0 -``` - -### Set Costs for Claude Sonnet -```bash -tg-set-token-costs \ - --model "claude-3-sonnet" \ - -i 3.0 \ - -o 15.0 -``` - -### Set Costs for Local Model -```bash -tg-set-token-costs \ - --model "llama-2-7b" \ - -i 0.0 \ - -o 0.0 -``` - -### Set Costs with Custom API URL -```bash -tg-set-token-costs \ - --model "gpt-3.5-turbo" \ - -i 0.5 \ - -o 1.5 \ - -u http://production:8088/ -``` - -## Model Pricing Examples - -### OpenAI Models (as of 2024) -```bash -# GPT-4 Turbo -tg-set-token-costs --model "gpt-4-turbo" -i 10.0 -o 30.0 - -# GPT-4 -tg-set-token-costs --model "gpt-4" -i 30.0 -o 60.0 - -# GPT-3.5 Turbo -tg-set-token-costs --model "gpt-3.5-turbo" -i 0.5 -o 1.5 -``` - -### Anthropic Models -```bash -# Claude 3 Opus -tg-set-token-costs --model "claude-3-opus" -i 15.0 -o 75.0 - -# Claude 3 Sonnet -tg-set-token-costs --model "claude-3-sonnet" -i 3.0 -o 15.0 - -# Claude 3 Haiku -tg-set-token-costs --model "claude-3-haiku" -i 0.25 -o 1.25 -``` - -### Google Models -```bash -# Gemini Pro -tg-set-token-costs --model "gemini-pro" -i 0.5 -o 1.5 - -# Gemini Ultra -tg-set-token-costs --model "gemini-ultra" -i 8.0 -o 24.0 -``` - -### Local/Open Source Models -```bash -# Local models typically have no API costs -tg-set-token-costs --model "llama-2-70b" -i 0.0 -o 0.0 -tg-set-token-costs --model "mistral-7b" -i 0.0 -o 0.0 -tg-set-token-costs --model "local-model" -i 0.0 -o 0.0 -``` - -## Use Cases - -### Cost Tracking Setup -```bash -# Set up comprehensive cost tracking -models=( - "gpt-4:30.0:60.0" - 
"gpt-3.5-turbo:0.5:1.5" - "claude-3-sonnet:3.0:15.0" - "claude-3-haiku:0.25:1.25" -) - -for model_config in "${models[@]}"; do - IFS=':' read -r model input_cost output_cost <<< "$model_config" - echo "Setting costs for $model..." - tg-set-token-costs --model "$model" -i "$input_cost" -o "$output_cost" -done -``` - -### Environment-Specific Pricing -```bash -# Set different costs for different environments -set_environment_costs() { - local env_url="$1" - local multiplier="$2" # Cost multiplier for environment - - echo "Setting costs for environment: $env_url (multiplier: $multiplier)" - - # Base costs - declare -A base_costs=( - ["gpt-4"]="30.0:60.0" - ["claude-3-sonnet"]="3.0:15.0" - ["gpt-3.5-turbo"]="0.5:1.5" - ) - - for model in "${!base_costs[@]}"; do - IFS=':' read -r input_cost output_cost <<< "${base_costs[$model]}" - - # Apply multiplier - adjusted_input=$(echo "$input_cost * $multiplier" | bc -l) - adjusted_output=$(echo "$output_cost * $multiplier" | bc -l) - - echo " $model: input=$adjusted_input, output=$adjusted_output" - tg-set-token-costs \ - --model "$model" \ - -i "$adjusted_input" \ - -o "$adjusted_output" \ - -u "$env_url" - done -} - -# Production environment (full cost) -set_environment_costs "http://prod:8088/" 1.0 - -# Development environment (reduced cost for budgeting) -set_environment_costs "http://dev:8088/" 0.1 -``` - -### Cost Update Automation -```bash -# Automated cost updates from pricing file -update_costs_from_file() { - local pricing_file="$1" - - if [ ! 
-f "$pricing_file" ]; then - echo "Pricing file not found: $pricing_file" - return 1 - fi - - echo "Updating costs from: $pricing_file" - - # Expected format: model_id,input_cost,output_cost - while IFS=',' read -r model input_cost output_cost; do - # Skip header line - if [ "$model" = "model_id" ]; then - continue - fi - - echo "Updating $model: input=$input_cost, output=$output_cost" - tg-set-token-costs --model "$model" -i "$input_cost" -o "$output_cost" - - done < "$pricing_file" -} - -# Create example pricing file -cat > model_pricing.csv << EOF -model_id,input_cost,output_cost -gpt-4,30.0,60.0 -gpt-3.5-turbo,0.5,1.5 -claude-3-sonnet,3.0,15.0 -claude-3-haiku,0.25,1.25 -EOF - -# Update costs from file -update_costs_from_file "model_pricing.csv" -``` - -### Bulk Cost Management -```bash -# Bulk cost updates with validation -bulk_cost_update() { - local updates=( - "gpt-4-turbo:10.0:30.0" - "gpt-4:30.0:60.0" - "claude-3-opus:15.0:75.0" - "claude-3-sonnet:3.0:15.0" - "gemini-pro:0.5:1.5" - ) - - echo "Bulk cost update starting..." - - for update in "${updates[@]}"; do - IFS=':' read -r model input_cost output_cost <<< "$update" - - # Validate costs are numeric - if ! [[ "$input_cost" =~ ^[0-9]+\.?[0-9]*$ ]] || ! [[ "$output_cost" =~ ^[0-9]+\.?[0-9]*$ ]]; then - echo "Error: Invalid cost format for $model" - continue - fi - - echo "Setting costs for $model..." - if tg-set-token-costs --model "$model" -i "$input_cost" -o "$output_cost"; then - echo "✓ Updated $model" - else - echo "✗ Failed to update $model" - fi - done - - echo "Bulk update completed" -} - -bulk_cost_update -``` - -## Advanced Usage - -### Cost Tier Management -```bash -# Manage different cost tiers -set_cost_tier() { - local tier="$1" - - case "$tier" in - "premium") - echo "Setting premium tier costs..." - tg-set-token-costs --model "gpt-4" -i 30.0 -o 60.0 - tg-set-token-costs --model "claude-3-opus" -i 15.0 -o 75.0 - ;; - "standard") - echo "Setting standard tier costs..." 
- tg-set-token-costs --model "gpt-3.5-turbo" -i 0.5 -o 1.5 - tg-set-token-costs --model "claude-3-sonnet" -i 3.0 -o 15.0 - ;; - "budget") - echo "Setting budget tier costs..." - tg-set-token-costs --model "claude-3-haiku" -i 0.25 -o 1.25 - tg-set-token-costs --model "local-model" -i 0.0 -o 0.0 - ;; - *) - echo "Unknown tier: $tier" - echo "Available tiers: premium, standard, budget" - return 1 - ;; - esac -} - -# Set costs for different tiers -set_cost_tier "premium" -set_cost_tier "standard" -set_cost_tier "budget" -``` - -### Dynamic Pricing Updates -```bash -# Update costs based on current market rates -update_dynamic_pricing() { - local pricing_api_url="$1" # Hypothetical pricing API - - echo "Fetching current pricing from: $pricing_api_url" - - # This would integrate with actual pricing APIs - # For demonstration, using static data - - declare -A current_prices=( - ["gpt-4"]="30.0:60.0" - ["gpt-3.5-turbo"]="0.5:1.5" - ["claude-3-sonnet"]="3.0:15.0" - ) - - for model in "${!current_prices[@]}"; do - IFS=':' read -r input_cost output_cost <<< "${current_prices[$model]}" - - echo "Updating $model with current market rates..." - tg-set-token-costs --model "$model" -i "$input_cost" -o "$output_cost" - done -} -``` - -### Cost Validation -```bash -# Validate cost settings -validate_costs() { - local model="$1" - local input_cost="$2" - local output_cost="$3" - - echo "Validating costs for $model..." 
- - # Check cost reasonableness - if (( $(echo "$input_cost < 0" | bc -l) )); then - echo "Error: Input cost cannot be negative" - return 1 - fi - - if (( $(echo "$output_cost < 0" | bc -l) )); then - echo "Error: Output cost cannot be negative" - return 1 - fi - - # Check if output cost is typically higher - if (( $(echo "$output_cost < $input_cost" | bc -l) )); then - echo "Warning: Output cost is lower than input cost (unusual but not invalid)" - fi - - # Check for extremely high costs - if (( $(echo "$input_cost > 100" | bc -l) )) || (( $(echo "$output_cost > 200" | bc -l) )); then - echo "Warning: Costs are unusually high" - fi - - echo "Validation passed for $model" - return 0 -} - -# Validate before setting -if validate_costs "gpt-4" 30.0 60.0; then - tg-set-token-costs --model "gpt-4" -i 30.0 -o 60.0 -fi -``` - -## Error Handling - -### Missing Required Arguments -```bash -Exception: error: the following arguments are required: --model, -i/--input-costs, -o/--output-costs -``` -**Solution**: Provide all required arguments: model ID, input cost, and output cost. - -### Invalid Cost Values -```bash -Exception: argument -i/--input-costs: invalid float value -``` -**Solution**: Ensure cost values are valid numbers (e.g., 1.5, not "1.5a"). - -### API Connection Issues -```bash -Exception: Connection refused -``` -**Solution**: Check API URL and ensure TrustGraph is running. - -### Configuration Access Errors -```bash -Exception: Access denied to configuration -``` -**Solution**: Verify user permissions for configuration management. 
- -## Cost Monitoring Integration - -### Cost Verification -```bash -# Verify costs were set correctly -verify_costs() { - local model="$1" - - echo "Verifying costs for model: $model" - - # Check current settings - if costs=$(tg-show-token-costs | grep "$model"); then - echo "Current costs: $costs" - else - echo "Error: No costs found for model $model" - return 1 - fi -} - -# Set and verify -tg-set-token-costs --model "test-model" -i 1.0 -o 2.0 -verify_costs "test-model" -``` - -### Cost Reporting Integration -```bash -# Generate cost report after updates -generate_cost_report() { - local report_file="cost_report_$(date +%Y%m%d_%H%M%S).txt" - - echo "Cost Configuration Report - $(date)" > "$report_file" - echo "======================================" >> "$report_file" - - tg-show-token-costs >> "$report_file" - - echo "Report generated: $report_file" -} - -# Update costs and generate report -tg-set-token-costs --model "gpt-4" -i 30.0 -o 60.0 -generate_cost_report -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-show-token-costs`](tg-show-token-costs.md) - Display current token costs -- [`tg-show-config`](tg-show-config.md) - Show configuration settings (if available) - -## API Integration - -This command uses the [Config API](../apis/api-config.md) to store token cost configuration in TrustGraph's configuration system. - -## Best Practices - -1. **Regular Updates**: Keep costs current with market rates -2. **Validation**: Validate cost values before setting -3. **Documentation**: Document cost sources and update procedures -4. **Environment Consistency**: Maintain consistent costs across environments -5. **Monitoring**: Track cost changes over time -6. **Backup**: Export cost configurations for backup -7. 
**Automation**: Automate cost updates where possible - -## Troubleshooting - -### Costs Not Taking Effect -```bash -# Verify costs were set -tg-show-token-costs | grep "model-name" - -# Check API connectivity -curl -s "$TRUSTGRAPH_URL/api/v1/config" > /dev/null -``` - -### Incorrect Cost Calculations -```bash -# Verify cost format (per million tokens) -# $30 per million tokens = 30.0, not 0.00003 - -# Check decimal precision -echo "scale=6; 30/1000000" | bc -l # This gives cost per token -``` - -### Permission Issues -```bash -# Check configuration access -tg-show-token-costs - -# Verify user has admin privileges for cost management -``` \ No newline at end of file diff --git a/docs/cli/tg-set-tool.md b/docs/cli/tg-set-tool.md deleted file mode 100644 index 00a15b2b..00000000 --- a/docs/cli/tg-set-tool.md +++ /dev/null @@ -1,322 +0,0 @@ -# tg-set-tool - -## Synopsis - -``` -tg-set-tool [OPTIONS] --id ID --name NAME --type TYPE --description DESCRIPTION [--argument ARG...] -``` - -## Description - -The `tg-set-tool` command configures and registers tools in the TrustGraph system. It allows defining tool metadata including ID, name, description, type, and argument specifications. Tools are stored in the agent configuration and indexed for discovery and execution. - -This command is useful for: -- Registering new tools for agent use -- Updating existing tool configurations -- Defining tool arguments and parameter types -- Managing the tool registry for agent workflows - -The command updates both the tool index and stores the complete tool configuration in the TrustGraph API. 
- -## Options - -- `-u, --api-url URL` - - TrustGraph API URL for configuration storage - - Default: `http://localhost:8088/` (or `TRUSTGRAPH_URL` environment variable) - - Should point to a running TrustGraph API instance - -- `--id ID` - - **Required.** Unique identifier for the tool - - Used to reference the tool in configurations and agent workflows - - Must be unique within the tool registry - -- `--name NAME` - - **Required.** Tool name used by agents to invoke this tool - - Must be a valid function identifier (use snake_case, no spaces or special characters) - - Examples: `get_weather`, `calculate_distance`, `search_documents` - -- `--type TYPE` - - **Required.** Tool type defining its functionality - - Valid types: - - `knowledge-query` - Query knowledge bases - - `text-completion` - Text completion/generation - - `mcp-tool` - Model Control Protocol tool - -- `--description DESCRIPTION` - - **Required.** Detailed description of what the tool does - - Used by agents to understand tool capabilities - - Should clearly explain the tool's purpose and function - -- `--argument ARG` - - Tool argument specification in format: `name:type:description` - - Can be specified multiple times for multiple arguments - - Valid argument types: - - `string` - String/text parameter - - `number` - Numeric parameter - -- `-h, --help` - - Show help message and exit - -## Examples - -### Basic Tool Registration - -Register a simple weather lookup tool: -```bash -tg-set-tool --id weather_tool --name get_weather \ - --type knowledge-query \ - --description "Get current weather information" \ - --argument location:string:"Location to query" \ - --argument units:string:"Temperature units (C/F)" -``` - -### Calculator Tool - -Register a calculator tool with MCP type: -```bash -tg-set-tool --id calc_tool --name calculate \ - --type mcp-tool \ - --description "Perform mathematical calculations" \ - --argument expression:string:"Mathematical expression to evaluate" -``` - -### Text 
Completion Tool - -Register a text completion tool: -```bash -tg-set-tool --id text_gen_tool --name generate_text \ - --type text-completion \ - --description "Generate text based on prompts" \ - --argument prompt:string:"Text prompt for generation" \ - --argument max_tokens:number:"Maximum tokens to generate" -``` - -### Custom API URL - -Register a tool with custom API endpoint: -```bash -tg-set-tool -u http://trustgraph.example.com:8088/ \ - --id custom_tool --name custom_search \ - --type knowledge-query \ - --description "Custom tool functionality" -``` - -### Tool Without Arguments - -Register a simple tool with no arguments: -```bash -tg-set-tool --id status_tool --name check_status \ - --type knowledge-query \ - --description "Check system status" -``` - -## Tool Types - -### knowledge-query -Tools that query knowledge bases, databases, or information systems: -- Used for information retrieval -- Typically return structured data or search results -- Examples: web search, document lookup, database queries - -### text-completion -Tools that generate or complete text: -- Used for text generation tasks -- Process prompts and return generated content -- Examples: language models, text generators, summarizers - -### mcp-tool -Model Control Protocol tools: -- Standardized tool interface for AI models -- Support complex interactions and state management -- Examples: external API integrations, complex workflows - -## Argument Types - -### string -Text or string parameters: -- Accept any text input -- Used for queries, prompts, identifiers -- Should include clear description of expected format - -### number -Numeric parameters: -- Accept integer or floating-point values -- Used for limits, thresholds, quantities -- Should specify valid ranges when applicable - -## Configuration Storage - -The tool configuration is stored in two parts: - -1. 
**Tool Index** (`agent.tool-index`) - - List of all registered tool IDs - - Updated to include new tools - - Used for tool discovery - -2. **Tool Configuration** (`agent.tool.{id}`) - - Complete tool definition as JSON - - Includes metadata and argument specifications - - Used for tool execution and validation - -## Advanced Usage - -### Updating Existing Tools - -Update an existing tool configuration: -```bash -# Update tool description -tg-set-tool --id weather --name "Weather Lookup" \ - --type knowledge-query \ - --description "Updated weather information service" \ - --argument location:string:"Location to query" -``` - -### Batch Tool Registration - -Register multiple tools in a script: -```bash -#!/bin/bash -# Register a suite of tools -tg-set-tool --id search --name "Web Search" --type knowledge-query \ - --description "Search the web" \ - --argument query:string:"Search query" - -tg-set-tool --id summarize --name "Text Summarizer" --type text-completion \ - --description "Summarize text content" \ - --argument text:string:"Text to summarize" - -tg-set-tool --id translate --name "Translator" --type mcp-tool \ - --description "Translate text between languages" \ - --argument text:string:"Text to translate" \ - --argument target_lang:string:"Target language" -``` - -### Tool Validation - -Verify tool registration: -```bash -# Register tool and verify -tg-set-tool --id test-tool --name "Test Tool" \ - --type knowledge-query \ - --description "Test tool for validation" - -# Check if tool was registered -tg-show-tools | grep test-tool -``` - -## Error Handling - -The command handles various error conditions: - -- **Missing required arguments**: All required fields must be provided -- **Invalid tool types**: Only valid types are accepted -- **Invalid argument format**: Arguments must follow `name:type:description` format -- **API connection errors**: If the TrustGraph API is unavailable -- **Configuration errors**: If tool data cannot be stored - -Common error 
scenarios: -```bash -# Missing required field -tg-set-tool --id tool1 --name "Tool 1" -# Output: Exception: Must specify --type for tool - -# Invalid tool type -tg-set-tool --id tool1 --name "Tool 1" --type invalid-type -# Output: Exception: Type must be one of: knowledge-query, text-completion, mcp-tool - -# Invalid argument format -tg-set-tool --id tool1 --name "Tool 1" --type knowledge-query \ - --argument "bad-format" -# Output: Exception: Arguments should be form name:type:description -``` - -## Integration with Other Commands - -### With Tool Management - -View registered tools: -```bash -# Register tool -tg-set-tool --id new-tool --name "New Tool" \ - --type knowledge-query \ - --description "Newly registered tool" - -# View all tools -tg-show-tools -``` - -### With Agent Invocation - -Use registered tools with agents: -```bash -# Register tool -tg-set-tool --id weather --name "Weather" \ - --type knowledge-query \ - --description "Weather lookup" - -# Use tool in agent workflow -tg-invoke-agent --prompt "What's the weather in London?" -``` - -### With Flow Configuration - -Tools can be used in flow configurations: -```bash -# Register tool for flow use -tg-set-tool --id data-processor --name "Data Processor" \ - --type mcp-tool \ - --description "Process data in flows" - -# View flows that might use the tool -tg-show-flows -``` - -## Best Practices - -1. **Clear Naming**: Use descriptive, unique tool IDs and names -2. **Detailed Descriptions**: Provide comprehensive tool descriptions -3. **Argument Documentation**: Clearly describe each argument's purpose -4. **Type Selection**: Choose appropriate tool types for functionality -5. **Validation**: Test tools after registration -6. **Version Management**: Track tool configuration changes -7. **Documentation**: Document custom tools and their usage - -## Troubleshooting - -### Tool Not Appearing - -If a registered tool doesn't appear in listings: -1. Verify the tool was registered successfully -2. 
Check the tool index with `tg-show-tools` -3. Ensure the API URL is correct -4. Verify TrustGraph API is running - -### Tool Registration Errors - -If tool registration fails: -1. Check all required arguments are provided -2. Verify argument format is correct -3. Ensure tool type is valid -4. Check API connectivity -5. Review error messages for specific issues - -### Tool Configuration Issues - -If tools aren't working as expected: -1. Verify tool arguments are correctly specified -2. Check tool type matches intended functionality -3. Ensure tool implementation is available -4. Review agent logs for tool execution errors - -## Related Commands - -- [`tg-show-tools`](tg-show-tools.md) - Display registered tools -- [`tg-delete-tool`](tg-delete-tool.md) - Remove tool configurations -- [`tg-set-mcp-tool`](tg-set-mcp-tool.md) - Configure MCP tools -- [`tg-invoke-agent`](tg-invoke-agent.md) - Use tools with agents - -## See Also - -- TrustGraph Tool Development Guide -- Agent Configuration Documentation -- MCP Tool Integration Guide diff --git a/docs/cli/tg-show-config.md b/docs/cli/tg-show-config.md deleted file mode 100644 index ac870a7f..00000000 --- a/docs/cli/tg-show-config.md +++ /dev/null @@ -1,170 +0,0 @@ -# tg-show-config - -Displays the current TrustGraph system configuration. - -## Synopsis - -```bash -tg-show-config [options] -``` - -## Description - -The `tg-show-config` command retrieves and displays the complete TrustGraph system configuration in JSON format. This includes flow definitions, service configurations, and other system settings stored in the configuration service. 
- -This is particularly useful for: -- Understanding the current system setup -- Debugging configuration issues -- Finding queue names for Pulsar integration -- Verifying flow definitions and interfaces - -## Options - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) - -## Examples - -### Display Complete Configuration -```bash -tg-show-config -``` - -### Using Custom API URL -```bash -tg-show-config -u http://production:8088/ -``` - -## Output Format - -The command outputs the configuration version followed by the complete configuration in JSON format: - -``` -Version: 42 -{ - "flows": { - "default": { - "blueprint-name": "document-rag+graph-rag", - "description": "Default processing flow", - "interfaces": { - "agent": { - "request": "non-persistent://tg/request/agent:default", - "response": "non-persistent://tg/response/agent:default" - }, - "graph-rag": { - "request": "non-persistent://tg/request/graph-rag:document-rag+graph-rag", - "response": "non-persistent://tg/response/graph-rag:document-rag+graph-rag" - }, - "text-load": "persistent://tg/flow/text-document-load:default", - ... - } - } - }, - "prompts": { - "system": "You are a helpful AI assistant...", - "graph-rag": "Answer the question using the provided context..." - }, - "token-costs": { - "gpt-4": { - "prompt": 0.03, - "completion": 0.06 - } - }, - ... -} -``` - -## Configuration Sections - -### Flow Definitions -Flow configurations showing: -- **blueprint-name**: The flow blueprint being used -- **description**: Human-readable flow description -- **interfaces**: Pulsar queue names for each service - -### Prompt Templates -System and service-specific prompt templates used by AI services. - -### Token Costs -Model pricing information for cost tracking and billing. - -### Service Settings -Various service-specific configuration parameters. 
- -## Finding Queue Names - -The configuration output is essential for discovering Pulsar queue names: - -### Flow-Hosted Services -Look in the `flows` section under `interfaces`: - -```json -"graph-rag": { - "request": "non-persistent://tg/request/graph-rag:document-rag+graph-rag", - "response": "non-persistent://tg/response/graph-rag:document-rag+graph-rag" -} -``` - -### Fire-and-Forget Services -Some services only have input queues: - -```json -"text-load": "persistent://tg/flow/text-document-load:default" -``` - -## Error Handling - -### Connection Errors -```bash -Exception: Connection refused -``` -**Solution**: Verify the API URL and ensure TrustGraph is running. - -### Authentication Errors -```bash -Exception: Unauthorized -``` -**Solution**: Check authentication credentials and permissions. - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-put-flow-blueprint`](tg-put-flow-blueprint.md) - Update flow blueprint definitions -- [`tg-show-flows`](tg-show-flows.md) - List active flows -- [`tg-set-prompt`](tg-set-prompt.md) - Configure prompt templates -- [`tg-set-token-costs`](tg-set-token-costs.md) - Configure token costs - -## API Integration - -This command uses the [Config API](../apis/api-config.md) with the `config` operation to retrieve the complete system configuration. 
- -**API Call:** -```json -{ - "operation": "config" -} -``` - -## Use Cases - -### Development and Debugging -- Verify flow configurations are correct -- Check that services have proper queue assignments -- Debug configuration-related issues - -### System Administration -- Monitor configuration changes over time -- Document current system setup -- Prepare for system migrations - -### Integration Development -- Discover Pulsar queue names for direct integration -- Understand service interfaces and capabilities -- Verify API endpoint configurations - -### Troubleshooting -- Check if flows are properly configured -- Verify prompt templates are set correctly -- Confirm token cost configurations \ No newline at end of file diff --git a/docs/cli/tg-show-flow-blueprints.md b/docs/cli/tg-show-flow-blueprints.md deleted file mode 100644 index 40667dda..00000000 --- a/docs/cli/tg-show-flow-blueprints.md +++ /dev/null @@ -1,330 +0,0 @@ -# tg-show-flow-blueprints - -Lists all defined flow blueprintes in TrustGraph with their descriptions and tags. - -## Synopsis - -```bash -tg-show-flow-blueprints [options] -``` - -## Description - -The `tg-show-flow-blueprints` command displays a formatted table of all flow blueprint definitions currently stored in TrustGraph. Each flow blueprint is shown with its name, description, and associated tags. - -Flow blueprintes are templates that define the structure and services available for creating flow instances. This command helps you understand what flow blueprintes are available for use. 
- -## Options - -### Optional Arguments - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) - -## Examples - -### List All Flow Blueprintes -```bash -tg-show-flow-blueprints -``` - -Output: -``` -+-----------------+----------------------------------+----------------------+ -| flow blueprint | description | tags | -+-----------------+----------------------------------+----------------------+ -| document-proc | Document processing pipeline | production, nlp | -| data-analysis | Data analysis and visualization | analytics, dev | -| web-scraper | Web content extraction flow | scraping, batch | -| chat-assistant | Conversational AI assistant | ai, interactive | -+-----------------+----------------------------------+----------------------+ -``` - -### Using Custom API URL -```bash -tg-show-flow-blueprints -u http://production:8088/ -``` - -### Filter Flow Blueprintes -```bash -# Show only production-tagged flow blueprintes -tg-show-flow-blueprints | grep "production" - -# Count total flow blueprintes -tg-show-flow-blueprints | grep -c "^|" - -# Show flow blueprintes with specific patterns -tg-show-flow-blueprints | grep -E "(document|text|nlp)" -``` - -## Output Format - -The command displays results in a formatted table with columns: - -- **flow blueprint**: The unique name/identifier of the flow blueprint -- **description**: Human-readable description of the flow blueprint purpose -- **tags**: Comma-separated list of categorization tags - -### Empty Results -If no flow blueprintes exist: -``` -No flows. 
-``` - -## Use Cases - -### Flow Blueprint Discovery -```bash -# Find available flow blueprintes for document processing -tg-show-flow-blueprints | grep -i document - -# List all AI-related flow blueprintes -tg-show-flow-blueprints | grep -i "ai\|nlp\|chat\|assistant" - -# Find development vs production flow blueprintes -tg-show-flow-blueprints | grep -E "(dev|test|staging)" -tg-show-flow-blueprints | grep "production" -``` - -### Flow Blueprint Management -```bash -# Get list of flow blueprint names for scripting -tg-show-flow-blueprints | awk 'NR>3 && /^\|/ {gsub(/[| ]/, "", $2); print $2}' | grep -v "^$" - -# Check if specific flow blueprint exists -if tg-show-flow-blueprints | grep -q "target-flow"; then - echo "Flow blueprint 'target-flow' exists" -else - echo "Flow blueprint 'target-flow' not found" -fi -``` - -### Environment Comparison -```bash -# Compare flow blueprintes between environments -echo "Development environment:" -tg-show-flow-blueprints -u http://dev:8088/ - -echo "Production environment:" -tg-show-flow-blueprints -u http://prod:8088/ -``` - -### Reporting and Documentation -```bash -# Generate flow blueprint inventory report -echo "Flow Blueprint Inventory - $(date)" > flow-inventory.txt -echo "=====================================" >> flow-inventory.txt -tg-show-flow-blueprints >> flow-inventory.txt - -# Create CSV export -echo "flow_class,description,tags" > flow-classes.csv -tg-show-flow-blueprints | awk 'NR>3 && /^\|/ { - gsub(/^\| */, "", $0); gsub(/ *\|$/, "", $0); - gsub(/ *\| */, ",", $0); print $0 -}' >> flow-classes.csv -``` - -## Error Handling - -### Connection Errors -```bash -Exception: Connection refused -``` -**Solution**: Check the API URL and ensure TrustGraph is running. - -### Permission Errors -```bash -Exception: Access denied to list flow blueprintes -``` -**Solution**: Verify user permissions for reading flow blueprint definitions. 
- -### Network Timeouts -```bash -Exception: Request timeout -``` -**Solution**: Check network connectivity and API server status. - -## Integration with Other Commands - -### Flow Blueprint Lifecycle -```bash -# 1. List available flow blueprintes -tg-show-flow-blueprints - -# 2. Get details of specific flow blueprint -tg-get-flow-blueprint -n "interesting-flow" - -# 3. Start flow instance from class -tg-start-flow -n "interesting-flow" -i "my-instance" - -# 4. Monitor flow instance -tg-show-flows | grep "my-instance" -``` - -### Bulk Operations -```bash -# Process all flow blueprintes -tg-show-flow-blueprints | awk 'NR>3 && /^\|/ {gsub(/[| ]/, "", $2); if($2) print $2}' | \ -while read class_name; do - if [ -n "$class_name" ]; then - echo "Processing flow blueprint: $class_name" - tg-get-flow-blueprint -n "$class_name" > "backup-$class_name.json" - fi -done -``` - -### Automated Validation -```bash -# Check flow blueprint health -echo "Validating flow blueprintes..." -tg-show-flow-blueprints | awk 'NR>3 && /^\|/ {gsub(/[| ]/, "", $2); if($2) print $2}' | \ -while read class_name; do - if [ -n "$class_name" ]; then - echo -n "Checking $class_name... " - if tg-get-flow-blueprint -n "$class_name" > /dev/null 2>&1; then - echo "OK" - else - echo "ERROR" - fi - fi -done -``` - -## Advanced Usage - -### Flow Blueprint Analysis -```bash -# Analyze flow blueprint distribution by tags -tg-show-flow-blueprints | awk 'NR>3 && /^\|/ { - # Extract tags column - split($0, parts, "|"); - tags = parts[4]; - gsub(/^ *| *$/, "", tags); - if (tags) { - split(tags, tag_array, ","); - for (i in tag_array) { - gsub(/^ *| *$/, "", tag_array[i]); - if (tag_array[i]) print tag_array[i]; - } - } -}' | sort | uniq -c | sort -nr -``` - -### Environment Synchronization -```bash -# Sync flow blueprintes between environments -echo "Synchronizing flow blueprintes from dev to staging..." 
- -# Get list from development -dev_classes=$(tg-show-flow-blueprints -u http://dev:8088/ | \ - awk 'NR>3 && /^\|/ {gsub(/[| ]/, "", $2); if($2) print $2}') - -# Check each class in staging -for class in $dev_classes; do - if tg-show-flow-blueprints -u http://staging:8088/ | grep -q "$class"; then - echo "$class: Already exists in staging" - else - echo "$class: Missing in staging - needs sync" - # Get from dev and put to staging - tg-get-flow-blueprint -n "$class" -u http://dev:8088/ > temp-class.json - tg-put-flow-blueprint -n "$class" -c "$(cat temp-class.json)" -u http://staging:8088/ - rm temp-class.json - fi -done -``` - -### Monitoring Script -```bash -#!/bin/bash -# monitor-flow-classes.sh -api_url="${1:-http://localhost:8088/}" - -echo "Flow Blueprint Monitoring Report - $(date)" -echo "API URL: $api_url" -echo "----------------------------------------" - -# Total count -total=$(tg-show-flow-blueprints -u "$api_url" | grep -c "^|" 2>/dev/null || echo "0") -echo "Total flow blueprintes: $((total - 3))" # Subtract header rows - -# Tag analysis -echo -e "\nTag distribution:" -tg-show-flow-blueprints -u "$api_url" | awk 'NR>3 && /^\|/ { - split($0, parts, "|"); - tags = parts[4]; - gsub(/^ *| *$/, "", tags); - if (tags) { - split(tags, tag_array, ","); - for (i in tag_array) { - gsub(/^ *| *$/, "", tag_array[i]); - if (tag_array[i]) print tag_array[i]; - } - } -}' | sort | uniq -c | sort -nr - -# Health check -echo -e "\nHealth check:" -healthy=0 -unhealthy=0 -tg-show-flow-blueprints -u "$api_url" | awk 'NR>3 && /^\|/ {gsub(/[| ]/, "", $2); if($2) print $2}' | \ -while read class_name; do - if [ -n "$class_name" ]; then - if tg-get-flow-blueprint -n "$class_name" -u "$api_url" > /dev/null 2>&1; then - healthy=$((healthy + 1)) - else - unhealthy=$((unhealthy + 1)) - echo " ERROR: $class_name" - fi - fi -done - -echo "Healthy: $healthy, Unhealthy: $unhealthy" -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- 
[`tg-get-flow-blueprint`](tg-get-flow-blueprint.md) - Retrieve specific flow blueprint definitions -- [`tg-put-flow-blueprint`](tg-put-flow-blueprint.md) - Create/update flow blueprint definitions -- [`tg-delete-flow-blueprint`](tg-delete-flow-blueprint.md) - Delete flow blueprint definitions -- [`tg-start-flow`](tg-start-flow.md) - Create flow instances from classes -- [`tg-show-flows`](tg-show-flows.md) - List active flow instances - -## API Integration - -This command uses the [Flow API](../apis/api-flow.md) with the `list-classes` operation to retrieve flow blueprint listings. - -## Best Practices - -1. **Regular Inventory**: Periodically review available flow blueprintes -2. **Documentation**: Ensure flow blueprintes have meaningful descriptions -3. **Tagging**: Use consistent tagging for better organization -4. **Cleanup**: Remove unused or deprecated flow blueprintes -5. **Monitoring**: Include flow blueprint health checks in monitoring -6. **Environment Parity**: Keep flow blueprintes synchronized across environments - -## Troubleshooting - -### No Output -```bash -# If command returns no output, check API connectivity -tg-show-flow-blueprints -u http://localhost:8088/ -# Verify TrustGraph is running and accessible -``` - -### Formatting Issues -```bash -# If table formatting is broken, check terminal width -export COLUMNS=120 -tg-show-flow-blueprints -``` - -### Missing Flow Blueprintes -```bash -# If expected flow blueprintes are missing, verify: -# 1. Correct API URL -# 2. Database connectivity -# 3. Flow blueprint definitions are properly stored -``` \ No newline at end of file diff --git a/docs/cli/tg-show-flow-state.md b/docs/cli/tg-show-flow-state.md deleted file mode 100644 index dc24d700..00000000 --- a/docs/cli/tg-show-flow-state.md +++ /dev/null @@ -1,518 +0,0 @@ -# tg-show-flow-state - -Displays the processor states for a specific flow and its associated flow blueprint. 
- -## Synopsis - -```bash -tg-show-flow-state [options] -``` - -## Description - -The `tg-show-flow-state` command shows the current state of processors within a specific TrustGraph flow instance and its corresponding flow blueprint. It queries the metrics system to determine which processing components are running and displays their status with visual indicators. - -This command is essential for monitoring flow health and debugging processing issues. - -## Options - -### Optional Arguments - -- `-f, --flow-id ID`: Flow instance ID to examine (default: `default`) -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-m, --metrics-url URL`: Metrics API URL (default: `http://localhost:8088/api/metrics`) - -## Examples - -### Check Default Flow State -```bash -tg-show-flow-state -``` - -### Check Specific Flow -```bash -tg-show-flow-state -f "production-flow" -``` - -### Use Custom Metrics URL -```bash -tg-show-flow-state \ - -f "research-flow" \ - -m "http://metrics-server:8088/api/metrics" -``` - -### Check Flow in Different Environment -```bash -tg-show-flow-state \ - -f "staging-flow" \ - -u "http://staging:8088/" \ - -m "http://staging:8088/api/metrics" -``` - -## Output Format - -The command displays processor states for both the flow instance and its flow blueprint: - -``` -Flow production-flow -- pdf-processor 💚 -- text-extractor 💚 -- embeddings-generator 💚 -- knowledge-builder ❌ -- document-indexer 💚 - -Class document-processing-v2 -- base-pdf-processor 💚 -- base-text-extractor 💚 -- base-embeddings-generator 💚 -- base-knowledge-builder 💚 -- base-document-indexer 💚 -``` - -### Status Indicators -- **💚 (Green Heart)**: Processor is running and healthy -- **❌ (Red X)**: Processor is not running or unhealthy - -### Information Displayed -- **Flow Section**: Shows the state of processors in the specific flow instance -- **Class Section**: Shows the state of processors in the flow blueprint template -- **Processor 
Names**: Individual processing components within the flow - -## Use Cases - -### Flow Health Monitoring -```bash -# Monitor flow health continuously -monitor_flow_health() { - local flow_id="$1" - local interval="${2:-30}" # Default 30 seconds - - echo "Monitoring flow health: $flow_id" - echo "Refresh interval: ${interval}s" - echo "Press Ctrl+C to stop" - - while true; do - clear - echo "Flow Health Monitor - $(date)" - echo "==============================" - - tg-show-flow-state -f "$flow_id" - - sleep "$interval" - done -} - -# Monitor production flow -monitor_flow_health "production-flow" 15 -``` - -### Debugging Processing Issues -```bash -# Comprehensive flow debugging -debug_flow_issues() { - local flow_id="$1" - - echo "Debugging flow: $flow_id" - echo "=======================" - - # Check flow state - echo "1. Processor States:" - tg-show-flow-state -f "$flow_id" - - # Check flow configuration - echo -e "\n2. Flow Configuration:" - tg-show-flows | grep "$flow_id" - - # Check active processing - echo -e "\n3. Active Processing:" - tg-show-flows | grep -i processing - - # Check system resources - echo -e "\n4. 
System Resources:" - free -h - df -h - - echo -e "\nDebugging complete for: $flow_id" -} - -# Debug specific flow -debug_flow_issues "problematic-flow" -``` - -### Multi-Flow Status Dashboard -```bash -# Create status dashboard for multiple flows -create_flow_dashboard() { - local flows=("$@") - - echo "TrustGraph Flow Dashboard - $(date)" - echo "===================================" - - for flow in "${flows[@]}"; do - echo -e "\n=== Flow: $flow ===" - tg-show-flow-state -f "$flow" 2>/dev/null || echo "Flow not found or inaccessible" - done - - echo -e "\n=== Summary ===" - echo "Total flows monitored: ${#flows[@]}" - echo "Dashboard generated: $(date)" -} - -# Monitor multiple flows -flows=("production-flow" "research-flow" "development-flow") -create_flow_dashboard "${flows[@]}" -``` - -### Automated Health Checks -```bash -# Automated health check with alerts -health_check_with_alerts() { - local flow_id="$1" - local alert_email="$2" - - echo "Performing health check for: $flow_id" - - # Capture flow state - flow_state=$(tg-show-flow-state -f "$flow_id" 2>&1) - - if [ $? 
-ne 0 ]; then - echo "ERROR: Failed to get flow state" - # Send alert email if configured - if [ -n "$alert_email" ]; then - echo "Flow $flow_id is not responding" | mail -s "TrustGraph Alert" "$alert_email" - fi - return 1 - fi - - # Check for failed processors - failed_count=$(echo "$flow_state" | grep -c "❌") - - if [ "$failed_count" -gt 0 ]; then - echo "WARNING: $failed_count processors are not running" - echo "$flow_state" - - # Send alert if configured - if [ -n "$alert_email" ]; then - echo -e "Flow $flow_id has $failed_count failed processors:\n\n$flow_state" | \ - mail -s "TrustGraph Health Alert" "$alert_email" - fi - return 1 - else - echo "✓ All processors are running normally" - return 0 - fi -} - -# Run health check -health_check_with_alerts "production-flow" "admin@company.com" -``` - -## Advanced Usage - -### Flow State Comparison -```bash -# Compare flow states between environments -compare_flow_states() { - local flow_id="$1" - local env1_url="$2" - local env2_url="$3" - - echo "Comparing flow state: $flow_id" - echo "Environment 1: $env1_url" - echo "Environment 2: $env2_url" - echo "================================" - - # Get states from both environments - echo "Environment 1 State:" - tg-show-flow-state -f "$flow_id" -u "$env1_url" -m "$env1_url/api/metrics" - - echo -e "\nEnvironment 2 State:" - tg-show-flow-state -f "$flow_id" -u "$env2_url" -m "$env2_url/api/metrics" - - echo -e "\nComparison complete" -} - -# Compare production vs staging -compare_flow_states "main-flow" "http://prod:8088" "http://staging:8088" -``` - -### Historical State Tracking -```bash -# Track flow state over time -track_flow_state_history() { - local flow_id="$1" - local log_file="flow_state_history.log" - local interval="${2:-60}" # Default 1 minute - - echo "Starting flow state tracking: $flow_id" - echo "Log file: $log_file" - echo "Interval: ${interval}s" - - while true; do - timestamp=$(date '+%Y-%m-%d %H:%M:%S') - - # Get current state - 
state_output=$(tg-show-flow-state -f "$flow_id" 2>&1) - - if [ $? -eq 0 ]; then - # Count healthy and failed processors - healthy_count=$(echo "$state_output" | grep -c "💚") - failed_count=$(echo "$state_output" | grep -c "❌") - - # Log summary - echo "$timestamp,$flow_id,$healthy_count,$failed_count" >> "$log_file" - - # If there are failures, log details - if [ "$failed_count" -gt 0 ]; then - echo "$timestamp - FAILURES DETECTED in $flow_id:" >> "${log_file}.detailed" - echo "$state_output" >> "${log_file}.detailed" - echo "---" >> "${log_file}.detailed" - fi - else - echo "$timestamp,$flow_id,ERROR,ERROR" >> "$log_file" - fi - - sleep "$interval" - done -} - -# Start tracking (run in background) -track_flow_state_history "production-flow" 30 & -``` - -### State-Based Actions -```bash -# Perform actions based on flow state -state_based_actions() { - local flow_id="$1" - - echo "Checking flow state for automated actions: $flow_id" - - # Get current state - state_output=$(tg-show-flow-state -f "$flow_id") - - if [ $? -ne 0 ]; then - echo "ERROR: Cannot get flow state" - return 1 - fi - - # Check specific processors - if echo "$state_output" | grep -q "pdf-processor.*❌"; then - echo "PDF processor is down - attempting restart..." - # Restart specific processor (this would need additional commands) - # restart_processor "$flow_id" "pdf-processor" - fi - - if echo "$state_output" | grep -q "embeddings-generator.*❌"; then - echo "Embeddings generator is down - checking dependencies..." - # Check GPU availability, memory, etc. 
- nvidia-smi 2>/dev/null || echo "GPU not available" - fi - - # Count total failures - failed_count=$(echo "$state_output" | grep -c "❌") - - if [ "$failed_count" -gt 3 ]; then - echo "CRITICAL: More than 3 processors failed - considering flow restart" - # This would trigger more serious recovery actions - fi -} -``` - -### Performance Correlation -```bash -# Correlate flow state with performance metrics -correlate_state_performance() { - local flow_id="$1" - local metrics_url="$2" - - echo "Correlating flow state with performance for: $flow_id" - - # Get flow state - state_output=$(tg-show-flow-state -f "$flow_id" -m "$metrics_url") - healthy_count=$(echo "$state_output" | grep -c "💚") - failed_count=$(echo "$state_output" | grep -c "❌") - - echo "Processors - Healthy: $healthy_count, Failed: $failed_count" - - # Get performance metrics (this would need additional API calls) - # throughput=$(get_flow_throughput "$flow_id" "$metrics_url") - # latency=$(get_flow_latency "$flow_id" "$metrics_url") - - # echo "Performance - Throughput: ${throughput}/min, Latency: ${latency}ms" - - # Calculate health ratio - total_processors=$((healthy_count + failed_count)) - if [ "$total_processors" -gt 0 ]; then - health_ratio=$(echo "scale=2; $healthy_count * 100 / $total_processors" | bc) - echo "Health ratio: ${health_ratio}%" - fi -} -``` - -## Integration with Monitoring Systems - -### Prometheus Integration -```bash -# Export flow state metrics to Prometheus format -export_prometheus_metrics() { - local flow_id="$1" - local metrics_file="flow_state_metrics.prom" - - # Get flow state - state_output=$(tg-show-flow-state -f "$flow_id") - - # Count states - healthy_count=$(echo "$state_output" | grep -c "💚") - failed_count=$(echo "$state_output" | grep -c "❌") - - # Generate Prometheus metrics - cat > "$metrics_file" << EOF -# HELP trustgraph_flow_processors_healthy Number of healthy processors in flow -# TYPE trustgraph_flow_processors_healthy gauge 
-trustgraph_flow_processors_healthy{flow_id="$flow_id"} $healthy_count - -# HELP trustgraph_flow_processors_failed Number of failed processors in flow -# TYPE trustgraph_flow_processors_failed gauge -trustgraph_flow_processors_failed{flow_id="$flow_id"} $failed_count - -# HELP trustgraph_flow_health_ratio Ratio of healthy processors -# TYPE trustgraph_flow_health_ratio gauge -EOF - - total=$((healthy_count + failed_count)) - if [ "$total" -gt 0 ]; then - ratio=$(echo "scale=4; $healthy_count / $total" | bc) - echo "trustgraph_flow_health_ratio{flow_id=\"$flow_id\"} $ratio" >> "$metrics_file" - fi - - echo "Prometheus metrics exported to: $metrics_file" -} -``` - -### Grafana Dashboard Data -```bash -# Generate data for Grafana dashboard -generate_grafana_data() { - local flows=("$@") - local output_file="grafana_flow_data.json" - - echo "Generating Grafana dashboard data..." - - echo "{" > "$output_file" - echo " \"flows\": [" >> "$output_file" - - for i in "${!flows[@]}"; do - flow="${flows[$i]}" - - # Get flow state - state_output=$(tg-show-flow-state -f "$flow" 2>/dev/null) - - if [ $? -eq 0 ]; then - healthy=$(echo "$state_output" | grep -c "💚") - failed=$(echo "$state_output" | grep -c "❌") - else - healthy=0 - failed=0 - fi - - echo " {" >> "$output_file" - echo " \"flow_id\": \"$flow\"," >> "$output_file" - echo " \"healthy_processors\": $healthy," >> "$output_file" - echo " \"failed_processors\": $failed," >> "$output_file" - echo " \"timestamp\": \"$(date -Iseconds)\"" >> "$output_file" - - if [ $i -lt $((${#flows[@]} - 1)) ]; then - echo " }," >> "$output_file" - else - echo " }" >> "$output_file" - fi - done - - echo " ]" >> "$output_file" - echo "}" >> "$output_file" - - echo "Grafana data generated: $output_file" -} -``` - -## Error Handling - -### Flow Not Found -```bash -Exception: Flow 'nonexistent-flow' not found -``` -**Solution**: Verify the flow ID exists with `tg-show-flows`. 
- -### Metrics API Unavailable -```bash -Exception: Connection refused to metrics API -``` -**Solution**: Check metrics URL and ensure metrics service is running. - -### Permission Issues -```bash -Exception: Access denied to metrics -``` -**Solution**: Verify permissions for accessing metrics and flow information. - -### Invalid Flow State -```bash -Exception: Unable to parse flow state -``` -**Solution**: Check if the flow is properly initialized and processors are configured. - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-show-flows`](tg-show-flows.md) - List all flows -- [`tg-show-processor-state`](tg-show-processor-state.md) - Show all processor states -- [`tg-start-flow`](tg-start-flow.md) - Start flow instances -- [`tg-stop-flow`](tg-stop-flow.md) - Stop flow instances - -## API Integration - -This command integrates with: -- TrustGraph Flow API for flow information -- Prometheus/Metrics API for processor state information - -## Best Practices - -1. **Regular Monitoring**: Check flow states regularly in production -2. **Automated Alerts**: Set up automated health checks with alerting -3. **Historical Tracking**: Maintain historical flow state data -4. **Integration**: Integrate with monitoring systems like Prometheus/Grafana -5. **Documentation**: Document expected processor configurations -6. **Correlation**: Correlate flow state with performance metrics -7. 
**Recovery Procedures**: Develop automated recovery procedures for common failures - -## Troubleshooting - -### No Processors Shown -```bash -# Check if flow exists -tg-show-flows | grep "flow-id" - -# Verify metrics service -curl -s http://localhost:8088/api/metrics/query?query=processor_info -``` - -### Inconsistent States -```bash -# Check metrics service health -curl -s http://localhost:8088/api/metrics/health - -# Restart metrics collection if needed -``` - -### Connection Issues -```bash -# Test API connectivity -curl -s http://localhost:8088/api/v1/flows - -# Test metrics connectivity -curl -s http://localhost:8088/api/metrics/query?query=up -``` \ No newline at end of file diff --git a/docs/cli/tg-show-flows.md b/docs/cli/tg-show-flows.md deleted file mode 100644 index 72a1c809..00000000 --- a/docs/cli/tg-show-flows.md +++ /dev/null @@ -1,207 +0,0 @@ -# tg-show-flows - -Shows configured flows with their interfaces and queue information. - -## Synopsis - -```bash -tg-show-flows [options] -``` - -## Description - -The `tg-show-flows` command displays all currently configured flow instances, including their identifiers, class names, descriptions, and available service interfaces with corresponding Pulsar queue names. - -This command is essential for understanding what flows are available, discovering service endpoints, and finding Pulsar queue names for direct API integration. 
- -## Options - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) - -## Examples - -### Show All Flows -```bash -tg-show-flows -``` - -### Using Custom API URL -```bash -tg-show-flows -u http://production:8088/ -``` - -## Output Format - -The command displays each flow in a formatted table with the following information: - -``` -+-------+---------------------------+ -| id | research-flow | -| class | document-rag+graph-rag | -| desc | Research document pipeline | -| queue | agent request: non-persistent://tg/request/agent:default | -| | agent response: non-persistent://tg/request/agent:default | -| | graph-rag request: non-persistent://tg/request/graph-rag:document-rag+graph-rag | -| | graph-rag response: non-persistent://tg/request/graph-rag:document-rag+graph-rag | -| | text-load: persistent://tg/flow/text-document-load:default | -+-------+---------------------------+ - -+-------+---------------------------+ -| id | medical-analysis | -| class | medical-nlp | -| desc | Medical document analysis | -| queue | embeddings request: non-persistent://tg/request/embeddings:medical-nlp | -| | embeddings response: non-persistent://tg/request/embeddings:medical-nlp | -| | document-load: persistent://tg/flow/document-load:medical-analysis | -+-------+---------------------------+ -``` - -### No Flows Available -```bash -No flows. 
-``` - -## Interface Types - -The queue information shows two types of service interfaces: - -### Request/Response Services -Services that accept requests and return responses: -``` -agent request: non-persistent://tg/request/agent:default -agent response: non-persistent://tg/response/agent:default -``` - -### Fire-and-Forget Services -Services that accept data without returning responses: -``` -text-load: persistent://tg/flow/text-document-load:default -``` - -## Service Interface Discovery - -Use this command to discover available services and their queue names: - -### Common Request/Response Services -- **agent**: Interactive Q&A service -- **graph-rag**: Graph-based retrieval augmented generation -- **document-rag**: Document-based retrieval augmented generation -- **text-completion**: LLM text completion service -- **prompt**: Prompt-based text generation -- **embeddings**: Text embedding generation -- **graph-embeddings**: Graph entity embeddings -- **triples**: Knowledge graph triple queries - -### Common Fire-and-Forget Services -- **text-load**: Text document loading -- **document-load**: Document file loading -- **triples-store**: Knowledge graph storage -- **graph-embeddings-store**: Graph embedding storage -- **document-embeddings-store**: Document embedding storage -- **entity-contexts-load**: Entity context loading - -## Queue Name Patterns - -### Flow-Hosted Request/Response -``` -non-persistent://tg/request/{service}:{flow-class} -non-persistent://tg/response/{service}:{flow-class} -``` - -### Flow-Hosted Fire-and-Forget -``` -persistent://tg/flow/{service}:{flow-id} -``` - -## Error Handling - -### Connection Errors -```bash -Exception: Connection refused -``` -**Solution**: Verify the API URL and ensure TrustGraph is running. - -### Authentication Errors -```bash -Exception: Unauthorized -``` -**Solution**: Check authentication credentials and permissions. 
- -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-start-flow`](tg-start-flow.md) - Start a new flow instance -- [`tg-stop-flow`](tg-stop-flow.md) - Stop a running flow -- [`tg-show-flow-blueprints`](tg-show-flow-blueprints.md) - List available flow blueprintes -- [`tg-show-flow-state`](tg-show-flow-state.md) - Show detailed flow status -- [`tg-show-config`](tg-show-config.md) - Show complete system configuration - -## API Integration - -This command uses the [Flow API](../apis/api-flow.md) to list flows and the [Config API](../apis/api-config.md) to retrieve interface descriptions. - -## Use Cases - -### Service Discovery -Find available services and their endpoints: -```bash -# List all flows and their services -tg-show-flows - -# Use discovered queue names for direct Pulsar integration -``` - -### System Monitoring -Monitor active flows and their configurations: -```bash -# Check what flows are running -tg-show-flows - -# Verify flow services are properly configured -``` - -### Development and Debugging -Understand flow configurations during development: -```bash -# Check if flow started correctly -tg-start-flow -n "my-class" -i "test-flow" -d "Test" -tg-show-flows - -# Verify service interfaces are available -``` - -### Integration Planning -Plan API integrations by understanding available services: -```bash -# Discover queue names for Pulsar clients -tg-show-flows | grep "graph-rag request" - -# Find WebSocket endpoints for real-time services -``` - -## Output Interpretation - -### Flow Information -- **id**: Unique flow instance identifier -- **class**: Flow blueprint name used to create the instance -- **desc**: Human-readable flow description -- **queue**: Service interfaces and their Pulsar queue names - -### Queue Names -Queue names indicate: -- **Persistence**: `persistent://` vs `non-persistent://` -- **Tenant**: Usually `tg` -- **Namespace**: `request`, `response`, or `flow` -- **Service**: The specific 
service name -- **Flow Identifier**: Either flow blueprint or flow ID - -## Best Practices - -1. **Regular Monitoring**: Check flows regularly to ensure they're running correctly -2. **Queue Documentation**: Save queue names for API integration documentation -3. **Flow Lifecycle**: Use in conjunction with flow start/stop commands -4. **Capacity Planning**: Monitor number of active flows for resource planning -5. **Service Discovery**: Use output to understand available capabilities \ No newline at end of file diff --git a/docs/cli/tg-show-graph.md b/docs/cli/tg-show-graph.md deleted file mode 100644 index 1da66dd5..00000000 --- a/docs/cli/tg-show-graph.md +++ /dev/null @@ -1,286 +0,0 @@ -# tg-show-graph - -Displays knowledge graph triples (edges) from the TrustGraph system. - -## Synopsis - -```bash -tg-show-graph [options] -``` - -## Description - -The `tg-show-graph` command queries the knowledge graph and displays up to 10,000 triples (subject-predicate-object relationships) in a human-readable format. This is useful for exploring knowledge graph contents, debugging knowledge loading, and understanding the structure of stored knowledge. - -Each triple represents a fact or relationship in the knowledge graph, showing how entities are connected through various predicates. 
- -## Options - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-f, --flow-id FLOW`: Flow ID to query (default: `default`) -- `-U, --user USER`: User identifier (default: `trustgraph`) -- `-C, --collection COLLECTION`: Collection identifier (default: `default`) - -## Examples - -### Display All Graph Triples -```bash -tg-show-graph -``` - -### Query Specific Flow -```bash -tg-show-graph -f research-flow -``` - -### Query User's Collection -```bash -tg-show-graph -U researcher -C medical-papers -``` - -### Using Custom API URL -```bash -tg-show-graph -u http://production:8088/ -``` - -## Output Format - -The command displays triples in subject-predicate-object format: - -``` - "John Doe" - - "Acme Corporation" - - "New York" - - "Research Report" - "2024" -``` - -### Triple Components - -- **Subject**: The entity the statement is about (usually a URI) -- **Predicate**: The relationship or property (usually a URI) -- **Object**: The value or target entity (can be URI or literal) - -### URI vs Literal Values - -- **URIs**: Enclosed in angle brackets `` -- **Literals**: Enclosed in quotes `"Literal Value"` - -### Common Predicates - -- ``: Entity names -- ``: Document titles -- ``: Authorship relationships -- ``: Employment relationships -- ``: Location relationships -- ``: Publication information -- ``: Dublin Core creator -- ``: Friend of a Friend name - -## Data Limitations - -### 10,000 Triple Limit -The command displays up to 10,000 triples to prevent overwhelming output. For larger graphs: - -```bash -# Use graph export for complete data -tg-graph-to-turtle -o complete-graph.ttl - -# Use targeted queries for specific data -tg-invoke-graph-rag -q "Show me information about specific entities" -``` - -### Collection Scope -Results are limited to the specified user and collection. 
To see all data: - -```bash -# Query different collections -tg-show-graph -C collection1 -tg-show-graph -C collection2 -``` - -## Knowledge Graph Structure - -### Entity Types -Common entity types in the output: -- **Documents**: Research papers, reports, manuals -- **People**: Authors, researchers, employees -- **Organizations**: Companies, institutions, publishers -- **Concepts**: Technical terms, topics, categories -- **Events**: Publications, meetings, processes - -### Relationship Types -Common relationship types: -- **Authorship**: Who created what -- **Membership**: Who belongs to what organization -- **Hierarchical**: Parent-child relationships -- **Temporal**: When things happened -- **Topical**: What topics are related - -## Error Handling - -### Flow Not Available -```bash -Exception: Invalid flow -``` -**Solution**: Verify the flow exists and is running with `tg-show-flows`. - -### No Data Available -```bash -# Empty output (no triples displayed) -``` -**Solution**: Check if knowledge has been loaded using `tg-show-kg-cores` and `tg-load-kg-core`. - -### Connection Errors -```bash -Exception: Connection refused -``` -**Solution**: Check the API URL and ensure TrustGraph is running. - -### Permission Errors -```bash -Exception: Access denied -``` -**Solution**: Verify user permissions for the specified collection. - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-graph-to-turtle`](tg-graph-to-turtle.md) - Export graph to Turtle format -- [`tg-load-kg-core`](tg-load-kg-core.md) - Load knowledge into graph -- [`tg-show-kg-cores`](tg-show-kg-cores.md) - List available knowledge cores -- [`tg-invoke-graph-rag`](tg-invoke-graph-rag.md) - Query graph with natural language -- [`tg-load-turtle`](tg-load-turtle.md) - Import RDF data from Turtle files - -## API Integration - -This command uses the [Triples Query API](../apis/api-triples-query.md) to retrieve knowledge graph triples with no filtering constraints. 
- -## Use Cases - -### Knowledge Exploration -```bash -# Explore what knowledge is available -tg-show-graph | head -50 - -# Look for specific entities -tg-show-graph | grep "Einstein" -``` - -### Data Verification -```bash -# Verify knowledge loading worked correctly -tg-load-kg-core --kg-core-id "research-data" --flow-id "research-flow" -tg-show-graph -f research-flow | wc -l -``` - -### Debugging Knowledge Issues -```bash -# Check if specific relationships exist -tg-show-graph | grep "hasName" -tg-show-graph | grep "createdBy" -``` - -### Graph Analysis -```bash -# Count different relationship types -tg-show-graph | awk '{print $2}' | sort | uniq -c - -# Find most connected entities -tg-show-graph | awk '{print $1}' | sort | uniq -c | sort -nr -``` - -### Data Quality Assessment -```bash -# Check for malformed triples -tg-show-graph | grep -v "^<.*> <.*>" - -# Verify URI patterns -tg-show-graph | grep "http://" | head -20 -``` - -## Output Processing - -### Filter by Predicate -```bash -# Show only name relationships -tg-show-graph | grep "hasName" - -# Show only authorship -tg-show-graph | grep "createdBy" -``` - -### Extract Entities -```bash -# List all subjects (entities) -tg-show-graph | awk '{print $1}' | sort | uniq - -# List all predicates (relationships) -tg-show-graph | awk '{print $2}' | sort | uniq -``` - -### Export Subsets -```bash -# Save specific relationships -tg-show-graph | grep "Organization" > organization-data.txt - -# Save person-related triples -tg-show-graph | grep "Person" > person-data.txt -``` - -## Performance Considerations - -### Large Graphs -For graphs with many triples: -- Command may take time to retrieve 10,000 triples -- Consider using filtered queries for specific data -- Use `tg-graph-to-turtle` for complete export - -### Memory Usage -- Output is streamed, so memory usage is manageable -- Piping to other commands processes data incrementally - -## Best Practices - -1. 
**Start Small**: Begin with small collections to understand structure -2. **Use Filters**: Pipe output through grep/awk for specific data -3. **Regular Inspection**: Periodically check graph contents -4. **Data Validation**: Verify expected relationships exist -5. **Performance Monitoring**: Monitor query time for large graphs -6. **Collection Organization**: Use collections to organize different domains - -## Integration Examples - -### With Other Tools -```bash -# Convert to different formats -tg-show-graph | sed 's/[<>"]//g' > simple-triples.txt - -# Create entity lists -tg-show-graph | awk '{print $1}' | sort | uniq > entities.txt - -# Generate statistics -tg-show-graph | wc -l -echo "Total triples in graph" -``` - -### Graph Exploration Workflow -```bash -# 1. Check available knowledge -tg-show-kg-cores - -# 2. Load knowledge into flow -tg-load-kg-core --kg-core-id "my-knowledge" --flow-id "my-flow" - -# 3. Explore the graph -tg-show-graph -f my-flow - -# 4. Query specific information -tg-invoke-graph-rag -q "What entities are in the graph?" -f my-flow -``` \ No newline at end of file diff --git a/docs/cli/tg-show-kg-cores.md b/docs/cli/tg-show-kg-cores.md deleted file mode 100644 index d1436f4d..00000000 --- a/docs/cli/tg-show-kg-cores.md +++ /dev/null @@ -1,227 +0,0 @@ -# tg-show-kg-cores - -Shows available knowledge cores in the TrustGraph system. - -## Synopsis - -```bash -tg-show-kg-cores [options] -``` - -## Description - -The `tg-show-kg-cores` command lists all knowledge cores available in the TrustGraph system for a specific user. Knowledge cores contain structured knowledge (RDF triples and graph embeddings) that can be loaded into flows for processing and querying. - -This command is useful for discovering what knowledge resources are available, managing knowledge core inventories, and preparing for knowledge loading operations. 
- -## Options - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-U, --user USER`: User identifier (default: `trustgraph`) - -## Examples - -### List All Knowledge Cores -```bash -tg-show-kg-cores -``` - -### List Cores for Specific User -```bash -tg-show-kg-cores -U researcher -``` - -### Using Custom API URL -```bash -tg-show-kg-cores -u http://production:8088/ -``` - -## Output Format - -The command lists knowledge core identifiers, one per line: - -``` -medical-knowledge-v1 -research-papers-2024 -legal-documents-core -technical-specifications -climate-data-march -``` - -### No Knowledge Cores -```bash -No knowledge cores. -``` - -## Knowledge Core Naming - -Knowledge cores typically follow naming conventions that include: -- **Domain**: `medical-`, `legal-`, `technical-` -- **Content Type**: `papers-`, `documents-`, `data-` -- **Version/Date**: `v1`, `2024`, `march` - -Example patterns: -- `medical-knowledge-v2.1` -- `research-papers-2024-q1` -- `legal-documents-updated` -- `technical-specs-current` - -## Related Operations - -After discovering knowledge cores, you can: - -### Load into Flow -```bash -# Load core into active flow -tg-load-kg-core --kg-core-id "medical-knowledge-v1" --flow-id "medical-flow" -``` - -### Examine Contents -```bash -# Export core for examination -tg-get-kg-core --id "research-papers-2024" -o examination.msgpack -``` - -### Remove Unused Cores -```bash -# Delete obsolete cores -tg-delete-kg-core --id "old-knowledge-v1" -U researcher -``` - -## Error Handling - -### Connection Errors -```bash -Exception: Connection refused -``` -**Solution**: Verify the API URL and ensure TrustGraph is running. - -### Authentication Errors -```bash -Exception: Unauthorized -``` -**Solution**: Check authentication credentials and user permissions. - -### User Not Found -```bash -Exception: User not found -``` -**Solution**: Verify the user identifier exists in the system. 
- -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-put-kg-core`](tg-put-kg-core.md) - Store knowledge core from file -- [`tg-get-kg-core`](tg-get-kg-core.md) - Retrieve knowledge core to file -- [`tg-load-kg-core`](tg-load-kg-core.md) - Load knowledge core into flow -- [`tg-delete-kg-core`](tg-delete-kg-core.md) - Remove knowledge core -- [`tg-unload-kg-core`](tg-unload-kg-core.md) - Unload knowledge core from flow - -## API Integration - -This command uses the [Knowledge API](../apis/api-knowledge.md) with the `list-kg-cores` operation to retrieve available knowledge cores. - -## Use Cases - -### Knowledge Inventory -```bash -# Check what knowledge is available -tg-show-kg-cores - -# Document available knowledge resources -tg-show-kg-cores > knowledge-inventory.txt -``` - -### Pre-Processing Verification -```bash -# Verify knowledge cores exist before loading -tg-show-kg-cores | grep "medical" -tg-load-kg-core --kg-core-id "medical-knowledge-v1" --flow-id "medical-flow" -``` - -### Multi-User Management -```bash -# Check knowledge for different users -tg-show-kg-cores -U researcher -tg-show-kg-cores -U analyst -tg-show-kg-cores -U admin -``` - -### Knowledge Discovery -```bash -# Find knowledge cores by pattern -tg-show-kg-cores | grep "2024" -tg-show-kg-cores | grep "medical" -tg-show-kg-cores | grep "v[0-9]" -``` - -### System Administration -```bash -# Audit knowledge core usage -for user in $(cat users.txt); do - echo "User: $user" - tg-show-kg-cores -U $user - echo -done -``` - -### Development Workflow -```bash -# Check development knowledge cores -tg-show-kg-cores -U developer | grep "test" - -# Load test knowledge for development -tg-load-kg-core --kg-core-id "test-knowledge" --flow-id "dev-flow" -``` - -## Knowledge Core Lifecycle - -1. **Creation**: Knowledge cores created via `tg-put-kg-core` or document processing -2. **Discovery**: Use `tg-show-kg-cores` to find available cores -3. 
**Loading**: Load cores into flows with `tg-load-kg-core` -4. **Usage**: Query loaded knowledge via RAG or agent services -5. **Management**: Update, backup, or remove cores as needed - -## Best Practices - -1. **Regular Inventory**: Check available knowledge cores regularly -2. **Naming Conventions**: Use consistent naming for easier discovery -3. **User Organization**: Organize knowledge cores by user and purpose -4. **Version Management**: Track knowledge core versions and updates -5. **Cleanup**: Remove obsolete knowledge cores to save storage -6. **Documentation**: Document knowledge core contents and purposes - -## Integration with Other Commands - -### Knowledge Loading Workflow -```bash -# 1. Discover available knowledge -tg-show-kg-cores - -# 2. Start appropriate flow -tg-start-flow -n "research-class" -i "research-flow" -d "Research analysis" - -# 3. Load relevant knowledge -tg-load-kg-core --kg-core-id "research-papers-2024" --flow-id "research-flow" - -# 4. Query the knowledge -tg-invoke-graph-rag -q "What are the latest research trends?" -f "research-flow" -``` - -### Knowledge Management Workflow -```bash -# 1. Audit current knowledge -tg-show-kg-cores > current-cores.txt - -# 2. Import new knowledge -tg-put-kg-core --id "new-research-2024" -i new-research.msgpack - -# 3. Verify import -tg-show-kg-cores | grep "new-research-2024" - -# 4. Remove old versions -tg-delete-kg-core --id "old-research-2023" -``` \ No newline at end of file diff --git a/docs/cli/tg-show-library-documents.md b/docs/cli/tg-show-library-documents.md deleted file mode 100644 index ea5118a9..00000000 --- a/docs/cli/tg-show-library-documents.md +++ /dev/null @@ -1,481 +0,0 @@ -# tg-show-library-documents - -Lists all documents stored in the TrustGraph document library with their metadata. 
- -## Synopsis - -```bash -tg-show-library-documents [options] -``` - -## Description - -The `tg-show-library-documents` command displays all documents currently stored in TrustGraph's document library. For each document, it shows comprehensive metadata including ID, timestamp, title, document type, comments, and associated tags. - -The document library serves as a centralized repository for managing documents before and after processing through TrustGraph workflows. - -## Options - -### Optional Arguments - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-U, --user USER`: User ID to filter documents (default: `trustgraph`) - -## Examples - -### List All Documents -```bash -tg-show-library-documents -``` - -### List Documents for Specific User -```bash -tg-show-library-documents -U "research-team" -``` - -### Using Custom API URL -```bash -tg-show-library-documents -u http://production:8088/ -``` - -## Output Format - -The command displays each document in a formatted table: - -``` -+-------+----------------------------------+ -| id | doc_123456789 | -| time | 2023-12-15 10:30:45 | -| title | Technical Manual v2.1 | -| kind | PDF | -| note | Updated installation procedures | -| tags | technical, manual, v2.1 | -+-------+----------------------------------+ - -+-------+----------------------------------+ -| id | doc_987654321 | -| time | 2023-12-14 15:22:10 | -| title | Q4 Financial Report | -| kind | PDF | -| note | Quarterly analysis and metrics | -| tags | finance, quarterly, 2023 | -+-------+----------------------------------+ -``` - -### Document Properties - -- **id**: Unique document identifier -- **time**: Upload/creation timestamp -- **title**: Document title or name -- **kind**: Document type (PDF, DOCX, TXT, etc.) -- **note**: Comments or description -- **tags**: Comma-separated list of tags - -### Empty Results - -If no documents exist: -``` -No documents. 
-``` - -## Use Cases - -### Document Inventory -```bash -# Get complete document inventory -tg-show-library-documents > document-inventory.txt - -# Count total documents -tg-show-library-documents | grep -c "| id" -``` - -### Document Discovery -```bash -# Find documents by title pattern -tg-show-library-documents | grep -i "manual" - -# Find documents by type -tg-show-library-documents | grep "| kind.*PDF" - -# Find recent documents -tg-show-library-documents | grep "2023-12" -``` - -### User-Specific Queries -```bash -# List documents by different users -users=("research-team" "finance-dept" "legal-team") -for user in "${users[@]}"; do - echo "Documents for $user:" - tg-show-library-documents -U "$user" - echo "---" -done -``` - -### Document Management -```bash -# Extract document IDs for processing -tg-show-library-documents | \ - grep "| id" | \ - awk '{print $3}' > document-ids.txt - -# Find documents by tags -tg-show-library-documents | \ - grep -A5 -B5 "research" | \ - grep "| id" | \ - awk '{print $3}' -``` - -## Advanced Usage - -### Document Analysis -```bash -# Analyze document distribution by type -analyze_document_types() { - echo "Document Type Distribution:" - echo "==========================" - - tg-show-library-documents | \ - grep "| kind" | \ - awk '{print $3}' | \ - sort | uniq -c | sort -nr -} - -analyze_document_types -``` - -### Document Age Analysis -```bash -# Find old documents -find_old_documents() { - local days_old="$1" - - echo "Documents older than $days_old days:" - echo "====================================" - - cutoff_date=$(date -d "$days_old days ago" +"%Y-%m-%d") - - tg-show-library-documents | \ - grep "| time" | \ - while read -r line; do - doc_date=$(echo "$line" | awk '{print $3}') - if [[ "$doc_date" < "$cutoff_date" ]]; then - echo "$line" - fi - done -} - -# Find documents older than 30 days -find_old_documents 30 -``` - -### Tag Analysis -```bash -# Analyze tag usage -analyze_tags() { - echo "Tag Usage Analysis:" - echo 
"==================" - - tg-show-library-documents | \ - grep "| tags" | \ - sed 's/| tags.*| \(.*\) |/\1/' | \ - tr ',' '\n' | \ - sed 's/^ *//;s/ *$//' | \ - sort | uniq -c | sort -nr -} - -analyze_tags -``` - -### Document Search -```bash -# Search documents by multiple criteria -search_documents() { - local query="$1" - - echo "Searching for: $query" - echo "====================" - - tg-show-library-documents | \ - grep -i -A6 -B6 "$query" | \ - grep -E "^\+|^\|" -} - -# Search for specific terms -search_documents "financial" -search_documents "manual" -``` - -### User Document Summary -```bash -# Generate user document summary -user_summary() { - local user="$1" - - echo "Document Summary for User: $user" - echo "================================" - - docs=$(tg-show-library-documents -U "$user") - - if [[ "$docs" == "No documents." ]]; then - echo "No documents found for user: $user" - return - fi - - # Count documents - doc_count=$(echo "$docs" | grep -c "| id") - echo "Total documents: $doc_count" - - # Count by type - echo -e "\nBy type:" - echo "$docs" | \ - grep "| kind" | \ - awk '{print $3}' | \ - sort | uniq -c | sort -nr - - # Recent documents - echo -e "\nRecent documents (last 7 days):" - recent_date=$(date -d "7 days ago" +"%Y-%m-%d") - echo "$docs" | \ - grep "| time" | \ - awk -v cutoff="$recent_date" '$3 >= cutoff {print $0}' -} - -# Generate summary for specific user -user_summary "research-team" -``` - -### Document Export -```bash -# Export document metadata to CSV -export_to_csv() { - local output_file="$1" - - echo "id,time,title,kind,note,tags" > "$output_file" - - tg-show-library-documents | \ - awk ' - BEGIN { record="" } - /^\+/ { - if (record != "") { - print record - record="" - } - } - /^\| id/ { gsub(/^\| id *\| /, ""); gsub(/ *\|$/, ""); record=$0"," } - /^\| time/ { gsub(/^\| time *\| /, ""); gsub(/ *\|$/, ""); record=record$0"," } - /^\| title/ { gsub(/^\| title *\| /, ""); gsub(/ *\|$/, ""); record=record$0"," } - /^\| kind/ { 
gsub(/^\| kind *\| /, ""); gsub(/ *\|$/, ""); record=record$0"," } - /^\| note/ { gsub(/^\| note *\| /, ""); gsub(/ *\|$/, ""); record=record$0"," } - /^\| tags/ { gsub(/^\| tags *\| /, ""); gsub(/ *\|$/, ""); record=record$0 } - END { if (record != "") print record } - ' >> "$output_file" - - echo "Exported to: $output_file" -} - -# Export to CSV -export_to_csv "documents.csv" -``` - -### Document Monitoring -```bash -# Monitor document library changes -monitor_documents() { - local interval="$1" - local log_file="document_changes.log" - - echo "Monitoring document library (interval: ${interval}s)" - echo "Log file: $log_file" - - # Get initial state - tg-show-library-documents > last_state.tmp - - while true; do - sleep "$interval" - - # Get current state - tg-show-library-documents > current_state.tmp - - # Compare states - if ! diff -q last_state.tmp current_state.tmp > /dev/null; then - timestamp=$(date) - echo "[$timestamp] Document library changed" >> "$log_file" - - # Log differences - diff last_state.tmp current_state.tmp >> "$log_file" - echo "---" >> "$log_file" - - # Update last state - mv current_state.tmp last_state.tmp - - echo "[$timestamp] Changes detected and logged" - else - rm current_state.tmp - fi - done -} - -# Monitor every 60 seconds -monitor_documents 60 -``` - -### Bulk Operations Helper -```bash -# Generate commands for bulk operations -generate_bulk_commands() { - local operation="$1" - - case "$operation" in - "remove-old") - echo "# Commands to remove old documents:" - cutoff_date=$(date -d "90 days ago" +"%Y-%m-%d") - tg-show-library-documents | \ - grep -B1 "| time.*$cutoff_date" | \ - grep "| id" | \ - awk '{print "tg-remove-library-document --id " $3}' - ;; - "process-unprocessed") - echo "# Commands to process documents:" - tg-show-library-documents | \ - grep "| id" | \ - awk '{print "tg-start-library-processing -d " $3 " --id proc-" $3}' - ;; - *) - echo "Unknown operation: $operation" - echo "Available: remove-old, 
process-unprocessed" - ;; - esac -} - -# Generate removal commands for old documents -generate_bulk_commands "remove-old" -``` - -## Integration with Other Commands - -### Document Processing Workflow -```bash -# Complete document workflow -process_document_workflow() { - echo "Document Library Workflow" - echo "========================" - - # 1. List current documents - echo "Current documents:" - tg-show-library-documents - - # 2. Add new document (example) - # tg-add-library-document --file new-doc.pdf --title "New Document" - - # 3. Start processing - # tg-start-library-processing -d doc_id --id proc_id - - # 4. Monitor processing - # tg-show-flows | grep processing - - # 5. Verify completion - echo "Documents after processing:" - tg-show-library-documents -} -``` - -### Document Lifecycle Management -```bash -# Manage document lifecycle -lifecycle_management() { - echo "Document Lifecycle Management" - echo "============================" - - # Get all documents - tg-show-library-documents | \ - grep "| id" | \ - awk '{print $3}' | \ - while read doc_id; do - echo "Processing document: $doc_id" - - # Check if already processed - if tg-invoke-document-rag -q "test" 2>/dev/null | grep -q "$doc_id"; then - echo " Already processed" - else - echo " Starting processing..." - # tg-start-library-processing -d "$doc_id" --id "proc-$doc_id" - fi - done -} -``` - -## Error Handling - -### Connection Issues -```bash -Exception: Connection refused -``` -**Solution**: Check API URL and ensure TrustGraph is running. - -### Permission Errors -```bash -Exception: Access denied -``` -**Solution**: Verify user permissions for library access. - -### User Not Found -```bash -Exception: User not found -``` -**Solution**: Check user ID spelling and ensure user exists. 
- -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-add-library-document`](tg-add-library-document.md) - Add documents to library -- [`tg-remove-library-document`](tg-remove-library-document.md) - Remove documents from library -- [`tg-start-library-processing`](tg-start-library-processing.md) - Start document processing -- [`tg-stop-library-processing`](tg-stop-library-processing.md) - Stop document processing -- [`tg-invoke-document-rag`](tg-invoke-document-rag.md) - Query processed documents - -## API Integration - -This command uses the [Library API](../apis/api-librarian.md) to retrieve document metadata and listings. - -## Best Practices - -1. **Regular Monitoring**: Check library contents regularly -2. **User Organization**: Use different users for different document categories -3. **Tag Consistency**: Maintain consistent tagging schemes -4. **Cleanup**: Regularly remove outdated documents -5. **Backup**: Export document metadata for backup purposes -6. **Access Control**: Use appropriate user permissions -7. **Documentation**: Maintain good document titles and descriptions - -## Troubleshooting - -### No Documents Shown -```bash -# Check if documents exist for different users -tg-show-library-documents -U "different-user" - -# Verify API connectivity -curl -s "$TRUSTGRAPH_URL/api/v1/library/documents" > /dev/null -echo "API response: $?" 
-``` - -### Formatting Issues -```bash -# If output is garbled, check terminal width -export COLUMNS=120 -tg-show-library-documents -``` - -### Slow Response -```bash -# For large document libraries, consider filtering by user -tg-show-library-documents -U "specific-user" - -# Check system resources -free -h -ps aux | grep trustgraph -``` \ No newline at end of file diff --git a/docs/cli/tg-show-library-processing.md b/docs/cli/tg-show-library-processing.md deleted file mode 100644 index 690b7e12..00000000 --- a/docs/cli/tg-show-library-processing.md +++ /dev/null @@ -1,572 +0,0 @@ -# tg-show-library-processing - -Displays all active library document processing records and their details. - -## Synopsis - -```bash -tg-show-library-processing [options] -``` - -## Description - -The `tg-show-library-processing` command lists all library document processing records, showing the status and details of document processing jobs that have been initiated through the library system. This provides visibility into which documents are being processed, their associated flows, and processing metadata. 
- -## Options - -### Optional Arguments - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-U, --user USER`: User ID to filter processing records (default: `trustgraph`) - -## Examples - -### Show All Processing Records -```bash -tg-show-library-processing -``` - -### Show Processing for Specific User -```bash -tg-show-library-processing -U "research-team" -``` - -### Use Custom API URL -```bash -tg-show-library-processing -u http://production:8088/ -``` - -## Output Format - -The command displays processing records in formatted tables: - -``` -+----------------+----------------------------------+ -| id | proc_research_001 | -| document-id | doc_123456789 | -| time | 2023-12-15 14:30:22 | -| flow | research-processing | -| collection | research-docs | -| tags | nlp, research, automated | -+----------------+----------------------------------+ - -+----------------+----------------------------------+ -| id | proc_batch_002 | -| document-id | doc_987654321 | -| time | 2023-12-15 14:25:18 | -| flow | document-analysis | -| collection | batch-processed | -| tags | batch, analysis | -+----------------+----------------------------------+ -``` - -### Field Details - -- **id**: Unique processing record identifier -- **document-id**: ID of the document being processed -- **time**: Timestamp when processing was initiated -- **flow**: Flow instance used for processing -- **collection**: Target collection for processed data -- **tags**: Associated tags for categorization - -### Empty Results - -If no processing records exist: -``` -No processing objects. -``` - -## Use Cases - -### Processing Status Monitoring -```bash -# Monitor active processing jobs -monitor_processing_status() { - local interval="${1:-30}" # Default 30 seconds - - echo "Monitoring library processing status..." 
- echo "Refresh interval: ${interval}s" - echo "Press Ctrl+C to stop" - - while true; do - clear - echo "Library Processing Monitor - $(date)" - echo "====================================" - - tg-show-library-processing - - echo -e "\nProcessing Summary:" - processing_count=$(tg-show-library-processing 2>/dev/null | grep -c "| id" || echo "0") - echo "Active processing jobs: $processing_count" - - sleep "$interval" - done -} - -# Start monitoring -monitor_processing_status 15 -``` - -### User Activity Analysis -```bash -# Analyze processing activity by user -analyze_user_processing() { - local users=("user1" "user2" "user3" "research-team") - - echo "Processing Activity Analysis" - echo "===========================" - - for user in "${users[@]}"; do - echo -e "\n--- User: $user ---" - - processing_output=$(tg-show-library-processing -U "$user" 2>/dev/null) - - if echo "$processing_output" | grep -q "No processing objects"; then - echo "No active processing" - else - count=$(echo "$processing_output" | grep -c "| id" || echo "0") - echo "Active processing jobs: $count" - - # Show recent jobs - echo "Recent processing:" - echo "$processing_output" | grep -E "(id|time|flow)" | head -9 - fi - done -} - -# Run analysis -analyze_user_processing -``` - -### Processing Queue Management -```bash -# Manage processing queue -manage_processing_queue() { - echo "Processing Queue Management" - echo "==========================" - - # Show current queue - echo "Current processing queue:" - tg-show-library-processing - - # Count by flow - echo -e "\nProcessing jobs by flow:" - tg-show-library-processing | \ - grep "| flow" | \ - awk '{print $3}' | \ - sort | uniq -c | sort -nr - - # Count by collection - echo -e "\nProcessing jobs by collection:" - tg-show-library-processing | \ - grep "| collection" | \ - awk '{print $3}' | \ - sort | uniq -c | sort -nr - - # Find long-running jobs (would need timestamps comparison) - echo -e "\nNote: Check timestamps for long-running jobs" -} - 
-# Run queue management -manage_processing_queue -``` - -### Cleanup and Maintenance -```bash -# Clean up completed processing records -cleanup_processing_records() { - local user="$1" - local max_age_days="${2:-7}" # Default 7 days - - echo "Cleaning up processing records older than $max_age_days days for user: $user" - - # Get processing records - processing_output=$(tg-show-library-processing -U "$user") - - if echo "$processing_output" | grep -q "No processing objects"; then - echo "No processing records to clean up" - return - fi - - # Parse processing records (this is a simplified example) - echo "$processing_output" | \ - grep "| id" | \ - awk '{print $3}' | \ - while read proc_id; do - echo "Checking processing record: $proc_id" - - # Get the time for this processing record - proc_time=$(echo "$processing_output" | \ - grep -A10 "$proc_id" | \ - grep "| time" | \ - awk '{print $3 " " $4}') - - if [ -n "$proc_time" ]; then - # Calculate age (this would need proper date comparison) - echo "Processing record $proc_id from: $proc_time" - - # Check if document processing is complete - if tg-invoke-document-rag -q "test" -U "$user" 2>/dev/null | grep -q "answer"; then - echo "Document appears to be processed, considering cleanup..." 
- # tg-stop-library-processing --id "$proc_id" -U "$user" - fi - fi - done -} - -# Clean up old records -cleanup_processing_records "test-user" 3 -``` - -## Advanced Usage - -### Processing Performance Analysis -```bash -# Analyze processing performance -analyze_processing_performance() { - echo "Processing Performance Analysis" - echo "==============================" - - # Get all processing records - processing_data=$(tg-show-library-processing) - - if echo "$processing_data" | grep -q "No processing objects"; then - echo "No processing data available" - return - fi - - # Count total processing jobs - total_jobs=$(echo "$processing_data" | grep -c "| id") - echo "Total active processing jobs: $total_jobs" - - # Analyze by flow type - echo -e "\nJobs by flow type:" - echo "$processing_data" | \ - grep "| flow" | \ - awk '{print $3}' | \ - sort | uniq -c | sort -nr | \ - while read count flow; do - echo " $flow: $count jobs" - done - - # Analyze by time patterns - echo -e "\nJobs by hour (last 24h):" - echo "$processing_data" | \ - grep "| time" | \ - awk '{print $4}' | \ - cut -d: -f1 | \ - sort | uniq -c | sort -k2n | \ - while read count hour; do - echo " ${hour}:00: $count jobs" - done -} - -# Run performance analysis -analyze_processing_performance -``` - -### Cross-User Processing Comparison -```bash -# Compare processing across users -compare_user_processing() { - local users=("$@") - - echo "Cross-User Processing Comparison" - echo "===============================" - - for user in "${users[@]}"; do - echo -e "\n--- User: $user ---" - - processing_data=$(tg-show-library-processing -U "$user" 2>/dev/null) - - if echo "$processing_data" | grep -q "No processing objects"; then - echo "Active jobs: 0" - echo "Collections: none" - echo "Flows: none" - else - # Count jobs - job_count=$(echo "$processing_data" | grep -c "| id") - echo "Active jobs: $job_count" - - # List collections - collections=$(echo "$processing_data" | \ - grep "| collection" | \ - awk '{print 
$3}' | \ - sort | uniq | tr '\n' ', ' | sed 's/,$//') - echo "Collections: $collections" - - # List flows - flows=$(echo "$processing_data" | \ - grep "| flow" | \ - awk '{print $3}' | \ - sort | uniq | tr '\n' ', ' | sed 's/,$//') - echo "Flows: $flows" - fi - done -} - -# Compare processing for multiple users -compare_user_processing "user1" "user2" "research-team" "admin" -``` - -### Processing Health Check -```bash -# Health check for processing system -processing_health_check() { - echo "Library Processing Health Check" - echo "==============================" - - # Check if processing service is responsive - if tg-show-library-processing > /dev/null 2>&1; then - echo "✓ Processing service is responsive" - else - echo "✗ Processing service is not responsive" - return 1 - fi - - # Get processing statistics - processing_data=$(tg-show-library-processing 2>/dev/null) - - if echo "$processing_data" | grep -q "No processing objects"; then - echo "ℹ No active processing jobs" - else - active_jobs=$(echo "$processing_data" | grep -c "| id") - echo "ℹ Active processing jobs: $active_jobs" - - # Check for stuck jobs (simplified check) - echo "Recent job timestamps:" - echo "$processing_data" | \ - grep "| time" | \ - awk '{print $3 " " $4}' | \ - head -5 - fi - - # Check flow availability - echo -e "\nFlow availability check:" - flows=$(echo "$processing_data" | grep "| flow" | awk '{print $3}' | sort | uniq) - - for flow in $flows; do - if tg-show-flows | grep -q "$flow"; then - echo "✓ Flow '$flow' is available" - else - echo "⚠ Flow '$flow' may not be available" - fi - done - - echo "Health check completed" -} - -# Run health check -processing_health_check -``` - -### Processing Report Generation -```bash -# Generate comprehensive processing report -generate_processing_report() { - local output_file="processing_report_$(date +%Y%m%d_%H%M%S).txt" - - echo "Generating processing report: $output_file" - - cat > "$output_file" << EOF -TrustGraph Library Processing Report 
-Generated: $(date) -==================================== - -EOF - - # Overall statistics - echo "OVERVIEW" >> "$output_file" - echo "--------" >> "$output_file" - - processing_data=$(tg-show-library-processing 2>/dev/null) - - if echo "$processing_data" | grep -q "No processing objects"; then - echo "No active processing jobs" >> "$output_file" - else - total_jobs=$(echo "$processing_data" | grep -c "| id") - echo "Total active jobs: $total_jobs" >> "$output_file" - - # Flow distribution - echo -e "\nFLOW DISTRIBUTION" >> "$output_file" - echo "-----------------" >> "$output_file" - echo "$processing_data" | \ - grep "| flow" | \ - awk '{print $3}' | \ - sort | uniq -c | sort -nr >> "$output_file" - - # Collection distribution - echo -e "\nCOLLECTION DISTRIBUTION" >> "$output_file" - echo "-----------------------" >> "$output_file" - echo "$processing_data" | \ - grep "| collection" | \ - awk '{print $3}' | \ - sort | uniq -c | sort -nr >> "$output_file" - - # Recent activity - echo -e "\nRECENT PROCESSING JOBS" >> "$output_file" - echo "----------------------" >> "$output_file" - echo "$processing_data" | head -50 >> "$output_file" - fi - - echo "Report generated: $output_file" -} - -# Generate report -generate_processing_report -``` - -## Integration with Other Commands - -### Processing Workflow Management -```bash -# Complete processing workflow -manage_processing_workflow() { - local user="$1" - local action="$2" - - case "$action" in - "status") - echo "Processing status for user: $user" - tg-show-library-processing -U "$user" - ;; - "start-batch") - echo "Starting batch processing for user: $user" - tg-show-library-documents -U "$user" | \ - grep "| id" | \ - awk '{print $3}' | \ - while read doc_id; do - proc_id="batch_$(date +%s)_${doc_id}" - tg-start-library-processing -d "$doc_id" --id "$proc_id" -U "$user" - done - ;; - "cleanup") - echo "Cleaning up completed processing for user: $user" - cleanup_processing_records "$user" - ;; - *) - echo "Usage: 
manage_processing_workflow <user> <status|start-batch|cleanup>" ;; - esac -} - -# Manage workflow for user -manage_processing_workflow "research-team" "status" -``` - -### Monitoring Integration -```bash -# Integration with system monitoring -processing_metrics_export() { - local metrics_file="processing_metrics.txt" - - # Get processing data - processing_data=$(tg-show-library-processing 2>/dev/null) - - if echo "$processing_data" | grep -q "No processing objects"; then - active_jobs=0 - else - active_jobs=$(echo "$processing_data" | grep -c "| id") - fi - - # Export metrics - echo "trustgraph_library_processing_active_jobs $active_jobs" > "$metrics_file" - echo "trustgraph_library_processing_timestamp $(date +%s)" >> "$metrics_file" - - # Export by flow - if [ "$active_jobs" -gt 0 ]; then - echo "$processing_data" | \ - grep "| flow" | \ - awk '{print $3}' | \ - sort | uniq -c | \ - while read count flow; do - echo "trustgraph_library_processing_jobs_by_flow{flow=\"$flow\"} $count" >> "$metrics_file" - done - fi - - echo "Metrics exported to: $metrics_file" -} - -processing_metrics_export -``` - -## Error Handling - -### API Connection Issues -```bash -Exception: Connection refused -``` -**Solution**: Check API URL and ensure TrustGraph is running. - -### Permission Errors -```bash -Exception: Access denied -``` -**Solution**: Verify user permissions for library access. - -### User Not Found -```bash -Exception: User not found -``` -**Solution**: Check user ID and ensure user exists in the system. - -### Service Unavailable -```bash -Exception: Service temporarily unavailable -``` -**Solution**: Check TrustGraph service status and try again. 
- -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-start-library-processing`](tg-start-library-processing.md) - Start document processing -- [`tg-stop-library-processing`](tg-stop-library-processing.md) - Stop document processing -- [`tg-show-library-documents`](tg-show-library-documents.md) - List library documents -- [`tg-show-flows`](tg-show-flows.md) - List available flows - -## API Integration - -This command uses the [Library API](../apis/api-librarian.md) to retrieve processing record information. - -## Best Practices - -1. **Regular Monitoring**: Check processing status regularly -2. **User Filtering**: Use user filtering to focus on relevant processing -3. **Cleanup**: Regularly clean up completed processing records -4. **Performance Tracking**: Monitor processing patterns and performance -5. **Integration**: Integrate with monitoring and alerting systems -6. **Documentation**: Document processing workflows and procedures -7. **Troubleshooting**: Use processing information for issue diagnosis - -## Troubleshooting - -### No Processing Records -```bash -# Check if library service is running -curl -s http://localhost:8088/api/v1/library/processing - -# Verify documents exist -tg-show-library-documents -``` - -### Stale Processing Records -```bash -# Check for long-running processes -tg-show-library-processing | grep "$(date -d '1 hour ago' '+%Y-%m-%d')" - -# Check flow status -tg-show-flows -``` - -### Performance Issues -```bash -# Check system resources -free -h -df -h - -# Monitor API response times -time tg-show-library-processing -``` \ No newline at end of file diff --git a/docs/cli/tg-show-processor-state.md b/docs/cli/tg-show-processor-state.md deleted file mode 100644 index ab6017c7..00000000 --- a/docs/cli/tg-show-processor-state.md +++ /dev/null @@ -1,196 +0,0 @@ -# tg-show-processor-state - -## Synopsis - -``` -tg-show-processor-state [OPTIONS] -``` - -## Description - -The 
`tg-show-processor-state` command displays the current state of TrustGraph processors by querying the metrics endpoint. It retrieves processor information from the Prometheus metrics API and displays active processors with visual status indicators. - -This command is useful for: -- Monitoring processor health and availability -- Verifying that processors are running correctly -- Troubleshooting processor connectivity issues -- Getting a quick overview of active TrustGraph components - -## Options - -- `-m, --metrics-url URL` - - Metrics endpoint URL to query for processor information - - Default: `http://localhost:8088/api/metrics` - - Should point to a Prometheus-compatible metrics endpoint - -- `-h, --help` - - Show help message and exit - -## Examples - -### Basic Usage - -Display processor states using the default metrics URL: -```bash -tg-show-processor-state -``` - -### Custom Metrics URL - -Query processor states from a different metrics endpoint: -```bash -tg-show-processor-state -m http://metrics.example.com:8088/api/metrics -``` - -### Remote Monitoring - -Monitor processors on a remote TrustGraph instance: -```bash -tg-show-processor-state --metrics-url http://10.0.1.100:8088/api/metrics -``` - -## Output Format - -The command displays processor information in a table format: -``` - processor_name 💚 - another_processor 💚 - third_processor 💚 -``` - -Each line shows: -- Processor name (left-aligned, 30 characters wide) -- Status indicator (💚 for active processors) - -## Advanced Usage - -### Monitoring Script - -Create a monitoring script to periodically check processor states: -```bash -#!/bin/bash -while true; do - echo "=== Processor State Check ===" - date - tg-show-processor-state - echo - sleep 30 -done -``` - -### Health Check Integration - -Use in health check scripts: -```bash -#!/bin/bash -output=$(tg-show-processor-state 2>&1) -if [ $? 
-eq 0 ]; then - echo "Processors are running" - echo "$output" -else - echo "Error checking processor state: $output" - exit 1 -fi -``` - -### Multiple Environment Monitoring - -Monitor processors across different environments: -```bash -#!/bin/bash -for env in dev staging prod; do - echo "=== $env Environment ===" - tg-show-processor-state -m "http://${env}-metrics:8088/api/metrics" - echo -done -``` - -## Error Handling - -The command handles various error conditions: - -- **Connection errors**: If the metrics endpoint is unavailable -- **Invalid JSON**: If the metrics response is malformed -- **Missing data**: If the expected processor_info metric is not found -- **HTTP errors**: If the metrics endpoint returns an error status - -Common error scenarios: -```bash -# Metrics endpoint not available -tg-show-processor-state -m http://invalid-host:8088/api/metrics -# Output: Exception: [Connection error details] - -# Invalid URL format -tg-show-processor-state -m "not-a-url" -# Output: Exception: [URL parsing error] -``` - -## Integration with Other Commands - -### With Flow Monitoring - -Combine with flow state monitoring: -```bash -echo "=== Processor States ===" -tg-show-processor-state -echo -echo "=== Flow States ===" -tg-show-flow-state -``` - -### With Configuration Display - -Check processors and current configuration: -```bash -echo "=== Active Processors ===" -tg-show-processor-state -echo -echo "=== Current Configuration ===" -tg-show-config -``` - -## Best Practices - -1. **Regular Monitoring**: Include in regular health check routines -2. **Error Handling**: Always check command exit status in scripts -3. **Logging**: Capture output for historical analysis -4. **Alerting**: Set up alerts based on processor availability -5. **Documentation**: Keep track of expected processors for each environment - -## Troubleshooting - -### No Processors Shown - -If no processors are displayed: -1. Verify the metrics endpoint is accessible -2. 
Check that TrustGraph processors are running -3. Ensure processors are properly configured to export metrics -4. Verify the metrics URL is correct - -### Connection Issues - -For connection problems: -1. Test network connectivity to the metrics endpoint -2. Verify the metrics service is running -3. Check firewall rules and network policies -4. Ensure the correct port is being used - -### Metrics Format Issues - -If the command fails with JSON parsing errors: -1. Verify the metrics endpoint returns Prometheus-compatible data -2. Check that the `processor_info` metric exists -3. Ensure the metrics service is properly configured - -## Related Commands - -- [`tg-show-flow-state`](tg-show-flow-state.md) - Display flow processor states -- [`tg-show-config`](tg-show-config.md) - Show TrustGraph configuration -- [`tg-show-token-costs`](tg-show-token-costs.md) - Display token usage costs -- [`tg-show-library-processing`](tg-show-library-processing.md) - Show library processing status - -## See Also - -- TrustGraph Processor Documentation -- Prometheus Metrics Configuration -- TrustGraph Monitoring Guide \ No newline at end of file diff --git a/docs/cli/tg-show-prompts.md b/docs/cli/tg-show-prompts.md deleted file mode 100644 index 72d9937e..00000000 --- a/docs/cli/tg-show-prompts.md +++ /dev/null @@ -1,454 +0,0 @@ -# tg-show-prompts - -Displays all configured prompt templates and system prompts in TrustGraph. - -## Synopsis - -```bash -tg-show-prompts [options] -``` - -## Description - -The `tg-show-prompts` command displays all prompt templates and the system prompt currently configured in TrustGraph. This includes template IDs, prompt text, response types, and JSON schemas for structured responses. - -Use this command to review existing prompts, verify configurations, and understand available templates for use with `tg-invoke-prompt`. 
- -## Options - -### Optional Arguments - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) - -## Examples - -### Display All Prompts -```bash -tg-show-prompts -``` - -### Using Custom API URL -```bash -tg-show-prompts -u http://production:8088/ -``` - -## Output Format - -The command displays prompts in formatted tables: - -``` -System prompt: -+---------+--------------------------------------------------+ -| prompt | You are a helpful AI assistant. Always provide | -| | accurate, concise responses. When uncertain, | -| | clearly state your limitations. | -+---------+--------------------------------------------------+ - -greeting: -+---------+--------------------------------------------------+ -| prompt | Hello {{name}}, welcome to {{place}}! | -+---------+--------------------------------------------------+ - -question: -+----------+-------------------------------------------------+ -| prompt | Answer this question based on the context: | -| | {{question}} | -| | | -| | Context: {{context}} | -+----------+-------------------------------------------------+ - -extract-info: -+----------+-------------------------------------------------+ -| prompt | Extract key information from: {{text}} | -| response | json | -| schema | {"type": "object", "properties": { | -| | "name": {"type": "string"}, | -| | "age": {"type": "number"}}} | -+----------+-------------------------------------------------+ -``` - -### Template Information - -For each template, the output shows: -- **prompt**: The template text with variable placeholders -- **response**: Response format (`text` or `json`) -- **schema**: JSON schema for structured responses (when applicable) - -## Use Cases - -### Template Discovery -```bash -# Find all available templates -tg-show-prompts | grep "^[a-zA-Z]" | grep ":" - -# Find templates with specific keywords -tg-show-prompts | grep -B5 -A5 "analyze" -``` - -### Template Verification -```bash -# Check if specific 
template exists -if tg-show-prompts | grep -q "my-template:"; then - echo "Template exists" -else - echo "Template not found" -fi -``` - -### Configuration Review -```bash -# Review current system prompt -tg-show-prompts | grep -A10 "System prompt:" - -# Check JSON response templates -tg-show-prompts | grep -B2 -A5 "response.*json" -``` - -### Template Inventory -```bash -# Count total templates -template_count=$(tg-show-prompts | grep -c "^[a-zA-Z][^:]*:$") -echo "Total templates: $template_count" - -# List template names only -tg-show-prompts | grep "^[a-zA-Z][^:]*:$" | sed 's/:$//' -``` - -## Advanced Usage - -### Template Analysis -```bash -# Analyze template complexity -analyze_templates() { - echo "Template Analysis" - echo "================" - - tg-show-prompts > temp_prompts.txt - - # Count variables per template - echo "Templates with variables:" - grep -B1 -A5 "{{" temp_prompts.txt | \ - grep "^[a-zA-Z]" | \ - while read template; do - var_count=$(grep -A5 "$template" temp_prompts.txt | grep -o "{{[^}]*}}" | wc -l) - echo " $template $var_count variables" - done - - # Find JSON response templates - echo -e "\nJSON Response Templates:" - grep -B1 "response.*json" temp_prompts.txt | \ - grep "^[a-zA-Z]" | \ - sed 's/:$//' - - rm temp_prompts.txt -} - -analyze_templates -``` - -### Template Documentation Generator -```bash -# Generate template documentation -generate_template_docs() { - local output_file="template_documentation.md" - - echo "# TrustGraph Prompt Templates" > "$output_file" - echo "Generated on $(date)" >> "$output_file" - echo "" >> "$output_file" - - # Extract system prompt - echo "## System Prompt" >> "$output_file" - tg-show-prompts | \ - awk '/System prompt:/,/^\+.*\+$/' | \ - grep "| prompt" | \ - sed 's/| prompt | //' | \ - sed 's/ *|$//' >> "$output_file" - - echo "" >> "$output_file" - echo "## Templates" >> "$output_file" - - # Extract each template - tg-show-prompts | \ - grep "^[a-zA-Z][^:]*:$" | \ - sed 's/:$//' | \ - while read 
template_id; do - echo "" >> "$output_file" - echo "### $template_id" >> "$output_file" - - # Get template details - tg-show-prompts | \ - awk "/^$template_id:/,/^$/" | \ - while read line; do - if [[ "$line" =~ ^\|\ prompt ]]; then - echo "**Prompt:**" >> "$output_file" - echo '```' >> "$output_file" - echo "$line" | sed 's/| prompt[[:space:]]*| //' | sed 's/ *|$//' >> "$output_file" - echo '```' >> "$output_file" - elif [[ "$line" =~ ^\|\ response ]]; then - response_type=$(echo "$line" | sed 's/| response[[:space:]]*| //' | sed 's/ *|$//') - echo "**Response Type:** $response_type" >> "$output_file" - elif [[ "$line" =~ ^\|\ schema ]]; then - echo "**JSON Schema:**" >> "$output_file" - echo '```json' >> "$output_file" - echo "$line" | sed 's/| schema[[:space:]]*| //' | sed 's/ *|$//' >> "$output_file" - echo '```' >> "$output_file" - fi - done - done - - echo "Documentation generated: $output_file" -} - -generate_template_docs -``` - -### Template Validation -```bash -# Validate template configurations -validate_templates() { - echo "Template Validation Report" - echo "=========================" - - tg-show-prompts > temp_prompts.txt - - # Check for templates without variables - echo "Templates without variables:" - grep -B1 -A5 "^[a-zA-Z]" temp_prompts.txt | \ - grep -v "{{" | \ - grep "^[a-zA-Z][^:]*:$" | \ - sed 's/:$//' | \ - while read template; do - if ! grep -A5 "$template:" temp_prompts.txt | grep -q "{{"; then - echo " - $template" - fi - done - - # Check JSON templates have schemas - echo -e "\nJSON templates without schemas:" - grep -B1 -A10 "response.*json" temp_prompts.txt | \ - grep -B10 -A10 "response.*json" | \ - while read -r line; do - if [[ "$line" =~ ^([a-zA-Z][^:]*):$ ]]; then - template="${BASH_REMATCH[1]}" - if ! 
grep -A10 "$template:" temp_prompts.txt | grep -q "schema"; then - echo " - $template" - fi - fi - done - - rm temp_prompts.txt -} - -validate_templates -``` - -### Template Usage Examples -```bash -# Generate usage examples for templates -generate_usage_examples() { - local template_id="$1" - - echo "Usage examples for template: $template_id" - echo "========================================" - - # Extract template and find variables - tg-show-prompts | \ - awk "/^$template_id:/,/^$/" | \ - grep "| prompt" | \ - sed 's/| prompt[[:space:]]*| //' | \ - sed 's/ *|$//' | \ - while read prompt_text; do - echo "Template:" - echo "$prompt_text" - echo "" - - # Extract variables - variables=$(echo "$prompt_text" | grep -o "{{[^}]*}}" | sed 's/[{}]//g' | sort | uniq) - - if [ -n "$variables" ]; then - echo "Variables:" - for var in $variables; do - echo " - $var" - done - echo "" - - echo "Example usage:" - cmd="tg-invoke-prompt $template_id" - for var in $variables; do - case "$var" in - *name*) cmd="$cmd $var=\"John Doe\"" ;; - *text*|*content*) cmd="$cmd $var=\"Sample text content\"" ;; - *question*) cmd="$cmd $var=\"What is this about?\"" ;; - *context*) cmd="$cmd $var=\"Background information\"" ;; - *) cmd="$cmd $var=\"value\"" ;; - esac - done - echo "$cmd" - else - echo "No variables found." 
- echo "Usage: tg-invoke-prompt $template_id" - fi - done -} - -# Generate examples for specific template -generate_usage_examples "question" -``` - -### Environment Comparison -```bash -# Compare templates between environments -compare_environments() { - local env1_url="$1" - local env2_url="$2" - - echo "Comparing templates between environments" - echo "======================================" - - # Get templates from both environments - tg-show-prompts -u "$env1_url" | grep "^[a-zA-Z][^:]*:$" | sed 's/:$//' | sort > env1_templates.txt - tg-show-prompts -u "$env2_url" | grep "^[a-zA-Z][^:]*:$" | sed 's/:$//' | sort > env2_templates.txt - - echo "Environment 1 ($env1_url): $(wc -l < env1_templates.txt) templates" - echo "Environment 2 ($env2_url): $(wc -l < env2_templates.txt) templates" - echo "" - - # Find differences - echo "Templates only in Environment 1:" - comm -23 env1_templates.txt env2_templates.txt | sed 's/^/ - /' - - echo -e "\nTemplates only in Environment 2:" - comm -13 env1_templates.txt env2_templates.txt | sed 's/^/ - /' - - echo -e "\nCommon templates:" - comm -12 env1_templates.txt env2_templates.txt | sed 's/^/ - /' - - rm env1_templates.txt env2_templates.txt -} - -# Compare development and production -compare_environments "http://dev:8088/" "http://prod:8088/" -``` - -### Template Export/Import -```bash -# Export templates to JSON -export_templates() { - local output_file="$1" - - echo "Exporting templates to: $output_file" - - echo "{" > "$output_file" - echo " \"export_date\": \"$(date -Iseconds)\"," >> "$output_file" - echo " \"system_prompt\": \"$(tg-show-prompts | awk '/System prompt:/,/^\+.*\+$/' | grep '| prompt' | sed 's/| prompt[[:space:]]*| //' | sed 's/ *|$//' | sed 's/"/\\"/g')\"," >> "$output_file" - echo " \"templates\": {" >> "$output_file" - - first=true - tg-show-prompts | \ - grep "^[a-zA-Z][^:]*:$" | \ - sed 's/:$//' | \ - while read template_id; do - if [ "$first" = "false" ]; then - echo "," >> "$output_file" - fi - 
first=false - - echo -n " \"$template_id\": {" >> "$output_file" - - # Extract template details - tg-show-prompts | \ - awk "/^$template_id:/,/^$/" | \ - while read line; do - if [[ "$line" =~ ^\|\ prompt ]]; then - prompt=$(echo "$line" | sed 's/| prompt[[:space:]]*| //' | sed 's/ *|$//' | sed 's/"/\\"/g') - echo -n "\"prompt\": \"$prompt\"" >> "$output_file" - elif [[ "$line" =~ ^\|\ response ]]; then - response=$(echo "$line" | sed 's/| response[[:space:]]*| //' | sed 's/ *|$//') - echo -n ", \"response\": \"$response\"" >> "$output_file" - elif [[ "$line" =~ ^\|\ schema ]]; then - schema=$(echo "$line" | sed 's/| schema[[:space:]]*| //' | sed 's/ *|$//' | sed 's/"/\\"/g') - echo -n ", \"schema\": \"$schema\"" >> "$output_file" - fi - done - - echo "}" >> "$output_file" - done - - echo " }" >> "$output_file" - echo "}" >> "$output_file" - - echo "Export completed: $output_file" -} - -# Export current templates -export_templates "templates_backup.json" -``` - -## Error Handling - -### Connection Issues -```bash -Exception: Connection refused -``` -**Solution**: Check API URL and ensure TrustGraph is running. - -### Permission Errors -```bash -Exception: Access denied -``` -**Solution**: Verify user permissions for configuration access. - -### No Templates Found -```bash -# Empty output or no templates section -``` -**Solution**: Check if any templates are configured with `tg-set-prompt`. - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-set-prompt`](tg-set-prompt.md) - Create/update prompt templates -- [`tg-invoke-prompt`](tg-invoke-prompt.md) - Use prompt templates -- [`tg-invoke-document-rag`](tg-invoke-document-rag.md) - Document-based queries - -## API Integration - -This command uses the [Config API](../apis/api-config.md) to retrieve prompt templates and system prompts from TrustGraph's configuration system. - -## Best Practices - -1. 
**Regular Review**: Periodically review templates for relevance and accuracy -2. **Documentation**: Document template purposes and expected variables -3. **Version Control**: Track template changes over time -4. **Testing**: Verify templates work as expected after viewing -5. **Organization**: Use consistent naming conventions for templates -6. **Cleanup**: Remove unused or outdated templates -7. **Backup**: Export templates for backup and migration purposes - -## Troubleshooting - -### Formatting Issues -```bash -# If output is garbled or truncated -export COLUMNS=120 -tg-show-prompts -``` - -### Missing Templates -```bash -# Check if templates are actually configured -tg-show-prompts | grep -c "^[a-zA-Z].*:$" - -# Verify API connectivity -curl -s "$TRUSTGRAPH_URL/api/v1/config" > /dev/null -``` - -### Template Not Displaying -```bash -# Check template was set correctly -tg-set-prompt --id "test" --prompt "test template" -tg-show-prompts | grep "test:" -``` \ No newline at end of file diff --git a/docs/cli/tg-show-token-costs.md b/docs/cli/tg-show-token-costs.md deleted file mode 100644 index 5b373f3f..00000000 --- a/docs/cli/tg-show-token-costs.md +++ /dev/null @@ -1,470 +0,0 @@ -# tg-show-token-costs - -Displays token cost configuration for language models in TrustGraph. - -## Synopsis - -```bash -tg-show-token-costs [options] -``` - -## Description - -The `tg-show-token-costs` command displays the configured token pricing for all language models in TrustGraph. This information shows input and output costs per million tokens, which is used for cost tracking, billing, and resource management. - -The costs are displayed in a tabular format showing model names and their associated pricing in dollars per million tokens. 
- -## Options - -### Optional Arguments - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) - -## Examples - -### Display All Token Costs -```bash -tg-show-token-costs -``` - -### Using Custom API URL -```bash -tg-show-token-costs -u http://production:8088/ -``` - -## Output Format - -The command displays costs in a formatted table: - -``` -+----------------+-------------+--------------+ -| model | input, $/Mt | output, $/Mt | -+----------------+-------------+--------------+ -| gpt-4 | 30.000 | 60.000 | -| gpt-3.5-turbo | 0.500 | 1.500 | -| claude-3-sonnet| 3.000 | 15.000 | -| claude-3-haiku | 0.250 | 1.250 | -| local-model | 0.000 | 0.000 | -+----------------+-------------+--------------+ -``` - -### Column Details - -- **model**: Language model identifier -- **input, $/Mt**: Cost per million input tokens in USD -- **output, $/Mt**: Cost per million output tokens in USD - -### Missing Configuration - -If a model has incomplete cost configuration: -``` -+----------------+-------------+--------------+ -| model | input, $/Mt | output, $/Mt | -+----------------+-------------+--------------+ -| unconfigured | - | - | -+----------------+-------------+--------------+ -``` - -## Use Cases - -### Cost Monitoring -```bash -# Check current cost configuration -tg-show-token-costs - -# Monitor costs over time -echo "$(date): $(tg-show-token-costs)" >> cost_history.log -``` - -### Cost Analysis -```bash -# Find most expensive models -tg-show-token-costs | grep -v "model" | sort -k3 -nr - -# Find free/local models -tg-show-token-costs | grep "0.000" -``` - -### Budget Planning -```bash -# Calculate potential costs for usage scenarios -analyze_costs() { - echo "Cost Analysis for Usage Scenarios" - echo "=================================" - - # Extract cost data - tg-show-token-costs | grep -v "model" | \ - while read -r line; do - model=$(echo "$line" | awk '{print $1}' | tr -d '|' | tr -d ' ') - input_cost=$(echo "$line" | awk 
'{print $2}' | tr -d '|' | tr -d ' ') - output_cost=$(echo "$line" | awk '{print $3}' | tr -d '|' | tr -d ' ') - - if [[ "$input_cost" != "-" && "$output_cost" != "-" ]]; then - echo "Model: $model" - echo " 1M input tokens: \$${input_cost}" - echo " 1M output tokens: \$${output_cost}" - echo " 10K conversation (5K in/5K out): \$$(echo "scale=3; ($input_cost * 5 + $output_cost * 5) / 1000" | bc -l)" - echo "" - fi - done -} - -analyze_costs -``` - -### Environment Comparison -```bash -# Compare costs across environments -compare_costs() { - local env1_url="$1" - local env2_url="$2" - - echo "Cost Comparison" - echo "===============" - echo "Environment 1: $env1_url" - tg-show-token-costs -u "$env1_url" - - echo "" - echo "Environment 2: $env2_url" - tg-show-token-costs -u "$env2_url" -} - -compare_costs "http://dev:8088/" "http://prod:8088/" -``` - -## Advanced Usage - -### Cost Reporting -```bash -# Generate detailed cost report -generate_cost_report() { - local report_file="token_costs_$(date +%Y%m%d_%H%M%S).txt" - - echo "TrustGraph Token Cost Report" > "$report_file" - echo "Generated: $(date)" >> "$report_file" - echo "============================" >> "$report_file" - echo "" >> "$report_file" - - tg-show-token-costs >> "$report_file" - - echo "" >> "$report_file" - echo "Cost Analysis:" >> "$report_file" - echo "==============" >> "$report_file" - - # Add cost analysis - total_models=$(tg-show-token-costs | grep -c "|" | awk '{print $1-3}') # Subtract header rows - free_models=$(tg-show-token-costs | grep -c "0.000") - paid_models=$((total_models - free_models)) - - echo "Total models configured: $total_models" >> "$report_file" - echo "Paid models: $paid_models" >> "$report_file" - echo "Free models: $free_models" >> "$report_file" - - # Find most expensive models - echo "" >> "$report_file" - echo "Most expensive models (by output cost):" >> "$report_file" - tg-show-token-costs | grep -v "model" | grep -v "^\+" | \ - sort -k3 -nr | head -3 >> "$report_file" 
- - echo "Report saved: $report_file" -} - -generate_cost_report -``` - -### Cost Validation -```bash -# Validate cost configuration -validate_cost_config() { - echo "Cost Configuration Validation" - echo "=============================" - - local issues=0 - - # Check for unconfigured models (data rows show missing costs as " - ") - unconfigured=$(tg-show-token-costs | grep -c " - ") - if [ "$unconfigured" -gt 0 ]; then - echo "⚠ Warning: $unconfigured models have incomplete cost configuration" - tg-show-token-costs | grep " - " - issues=$((issues + 1)) - fi - - # Check for zero-cost models (might be intentional) - zero_cost=$(tg-show-token-costs | grep -c "0.000.*0.000") - if [ "$zero_cost" -gt 0 ]; then - echo "ℹ Info: $zero_cost models configured with zero cost (likely local models)" - fi - - # Check for unusual cost patterns - tg-show-token-costs | grep -v "model" | grep -v "^\+" | \ - while read -r line; do - input_cost=$(echo "$line" | awk '{print $2}' | tr -d '|' | tr -d ' ') - output_cost=$(echo "$line" | awk '{print $3}' | tr -d '|' | tr -d ' ') - model=$(echo "$line" | awk '{print $1}' | tr -d '|' | tr -d ' ') - - if [[ "$input_cost" != "-" && "$output_cost" != "-" ]]; then - # Check if output cost is lower than input cost (unusual) - if (( $(echo "$output_cost < $input_cost" | bc -l) )); then - echo "⚠ Warning: $model has output cost lower than input cost" - issues=$((issues + 1)) - fi - - # Check for extremely high costs - if (( $(echo "$input_cost > 100" | bc -l) )) || (( $(echo "$output_cost > 200" | bc -l) )); then - echo "⚠ Warning: $model has unusually high costs" - issues=$((issues + 1)) - fi - fi - done - - if [ "$issues" -eq 0 ]; then - echo "✓ Cost configuration appears valid" - else - echo "Found $issues potential issues" - fi -} - -validate_cost_config -``` - -### Cost Tracking -```bash -# Track cost changes over time -track_cost_changes() { - local history_file="cost_history.txt" - local current_file="current_costs.tmp" - - # Get current costs - tg-show-token-costs > "$current_file" 
- - # Check if this is first run - if [ ! -f "$history_file" ]; then - echo "$(date): Initial cost configuration" >> "$history_file" - cat "$current_file" >> "$history_file" - echo "---" >> "$history_file" - else - # Compare with last known state - if ! diff -q "$history_file" "$current_file" > /dev/null 2>&1; then - echo "$(date): Cost configuration changed" >> "$history_file" - - # Show differences - echo "Changes:" >> "$history_file" - diff "$history_file" "$current_file" | tail -n +1 >> "$history_file" - - echo "New configuration:" >> "$history_file" - cat "$current_file" >> "$history_file" - echo "---" >> "$history_file" - - echo "Cost changes detected and logged to $history_file" - else - echo "No cost changes detected" - fi - fi - - rm "$current_file" -} - -track_cost_changes -``` - -### Export Cost Data -```bash -# Export costs to CSV -export_costs_csv() { - local output_file="$1" - - echo "model,input_cost_per_million,output_cost_per_million" > "$output_file" - - tg-show-token-costs | grep -v "model" | grep -v "^\+" | \ - while read -r line; do - model=$(echo "$line" | awk '{print $1}' | tr -d '|' | tr -d ' ') - input_cost=$(echo "$line" | awk '{print $2}' | tr -d '|' | tr -d ' ') - output_cost=$(echo "$line" | awk '{print $3}' | tr -d '|' | tr -d ' ') - - if [[ "$model" != "" ]]; then - echo "$model,$input_cost,$output_cost" >> "$output_file" - fi - done - - echo "Costs exported to: $output_file" -} - -# Export to CSV -export_costs_csv "token_costs.csv" - -# Export to JSON -export_costs_json() { - local output_file="$1" - - echo "{" > "$output_file" - echo " \"export_date\": \"$(date -Iseconds)\"," >> "$output_file" - echo " \"models\": [" >> "$output_file" - - first=true - tg-show-token-costs | grep -v "model" | grep -v "^\+" | \ - while read -r line; do - model=$(echo "$line" | awk '{print $1}' | tr -d '|' | tr -d ' ') - input_cost=$(echo "$line" | awk '{print $2}' | tr -d '|' | tr -d ' ') - output_cost=$(echo "$line" | awk '{print $3}' | tr -d '|' | tr 
-d ' ') - - if [[ "$model" != "" ]]; then - if [ "$first" = "false" ]; then - echo "," >> "$output_file" - fi - first=false - - echo " {" >> "$output_file" - echo " \"model\": \"$model\"," >> "$output_file" - echo " \"input_cost\": \"$input_cost\"," >> "$output_file" - echo " \"output_cost\": \"$output_cost\"" >> "$output_file" - echo -n " }" >> "$output_file" - fi - done - - echo "" >> "$output_file" - echo " ]" >> "$output_file" - echo "}" >> "$output_file" - - echo "Costs exported to: $output_file" -} - -export_costs_json "token_costs.json" -``` - -### Cost Calculation Tools -```bash -# Calculate costs for usage scenarios -calculate_usage_cost() { - local model="$1" - local input_tokens="$2" - local output_tokens="$3" - - echo "Calculating cost for $model usage:" - echo " Input tokens: $input_tokens" - echo " Output tokens: $output_tokens" - - # Extract costs for specific model - costs=$(tg-show-token-costs | grep "$model") - - if [ -z "$costs" ]; then - echo "Error: Model $model not found in cost configuration" - return 1 - fi - - input_cost=$(echo "$costs" | awk '{print $2}' | tr -d '|' | tr -d ' ') - output_cost=$(echo "$costs" | awk '{print $3}' | tr -d '|' | tr -d ' ') - - if [[ "$input_cost" == "-" || "$output_cost" == "-" ]]; then - echo "Error: Incomplete cost configuration for $model" - return 1 - fi - - # Calculate total cost - total_cost=$(echo "scale=6; ($input_tokens * $input_cost / 1000000) + ($output_tokens * $output_cost / 1000000)" | bc -l) - - echo " Input cost: \$$(echo "scale=6; $input_tokens * $input_cost / 1000000" | bc -l)" - echo " Output cost: \$$(echo "scale=6; $output_tokens * $output_cost / 1000000" | bc -l)" - echo " Total cost: \$${total_cost}" -} - -# Example usage calculations -calculate_usage_cost "gpt-4" 1000 500 -calculate_usage_cost "claude-3-sonnet" 5000 2000 -``` - -### Model Cost Comparison -```bash -# Compare costs across models for same usage -compare_model_costs() { - local input_tokens="${1:-1000}" - local 
output_tokens="${2:-500}" - - echo "Cost comparison for $input_tokens input + $output_tokens output tokens:" - echo "=====================================================================" - - tg-show-token-costs | grep -v "model" | grep -v "^\+" | \ - while read -r line; do - model=$(echo "$line" | awk '{print $1}' | tr -d '|' | tr -d ' ') - input_cost=$(echo "$line" | awk '{print $2}' | tr -d '|' | tr -d ' ') - output_cost=$(echo "$line" | awk '{print $3}' | tr -d '|' | tr -d ' ') - - if [[ "$model" != "" && "$input_cost" != "-" && "$output_cost" != "-" ]]; then - total_cost=$(echo "scale=4; ($input_tokens * $input_cost / 1000000) + ($output_tokens * $output_cost / 1000000)" | bc -l) - printf "%-20s \$%s\n" "$model" "$total_cost" - fi - done | sort -k2 -n -} - -# Compare costs for typical usage -compare_model_costs 1000 500 -``` - -## Error Handling - -### Connection Issues -```bash -Exception: Connection refused -``` -**Solution**: Check API URL and ensure TrustGraph is running. - -### Permission Errors -```bash -Exception: Access denied -``` -**Solution**: Verify user permissions for configuration access. - -### No Models Configured -```bash -# Empty table or no data -``` -**Solution**: Configure model costs with `tg-set-token-costs`. - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-set-token-costs`](tg-set-token-costs.md) - Configure token costs -- [`tg-show-config`](tg-show-config.md) - Show other configuration settings (if available) - -## API Integration - -This command uses the [Config API](../apis/api-config.md) to retrieve token cost configuration from TrustGraph's configuration system. - -## Best Practices - -1. **Regular Review**: Check cost configurations regularly -2. **Cost Tracking**: Monitor cost changes over time -3. **Validation**: Validate cost configurations for accuracy -4. **Documentation**: Document cost sources and update procedures -5. 
**Reporting**: Generate regular cost reports for budget planning -6. **Comparison**: Compare costs across environments -7. **Automation**: Automate cost monitoring and alerting - -## Troubleshooting - -### Missing Cost Data -```bash -# Check if models are configured -tg-show-token-costs | grep -c "model" - -# Verify specific model exists -tg-show-token-costs | grep "model-name" -``` - -### Formatting Issues -```bash -# If table is garbled -export COLUMNS=120 -tg-show-token-costs -``` - -### Incomplete Data -```bash -# Look for models with missing costs -tg-show-token-costs | grep "\-" - -# Set missing costs -tg-set-token-costs --model "incomplete-model" -i 1.0 -o 2.0 -``` \ No newline at end of file diff --git a/docs/cli/tg-show-token-rate.md b/docs/cli/tg-show-token-rate.md deleted file mode 100644 index 99cd1193..00000000 --- a/docs/cli/tg-show-token-rate.md +++ /dev/null @@ -1,246 +0,0 @@ -# tg-show-token-rate - -## Synopsis - -``` -tg-show-token-rate [OPTIONS] -``` - -## Description - -The `tg-show-token-rate` command displays a live stream of token usage rates from TrustGraph processors. It monitors both input and output tokens, showing instantaneous rates and cumulative averages over time. This command is essential for monitoring LLM token consumption and understanding processing throughput. - -The command queries the metrics endpoint for token usage data and displays: -- Input token rates (tokens per second) -- Output token rates (tokens per second) -- Total token rates (combined input + output) - -All rates are calculated as averages since the command started running. 
- -## Options - -- `-m, --metrics-url URL` - - Metrics endpoint URL to query for token information - - Default: `http://localhost:8088/api/metrics` - - Should point to a Prometheus-compatible metrics endpoint - -- `-p, --period SECONDS` - - Sampling period in seconds between measurements - - Default: `1` - - Controls how frequently token rates are updated - -- `-n, --number-samples COUNT` - - Number of samples to collect before stopping - - Default: `100` - - Set to a large value for continuous monitoring - -- `-h, --help` - - Show help message and exit - -## Examples - -### Basic Usage - -Monitor token rates with default settings (1-second intervals, 100 samples): -```bash -tg-show-token-rate -``` - -### Custom Sampling Period - -Monitor token rates with 5-second intervals: -```bash -tg-show-token-rate --period 5 -``` - -### Continuous Monitoring - -Monitor token rates continuously (1000 samples): -```bash -tg-show-token-rate -n 1000 -``` - -### Remote Monitoring - -Monitor token rates from a remote TrustGraph instance: -```bash -tg-show-token-rate -m http://10.0.1.100:8088/api/metrics -``` - -### High-Frequency Monitoring - -Monitor token rates with sub-second precision: -```bash -tg-show-token-rate --period 0.5 --number-samples 200 -``` - -## Output Format - -The command displays a table with continuously updated token rates: -``` - Input Output Total - ----- ------ ----- - 12.3 8.7 21.0 - 15.2 10.1 25.3 - 18.7 12.4 31.1 - ... -``` - -Each row shows: -- **Input**: Average input tokens per second since monitoring started -- **Output**: Average output tokens per second since monitoring started -- **Total**: Combined input + output tokens per second - -## Advanced Usage - -### Token Rate Analysis - -Create a script to analyze token usage patterns: -```bash -#!/bin/bash -echo "Starting token rate analysis..." -tg-show-token-rate --period 2 --number-samples 60 > token_rates.txt -echo "Analysis complete. 
Data saved to token_rates.txt" -``` - -### Performance Monitoring - -Monitor token rates during load testing: -```bash -#!/bin/bash -echo "Starting load test monitoring..." -tg-show-token-rate --period 1 --number-samples 300 | tee load_test_tokens.log -``` - -### Alert on High Token Usage - -Create an alert script for excessive token consumption: -```bash -#!/bin/bash -tg-show-token-rate -n 10 -p 5 | tail -n 1 | awk '{ - if ($3 > 100) { - print "WARNING: High token rate detected:", $3, "tokens/sec" - exit 1 - } -}' -``` - -### Cost Estimation - -Estimate token costs during processing: -```bash -#!/bin/bash -echo "Monitoring token usage for cost estimation..." -tg-show-token-rate --period 10 --number-samples 36 | \ -awk 'NR>2 {total+=$3} END {print "Average tokens/sec:", total/(NR-2)}' -``` - -## Error Handling - -The command handles various error conditions: - -- **Connection errors**: If the metrics endpoint is unavailable -- **Invalid JSON**: If the metrics response is malformed -- **Missing metrics**: If token metrics are not found -- **Network timeouts**: If requests to the metrics endpoint time out - -Common error scenarios: -```bash -# Metrics endpoint not available -tg-show-token-rate -m http://invalid-host:8088/api/metrics -# Output: Exception: [Connection error details] - -# Invalid period value -tg-show-token-rate --period 0 -# Output: Exception: [Invalid period error] -``` - -## Integration with Other Commands - -### With Cost Monitoring - -Combine with token cost analysis: -```bash -echo "=== Token Rates ===" -tg-show-token-rate -n 5 -p 2 -echo -echo "=== Token Costs ===" -tg-show-token-costs -``` - -### With Processor State - -Monitor tokens alongside processor health: -```bash -echo "=== Processor States ===" -tg-show-processor-state -echo -echo "=== Token Rates ===" -tg-show-token-rate -n 10 -p 1 -``` - -### With Flow Monitoring - -Track token usage per flow: -```bash -#!/bin/bash -echo "=== Active Flows ===" -tg-show-flows -echo -echo "=== Token Usage 
===" -tg-show-token-rate -n 20 -p 3 -``` - -## Best Practices - -1. **Baseline Monitoring**: Establish baseline token rates for normal operation -2. **Alert Thresholds**: Set up alerts for unusually high token consumption -3. **Cost Tracking**: Monitor token rates to estimate operational costs -4. **Load Testing**: Use during load testing to understand capacity limits -5. **Historical Analysis**: Save token rate data for trend analysis - -## Troubleshooting - -### No Token Data - -If no token rates are displayed: -1. Verify that TrustGraph processors are actively processing requests -2. Check that token metrics are being exported properly -3. Ensure the metrics endpoint is accessible -4. Verify that LLM services are receiving requests - -### Inconsistent Rates - -For inconsistent or erratic token rates: -1. Check for network issues affecting metrics collection -2. Verify that the sampling period is appropriate for your workload -3. Ensure multiple processors aren't conflicting -4. Check system resources (CPU, memory) on the TrustGraph instance - -### High Token Rates - -If token rates are unexpectedly high: -1. Investigate the types of queries being processed -2. Check for inefficient prompts or large document processing -3. Verify that caching is working properly -4. 
Consider if the workload justifies the token usage - -## Performance Considerations - -- **Sampling Frequency**: Higher frequencies provide more granular data but consume more resources -- **Network Latency**: Consider network latency when setting sampling periods -- **Metrics Storage**: Long monitoring sessions generate significant data -- **Resource Usage**: The command itself uses minimal resources - -## Related Commands - -- [`tg-show-token-costs`](tg-show-token-costs.md) - Display token usage costs -- [`tg-show-processor-state`](tg-show-processor-state.md) - Show processor states -- [`tg-show-flow-state`](tg-show-flow-state.md) - Display flow processor states -- [`tg-show-config`](tg-show-config.md) - Show TrustGraph configuration - -## See Also - -- TrustGraph Token Management Documentation -- Prometheus Metrics Configuration -- LLM Cost Optimization Guide \ No newline at end of file diff --git a/docs/cli/tg-show-tools.md b/docs/cli/tg-show-tools.md deleted file mode 100644 index 9abaca2e..00000000 --- a/docs/cli/tg-show-tools.md +++ /dev/null @@ -1,283 +0,0 @@ -# tg-show-tools - -## Synopsis - -``` -tg-show-tools [OPTIONS] -``` - -## Description - -The `tg-show-tools` command displays the current agent tool configuration from TrustGraph. It retrieves and presents detailed information about all available tools that agents can use, including their descriptions, arguments, and parameter types. - -This command is useful for: -- Understanding available agent tools and their capabilities -- Debugging agent tool configuration issues -- Documenting the current tool set -- Verifying tool definitions and argument specifications - -The command queries the TrustGraph API to fetch the tool index and individual tool definitions, then presents them in a formatted table for easy reading. 
- -## Options - -- `-u, --api-url URL` - - TrustGraph API URL to query for tool configuration - - Default: `http://localhost:8088/` (or `TRUSTGRAPH_URL` environment variable) - - Should point to a running TrustGraph API instance - -- `-h, --help` - - Show help message and exit - -## Examples - -### Basic Usage - -Display all available agent tools using the default API URL: -```bash -tg-show-tools -``` - -### Custom API URL - -Display tools from a specific TrustGraph instance: -```bash -tg-show-tools -u http://trustgraph.example.com:8088/ -``` - -### Remote Instance - -Query tools from a remote TrustGraph deployment: -```bash -tg-show-tools --api-url http://10.0.1.100:8088/ -``` - -### Using Environment Variable - -Set the API URL via environment variable: -```bash -export TRUSTGRAPH_URL=http://production.trustgraph.com:8088/ -tg-show-tools -``` - -## Output Format - -The command displays each tool in a detailed table format: -``` -web-search: -+-------------+----------------------------------------------------------------------+ -| id | web-search | -+-------------+----------------------------------------------------------------------+ -| name | Web Search | -+-------------+----------------------------------------------------------------------+ -| description | Search the web for information using a search engine | -+-------------+----------------------------------------------------------------------+ -| arg 0 | query: string | -| | The search query to execute | -+-------------+----------------------------------------------------------------------+ -| arg 1 | max_results: integer | -| | Maximum number of search results to return | -+-------------+----------------------------------------------------------------------+ - -file-read: -+-------------+----------------------------------------------------------------------+ -| id | file-read | -+-------------+----------------------------------------------------------------------+ -| name | File Reader | 
-+-------------+----------------------------------------------------------------------+ -| description | Read contents of a file from the filesystem | -+-------------+----------------------------------------------------------------------+ -| arg 0 | path: string | -| | Path to the file to read | -+-------------+----------------------------------------------------------------------+ -``` - -For each tool, the output includes: -- **id**: Unique identifier for the tool -- **name**: Human-readable name of the tool -- **description**: Detailed description of what the tool does -- **arg N**: Arguments the tool accepts, with name, type, and description - -## Advanced Usage - -### Tool Inventory - -Create a complete inventory of available tools: -```bash -#!/bin/bash -echo "=== TrustGraph Agent Tools Inventory ===" -echo "Generated on: $(date)" -echo -tg-show-tools > tools_inventory.txt -echo "Inventory saved to tools_inventory.txt" -``` - -### Tool Comparison - -Compare tools across different environments: -```bash -#!/bin/bash -echo "=== Development Tools ===" -tg-show-tools -u http://dev.trustgraph.com:8088/ > dev_tools.txt -echo -echo "=== Production Tools ===" -tg-show-tools -u http://prod.trustgraph.com:8088/ > prod_tools.txt -echo -diff dev_tools.txt prod_tools.txt -``` - -### Tool Documentation - -Generate documentation for agent tools: -```bash -#!/bin/bash -echo "# Available Agent Tools" > AGENT_TOOLS.md -echo "" >> AGENT_TOOLS.md -echo "Generated on: $(date)" >> AGENT_TOOLS.md -echo "" >> AGENT_TOOLS.md -tg-show-tools >> AGENT_TOOLS.md -``` - -### Tool Configuration Validation - -Validate tool configuration after updates: -```bash -#!/bin/bash -echo "Validating tool configuration..." 
-if tg-show-tools > /dev/null 2>&1; then - echo "✓ Tool configuration is valid" - tool_count=$(tg-show-tools | grep -c "^[a-zA-Z].*:$") - echo "✓ Found $tool_count tools" -else - echo "✗ Tool configuration validation failed" - exit 1 -fi -``` - -## Error Handling - -The command handles various error conditions: - -- **API connection errors**: If the TrustGraph API is unavailable -- **Authentication errors**: If API access is denied -- **Invalid configuration**: If tool configuration is malformed -- **Network timeouts**: If API requests time out - -Common error scenarios: -```bash -# API not available -tg-show-tools -u http://invalid-host:8088/ -# Output: Exception: [Connection error details] - -# Invalid API URL -tg-show-tools --api-url "not-a-url" -# Output: Exception: [URL parsing error] - -# Configuration not found -# Output: Exception: [Configuration retrieval error] -``` - -## Integration with Other Commands - -### With Agent Configuration - -Display tools alongside agent configuration: -```bash -echo "=== Agent Tools ===" -tg-show-tools -echo -echo "=== Agent Configuration ===" -tg-show-config -``` - -### With Flow Analysis - -Understand tools used in flows: -```bash -echo "=== Available Tools ===" -tg-show-tools -echo -echo "=== Active Flows ===" -tg-show-flows -``` - -### With Prompt Analysis - -Analyze tool usage in prompts: -```bash -echo "=== Agent Tools ===" -tg-show-tools | grep -E "^[a-zA-Z].*:$" -echo -echo "=== Available Prompts ===" -tg-show-prompts -``` - -## Best Practices - -1. **Regular Documentation**: Keep tool documentation updated -2. **Version Control**: Track tool configuration changes -3. **Testing**: Test tool functionality after configuration changes -4. **Security**: Review tool permissions and capabilities -5. **Monitoring**: Monitor tool usage and performance - -## Troubleshooting - -### No Tools Displayed - -If no tools are shown: -1. Verify the TrustGraph API is running and accessible -2. 
# Verify the tool is no longer listed (grep -q exits non-zero when the name is absent)
! tg-show-tools | grep -q "removed-tool-name" && echo "Tool successfully removed"
Flow blueprints define the processing pipeline configuration, while flow instances are running implementations of those blueprints with specific identifiers.
1. **Flow Blueprint Definition**: Flow blueprints define processing pipelines
Common interfaces include: - -### Request/Response Services -- **agent**: Interactive Q&A service -- **graph-rag**: Graph-based retrieval augmented generation -- **document-rag**: Document-based retrieval augmented generation -- **text-completion**: LLM text completion -- **embeddings**: Text embedding generation -- **triples**: Knowledge graph queries - -### Fire-and-Forget Services -- **text-load**: Text document loading -- **document-load**: Document file loading -- **triples-store**: Knowledge graph storage - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-stop-flow`](tg-stop-flow.md) - Stop a running flow -- [`tg-show-flows`](tg-show-flows.md) - List active flows and their interfaces -- [`tg-show-flow-blueprints`](tg-show-flow-blueprints.md) - List available flow blueprintes -- [`tg-put-flow-blueprint`](tg-put-flow-blueprint.md) - Upload/update flow blueprint definitions -- [`tg-show-flow-state`](tg-show-flow-state.md) - Check flow status - -## API Integration - -This command uses the [Flow API](../apis/api-flow.md) with the `start-flow` operation to create and start flow instances. - -## Use Cases - -### Development Environment -```bash -tg-start-flow \ - -n "dev-pipeline" \ - -i "dev-$(date +%Y%m%d)" \ - -d "Development testing flow for $(date)" -``` - -### Research Projects -```bash -tg-start-flow \ - -n "research-analysis" \ - -i "climate-study" \ - -d "Climate change research document analysis" -``` - -### Production Processing -```bash -tg-start-flow \ - -n "production-pipeline" \ - -i "prod-primary" \ - -d "Primary production document processing pipeline" -``` - -### Specialized Processing -```bash -tg-start-flow \ - -n "medical-nlp" \ - -i "medical-trials" \ - -d "Medical trial document analysis and extraction" -``` - -## Best Practices - -1. **Descriptive IDs**: Use meaningful flow IDs that indicate purpose and scope -2. **Clear Descriptions**: Provide detailed descriptions for flow tracking -3. 
**Resource Planning**: Ensure adequate resources before starting flows -4. **Monitoring**: Use `tg-show-flows` to monitor active flows -5. **Cleanup**: Stop unused flows to free up resources -6. **Documentation**: Document flow purposes and configurations for team use \ No newline at end of file diff --git a/docs/cli/tg-start-library-processing.md b/docs/cli/tg-start-library-processing.md deleted file mode 100644 index ee5ceb33..00000000 --- a/docs/cli/tg-start-library-processing.md +++ /dev/null @@ -1,563 +0,0 @@ -# tg-start-library-processing - -Submits a library document for processing through TrustGraph workflows. - -## Synopsis - -```bash -tg-start-library-processing -d DOCUMENT_ID --id PROCESSING_ID [options] -``` - -## Description - -The `tg-start-library-processing` command initiates processing of a document stored in TrustGraph's document library. This triggers workflows that can extract text, generate embeddings, create knowledge graphs, and enable document search and analysis. - -Each processing job is assigned a unique processing ID for tracking and management purposes. 
- -## Options - -### Required Arguments - -- `-d, --document-id ID`: Document ID from the library to process -- `--id, --processing-id ID`: Unique identifier for this processing job - -### Optional Arguments - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-U, --user USER`: User ID for processing context (default: `trustgraph`) -- `-i, --flow-id ID`: Flow instance to use for processing (default: `default`) -- `--collection COLLECTION`: Collection to assign processed data (default: `default`) -- `--tags TAGS`: Comma-separated tags for the processing job - -## Examples - -### Basic Document Processing -```bash -tg-start-library-processing -d "doc_123456789" --id "proc_001" -``` - -### Processing with Custom Collection -```bash -tg-start-library-processing \ - -d "research_paper_456" \ - --id "research_proc_001" \ - --collection "research-papers" \ - --tags "nlp,research,2023" -``` - -### Processing with Specific Flow -```bash -tg-start-library-processing \ - -d "technical_manual" \ - --id "manual_proc_001" \ - -i "document-analysis-flow" \ - -U "technical-team" \ - --collection "technical-docs" -``` - -### Processing Multiple Documents -```bash -# Process several documents in sequence -documents=("doc_001" "doc_002" "doc_003") -for i in "${!documents[@]}"; do - doc_id="${documents[$i]}" - proc_id="batch_proc_$(printf %03d $((i+1)))" - - echo "Processing document: $doc_id" - tg-start-library-processing \ - -d "$doc_id" \ - --id "$proc_id" \ - --collection "batch-processing" \ - --tags "batch,automated" -done -``` - -## Processing Workflow - -### Document Processing Steps -1. **Document Retrieval**: Fetch document from library -2. **Content Extraction**: Extract text and metadata -3. **Text Processing**: Clean and normalize content -4. **Embedding Generation**: Create vector embeddings -5. **Knowledge Extraction**: Generate triples and entities -6. 
**Index Creation**: Make content searchable - -### Processing Types -Different document types may trigger different processing workflows: -- **PDF Documents**: Text extraction, OCR if needed -- **Text Files**: Direct text processing -- **Images**: OCR and image analysis -- **Structured Data**: Schema extraction and mapping - -## Use Cases - -### Batch Document Processing -```bash -# Process all unprocessed documents -process_all_documents() { - local collection="$1" - local batch_id="batch_$(date +%Y%m%d_%H%M%S)" - - echo "Starting batch processing for collection: $collection" - - # Get all document IDs - tg-show-library-documents | \ - grep "| id" | \ - awk '{print $3}' | \ - while read -r doc_id; do - proc_id="${batch_id}_${doc_id}" - - echo "Processing document: $doc_id" - tg-start-library-processing \ - -d "$doc_id" \ - --id "$proc_id" \ - --collection "$collection" \ - --tags "batch,automated,$(date +%Y%m%d)" - - # Add delay to avoid overwhelming the system - sleep 2 - done -} - -# Process all documents -process_all_documents "processed-docs" -``` - -### Department-Specific Processing -```bash -# Process documents by department -process_by_department() { - local dept="$1" - local flow="$2" - - echo "Processing documents for department: $dept" - - # Find documents with department tag - tg-show-library-documents -U "$dept" | \ - grep "| id" | \ - awk '{print $3}' | \ - while read -r doc_id; do - proc_id="${dept}_proc_$(date +%s)_${doc_id}" - - echo "Processing $dept document: $doc_id" - tg-start-library-processing \ - -d "$doc_id" \ - --id "$proc_id" \ - -i "$flow" \ - -U "$dept" \ - --collection "${dept}-processed" \ - --tags "$dept,departmental" - done -} - -# Process documents for different departments -process_by_department "research" "research-flow" -process_by_department "finance" "document-flow" -process_by_department "legal" "compliance-flow" -``` - -### Priority Processing -```bash -# Process high-priority documents first -priority_processing() { - 
local priority_tags=("urgent" "high-priority" "critical") - - for tag in "${priority_tags[@]}"; do - echo "Processing $tag documents..." - - tg-show-library-documents | \ - grep -B5 -A5 "$tag" | \ - grep "| id" | \ - awk '{print $3}' | \ - while read -r doc_id; do - proc_id="priority_$(date +%s)_${doc_id}" - - echo "Processing priority document: $doc_id" - tg-start-library-processing \ - -d "$doc_id" \ - --id "$proc_id" \ - --collection "priority-processed" \ - --tags "priority,$tag" - done - done -} - -priority_processing -``` - -### Conditional Processing -```bash -# Process documents based on criteria -conditional_processing() { - local criteria="$1" - local flow="$2" - - echo "Processing documents matching criteria: $criteria" - - tg-show-library-documents | \ - grep -B10 -A10 "$criteria" | \ - grep "| id" | \ - awk '{print $3}' | \ - while read -r doc_id; do - # Check if already processed - if tg-invoke-document-rag -q "test" 2>/dev/null | grep -q "$doc_id"; then - echo "Document $doc_id already processed, skipping" - continue - fi - - proc_id="conditional_$(date +%s)_${doc_id}" - - echo "Processing document: $doc_id" - tg-start-library-processing \ - -d "$doc_id" \ - --id "$proc_id" \ - -i "$flow" \ - --collection "conditional-processed" \ - --tags "conditional,$criteria" - done -} - -# Process technical documents -conditional_processing "technical" "technical-flow" -``` - -## Advanced Usage - -### Processing with Validation -```bash -# Process with pre and post validation -validated_processing() { - local doc_id="$1" - local proc_id="$2" - local collection="$3" - - echo "Starting validated processing for: $doc_id" - - # Pre-processing validation - if ! 
tg-show-library-documents | grep -q "$doc_id"; then - echo "ERROR: Document $doc_id not found" - return 1 - fi - - # Check if processing ID is unique - if tg-show-flows | grep -q "$proc_id"; then - echo "ERROR: Processing ID $proc_id already in use" - return 1 - fi - - # Start processing - echo "Starting processing..." - tg-start-library-processing \ - -d "$doc_id" \ - --id "$proc_id" \ - --collection "$collection" \ - --tags "validated,$(date +%Y%m%d)" - - # Monitor processing - echo "Monitoring processing progress..." - timeout=300 # 5 minutes - elapsed=0 - interval=10 - - while [ $elapsed -lt $timeout ]; do - if tg-invoke-document-rag -q "test" -C "$collection" 2>/dev/null | grep -q "$doc_id"; then - echo "✓ Processing completed successfully" - return 0 - fi - - echo "Processing in progress... (${elapsed}s elapsed)" - sleep $interval - elapsed=$((elapsed + interval)) - done - - echo "⚠ Processing timeout reached" - return 1 -} - -# Usage -validated_processing "doc_123" "validated_proc_001" "validated-docs" -``` - -### Parallel Processing with Limits -```bash -# Process multiple documents in parallel with concurrency limits -parallel_processing() { - local doc_list=("$@") - local max_concurrent=5 - local current_jobs=0 - - echo "Processing ${#doc_list[@]} documents with max $max_concurrent concurrent jobs" - - for doc_id in "${doc_list[@]}"; do - # Wait if max concurrent jobs reached - while [ $current_jobs -ge $max_concurrent ]; do - wait -n # Wait for any job to complete - current_jobs=$((current_jobs - 1)) - done - - # Start processing in background - ( - proc_id="parallel_$(date +%s)_${doc_id}" - echo "Starting processing: $doc_id" - - tg-start-library-processing \ - -d "$doc_id" \ - --id "$proc_id" \ - --collection "parallel-processed" \ - --tags "parallel,batch" - - echo "Completed processing: $doc_id" - ) & - - current_jobs=$((current_jobs + 1)) - done - - # Wait for all remaining jobs - wait - echo "All processing jobs completed" -} - -# Get document list 
and process in parallel -doc_list=($(tg-show-library-documents | grep "| id" | awk '{print $3}')) -parallel_processing "${doc_list[@]}" -``` - -### Processing with Retry Logic -```bash -# Process with automatic retry on failure -processing_with_retry() { - local doc_id="$1" - local proc_id="$2" - local max_retries=3 - local retry_delay=30 - - for attempt in $(seq 1 $max_retries); do - echo "Processing attempt $attempt/$max_retries for document: $doc_id" - - if tg-start-library-processing \ - -d "$doc_id" \ - --id "${proc_id}_attempt_${attempt}" \ - --collection "retry-processed" \ - --tags "retry,attempt_$attempt"; then - - # Wait and check if processing succeeded - sleep $retry_delay - - if tg-invoke-document-rag -q "test" 2>/dev/null | grep -q "$doc_id"; then - echo "✓ Processing succeeded on attempt $attempt" - return 0 - else - echo "Processing started but content not yet accessible" - fi - else - echo "✗ Processing failed on attempt $attempt" - fi - - if [ $attempt -lt $max_retries ]; then - echo "Retrying in ${retry_delay}s..." - sleep $retry_delay - fi - done - - echo "✗ Processing failed after $max_retries attempts" - return 1 -} - -# Usage -processing_with_retry "doc_123" "retry_proc_001" -``` - -### Configuration-Driven Processing -```bash -# Process documents based on configuration file -config_driven_processing() { - local config_file="$1" - - if [ ! 
-f "$config_file" ]; then - echo "Configuration file not found: $config_file" - return 1 - fi - - echo "Processing documents based on configuration: $config_file" - - # Example configuration format: - # doc_id,flow_id,collection,tags - # doc_123,research-flow,research-docs,nlp research - - while IFS=',' read -r doc_id flow_id collection tags; do - # Skip header line - if [ "$doc_id" = "doc_id" ]; then - continue - fi - - proc_id="config_$(date +%s)_${doc_id}" - - echo "Processing: $doc_id -> $collection (flow: $flow_id)" - - tg-start-library-processing \ - -d "$doc_id" \ - --id "$proc_id" \ - -i "$flow_id" \ - --collection "$collection" \ - --tags "$tags" - - done < "$config_file" -} - -# Create example configuration -cat > processing_config.csv << EOF -doc_id,flow_id,collection,tags -doc_123,research-flow,research-docs,nlp research -doc_456,finance-flow,finance-docs,financial quarterly -doc_789,general-flow,general-docs,general processing -EOF - -# Process based on configuration -config_driven_processing "processing_config.csv" -``` - -## Error Handling - -### Document Not Found -```bash -Exception: Document not found -``` -**Solution**: Verify document exists with `tg-show-library-documents`. - -### Processing ID Conflict -```bash -Exception: Processing ID already exists -``` -**Solution**: Use a unique processing ID or check existing jobs with `tg-show-flows`. - -### Flow Not Found -```bash -Exception: Flow instance not found -``` -**Solution**: Verify flow exists with `tg-show-flows` or `tg-show-flow-blueprints`. - -### Insufficient Resources -```bash -Exception: Processing queue full -``` -**Solution**: Wait for current jobs to complete or scale processing resources. 
        active_jobs=$(tg-show-flows | grep -c "$proc_pattern")
**Monitoring**: Track processing progress and completion -5. **Collection Organization**: Use meaningful collection names -6. **Tagging**: Apply consistent tagging for better organization -7. **Documentation**: Document processing procedures and configurations - -## Troubleshooting - -### Processing Not Starting -```bash -# Check document exists -tg-show-library-documents | grep "document-id" - -# Check flow is available -tg-show-flows | grep "flow-id" - -# Check system resources -free -h -df -h -``` - -### Slow Processing -```bash -# Check processing queue -tg-show-flows | grep processing | wc -l - -# Monitor system load -top -htop -``` - -### Processing Failures -```bash -# Check processing logs -# (Log location depends on TrustGraph configuration) - -# Retry with different flow -tg-start-library-processing -d "doc-id" --id "retry-proc" -i "alternative-flow" -``` \ No newline at end of file diff --git a/docs/cli/tg-stop-flow.md b/docs/cli/tg-stop-flow.md deleted file mode 100644 index 97ad1696..00000000 --- a/docs/cli/tg-stop-flow.md +++ /dev/null @@ -1,256 +0,0 @@ -# tg-stop-flow - -Stops a running processing flow. - -## Synopsis - -```bash -tg-stop-flow -i FLOW_ID [options] -``` - -## Description - -The `tg-stop-flow` command terminates a running flow instance and releases its associated resources. When a flow is stopped, it becomes unavailable for processing requests, and all its service endpoints are shut down. - -This command is essential for flow lifecycle management, resource cleanup, and system maintenance operations. 
- -## Options - -### Required Arguments - -- `-i, --flow-id FLOW_ID`: Identifier of the flow to stop - -### Optional Arguments - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) - -## Examples - -### Stop Specific Flow -```bash -tg-stop-flow -i research-flow -``` - -### Using Custom API URL -```bash -tg-stop-flow -i production-flow -u http://production:8088/ -``` - -### Stop Multiple Flows -```bash -# Stop multiple flows in sequence -tg-stop-flow -i dev-flow-1 -tg-stop-flow -i dev-flow-2 -tg-stop-flow -i test-flow -``` - -## Prerequisites - -### Flow Must Exist and Be Running -Before stopping a flow, verify it exists: - -```bash -# Check running flows -tg-show-flows - -# Stop the desired flow -tg-stop-flow -i my-flow -``` - -## Flow Termination Process - -1. **Request Validation**: Verifies flow exists and is running -2. **Service Shutdown**: Stops all flow service endpoints -3. **Resource Cleanup**: Releases allocated system resources -4. **Queue Cleanup**: Cleans up associated Pulsar queues -5. **State Update**: Updates flow status to stopped - -## Impact of Stopping Flows - -### Service Unavailability -Once stopped, the flow's services become unavailable: -- REST API endpoints return errors -- WebSocket connections are terminated -- Pulsar queues are cleaned up - -### In-Progress Operations -- **Completed**: Already finished operations remain completed -- **Active**: In-progress operations may be interrupted -- **Queued**: Pending operations are lost - -### Resource Recovery -- **Memory**: Memory allocated to flow components is freed -- **CPU**: Processing resources are returned to system pool -- **Storage**: Temporary storage is cleaned up - -## Error Handling - -### Flow Not Found -```bash -Exception: Flow 'invalid-flow' not found -``` -**Solution**: Check available flows with `tg-show-flows` and verify the flow ID. 
- -### Flow Already Stopped -```bash -Exception: Flow 'my-flow' is not running -``` -**Solution**: The flow is already stopped. Use `tg-show-flows` to check current status. - -### Connection Errors -```bash -Exception: Connection refused -``` -**Solution**: Verify the API URL and ensure TrustGraph is running. - -### Permission Errors -```bash -Exception: Insufficient permissions to stop flow -``` -**Solution**: Check user permissions and authentication credentials. - -## Output - -On successful flow termination: -```bash -Flow 'research-flow' stopped successfully. -``` - -No output typically indicates successful operation. - -## Flow Management Workflow - -### Development Cycle -```bash -# 1. Start flow for development -tg-start-flow -n "dev-class" -i "dev-flow" -d "Development testing" - -# 2. Use flow for testing -tg-invoke-graph-rag -q "test query" -f dev-flow - -# 3. Stop flow when done -tg-stop-flow -i dev-flow -``` - -### Resource Management -```bash -# Check active flows -tg-show-flows - -# Stop unused flows to free resources -tg-stop-flow -i old-research-flow -tg-stop-flow -i temporary-test-flow -``` - -### System Maintenance -```bash -# Stop all flows before maintenance -for flow in $(tg-show-flows | grep "id" | awk '{print $2}'); do - tg-stop-flow -i "$flow" -done -``` - -## Safety Considerations - -### Data Preservation -- **Knowledge Cores**: Loaded knowledge cores are preserved -- **Library Documents**: Library documents remain intact -- **Configuration**: System configuration is unaffected - -### Service Dependencies -- **Dependent Services**: Ensure no critical services depend on the flow -- **Active Users**: Notify users before stopping production flows -- **Scheduled Operations**: Check for scheduled operations using the flow - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-start-flow`](tg-start-flow.md) - Start a new flow instance -- [`tg-show-flows`](tg-show-flows.md) - List active flows -- 
- [`tg-show-flow-blueprints`](tg-show-flow-blueprints.md) - List available flow blueprints
**Cleanup Planning**: Plan flow stops during low-usage periods - -## Troubleshooting - -### Flow Won't Stop -```bash -# Check flow status -tg-show-flow-state -i problematic-flow - -# Force stop if necessary (implementation dependent) -# Contact system administrator if flow remains stuck -``` - -### Resource Not Released -```bash -# Check system resources after stopping -ps aux | grep trustgraph -netstat -an | grep 8088 - -# Restart TrustGraph if resources not properly released -``` - -### Service Still Responding -```bash -# Verify flow services are actually stopped -tg-invoke-graph-rag -q "test" -f stopped-flow - -# Should return flow not found error -``` \ No newline at end of file diff --git a/docs/cli/tg-stop-library-processing.md b/docs/cli/tg-stop-library-processing.md deleted file mode 100644 index 053ea011..00000000 --- a/docs/cli/tg-stop-library-processing.md +++ /dev/null @@ -1,507 +0,0 @@ -# tg-stop-library-processing - -Removes a library document processing record from TrustGraph. - -## Synopsis - -```bash -tg-stop-library-processing --id PROCESSING_ID [options] -``` - -## Description - -The `tg-stop-library-processing` command removes a document processing record from TrustGraph's library processing system. This command removes the processing record but **does not stop in-flight processing** that may already be running. - -This is primarily used for cleaning up processing records, managing processing queues, and maintaining processing history. 
- -## Options - -### Required Arguments - -- `--id, --processing-id ID`: Processing ID to remove - -### Optional Arguments - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-U, --user USER`: User ID (default: `trustgraph`) - -## Examples - -### Remove Single Processing Record -```bash -tg-stop-library-processing --id "proc_123456789" -``` - -### Remove with Custom User -```bash -tg-stop-library-processing --id "research_proc_001" -U "research-team" -``` - -### Remove with Custom API URL -```bash -tg-stop-library-processing --id "proc_555" -u http://staging:8088/ -``` - -## Important Limitations - -### Processing Record vs Active Processing -This command only removes the **processing record** and does not: -- Stop currently running processing jobs -- Cancel in-flight document analysis -- Interrupt active workflows - -### What It Does -- Removes processing metadata from library -- Cleans up processing history -- Allows reuse of processing IDs -- Maintains processing queue hygiene - -### What It Doesn't Do -- Stop active processing threads -- Cancel running analysis jobs -- Interrupt flow execution -- Free up computational resources immediately - -## Use Cases - -### Cleanup Failed Processing Records -```bash -# Remove failed processing records -failed_processes=("proc_failed_001" "proc_error_002" "proc_timeout_003") -for proc_id in "${failed_processes[@]}"; do - echo "Removing failed processing record: $proc_id" - tg-stop-library-processing --id "$proc_id" -done -``` - -### Batch Cleanup -```bash -# Clean up all processing records for a specific pattern -cleanup_batch_processing() { - local pattern="$1" - - echo "Cleaning up processing records matching: $pattern" - - # This would require a way to list processing records - # For now, use known processing IDs - tg-show-flows | \ - grep "$pattern" | \ - awk '{print $1}' | \ - while read proc_id; do - echo "Removing processing record: $proc_id" - 
tg-stop-library-processing --id "$proc_id" - done -} - -# Clean up old batch processing records -cleanup_batch_processing "batch_proc_" -``` - -### User-Specific Cleanup -```bash -# Clean up processing records for specific user -cleanup_user_processing() { - local user="$1" - - echo "Cleaning up processing records for user: $user" - - # Note: This assumes you have a way to list processing records by user - # Implementation would depend on available APIs - - # Example with known processing IDs - user_processes=("${user}_proc_001" "${user}_proc_002" "${user}_proc_003") - - for proc_id in "${user_processes[@]}"; do - echo "Removing processing record: $proc_id" - tg-stop-library-processing --id "$proc_id" -U "$user" - done -} - -# Clean up for specific user -cleanup_user_processing "temp-user" -``` - -### Age-Based Cleanup -```bash -# Clean up old processing records -cleanup_old_processing() { - local days_old="$1" - - echo "Cleaning up processing records older than $days_old days" - - # This would require timestamp information from processing records - # Implementation depends on available metadata - - cutoff_date=$(date -d "$days_old days ago" +"%Y%m%d") - - # Example with date-pattern processing IDs - # proc_20231215_001, proc_20231214_002, etc. 
- - for proc_id in proc_*; do - if [[ "$proc_id" =~ proc_([0-9]{8})_ ]]; then - proc_date="${BASH_REMATCH[1]}" - - if [[ "$proc_date" < "$cutoff_date" ]]; then - echo "Removing old processing record: $proc_id" - tg-stop-library-processing --id "$proc_id" - fi - fi - done -} - -# Clean up processing records older than 30 days -cleanup_old_processing 30 -``` - -## Safe Processing Management - -### Before Removing Processing Records -```bash -# Check if processing is actually complete before cleanup -safe_processing_cleanup() { - local proc_id="$1" - local doc_id="$2" - - echo "Safe cleanup for processing: $proc_id" - - # Check if document is accessible (processing likely complete) - if tg-invoke-document-rag -q "test" 2>/dev/null | grep -q "$doc_id"; then - echo "Document $doc_id is accessible, safe to remove processing record" - tg-stop-library-processing --id "$proc_id" - echo "Processing record removed: $proc_id" - else - echo "Document $doc_id not yet accessible, processing may still be active" - echo "Skipping removal of processing record: $proc_id" - fi -} - -# Usage -safe_processing_cleanup "proc_001" "doc_123" -``` - -### Verification Before Cleanup -```bash -# Verify processing completion before removing records -verify_and_cleanup() { - local proc_id="$1" - local collection="$2" - - echo "Verifying processing completion for: $proc_id" - - # Check if processing is still active in flows - if tg-show-flows | grep -q "$proc_id"; then - echo "Processing $proc_id is still active, not removing record" - return 1 - fi - - # Additional verification could include: - # - Checking if document content is available - # - Verifying embeddings are generated - # - Confirming knowledge graph updates - - echo "Processing appears complete, removing record" - tg-stop-library-processing --id "$proc_id" - - echo "Processing record removed: $proc_id" -} - -# Usage -verify_and_cleanup "proc_001" "research-docs" -``` - -## Advanced Usage - -### Conditional Cleanup -```bash -# Clean 
up processing records based on success criteria -conditional_cleanup() { - local proc_id="$1" - local doc_id="$2" - local collection="$3" - - echo "Conditional cleanup for: $proc_id" - - # Test if document is queryable (indicates successful processing) - test_query="What is this document about?" - - if result=$(tg-invoke-document-rag -q "$test_query" -C "$collection" 2>/dev/null); then - if echo "$result" | grep -q "answer"; then - echo "✓ Document is queryable, processing successful" - tg-stop-library-processing --id "$proc_id" - echo "Processing record cleaned up: $proc_id" - else - echo "⚠ Document query returned no answer, processing may be incomplete" - echo "Keeping processing record: $proc_id" - fi - else - echo "✗ Document query failed, processing incomplete or failed" - echo "Keeping processing record: $proc_id" - fi -} - -# Usage -conditional_cleanup "proc_001" "doc_123" "research-docs" -``` - -### Bulk Cleanup with Verification -```bash -# Bulk cleanup with individual verification -bulk_verified_cleanup() { - local proc_pattern="$1" - local collection="$2" - - echo "Bulk cleanup with verification for pattern: $proc_pattern" - - # Get list of processing IDs (this would need appropriate API) - # For now, use example pattern - - for proc_id in proc_batch_*; do - if [[ "$proc_id" =~ $proc_pattern ]]; then - echo "Checking processing: $proc_id" - - # Extract document ID from processing ID (example pattern) - if [[ "$proc_id" =~ _([^_]+)$ ]]; then - doc_id="${BASH_REMATCH[1]}" - - # Verify document is accessible - if tg-invoke-document-rag -q "test" -C "$collection" 2>/dev/null | grep -q "$doc_id"; then - echo "✓ Verified: $proc_id" - tg-stop-library-processing --id "$proc_id" - else - echo "⚠ Unverified: $proc_id" - fi - else - echo "? 
Unknown pattern: $proc_id" - fi - fi - done -} - -# Usage -bulk_verified_cleanup "batch_" "processed-docs" -``` - -### Processing Record Maintenance -```bash -# Maintain processing record hygiene -maintain_processing_records() { - local max_records="$1" - - echo "Maintaining processing records (max: $max_records)" - - # This would require an API to list and count processing records - # For now, demonstrate the concept - - # Count current processing records (placeholder) - current_count=150 # Would get this from API - - if [ "$current_count" -gt "$max_records" ]; then - excess=$((current_count - max_records)) - echo "Found $current_count records, removing $excess oldest" - - # Remove oldest processing records - # This would require timestamp information - echo "Would remove $excess oldest processing records" - - # Example implementation: - # oldest_records=($(get_oldest_processing_records $excess)) - # for proc_id in "${oldest_records[@]}"; do - # tg-stop-library-processing --id "$proc_id" - # done - else - echo "Processing record count within limits: $current_count" - fi -} - -# Maintain maximum 100 processing records -maintain_processing_records 100 -``` - -## Error Handling - -### Processing ID Not Found -```bash -Exception: Processing ID not found -``` -**Solution**: Verify processing ID exists and check spelling. - -### Processing Still Active -```bash -Exception: Cannot remove active processing record -``` -**Solution**: Wait for processing to complete or verify if processing is actually active. - -### Permission Errors -```bash -Exception: Access denied -``` -**Solution**: Check user permissions and processing record ownership. - -### API Connection Issues -```bash -Exception: Connection refused -``` -**Solution**: Check API URL and ensure TrustGraph is running. 
- -## Monitoring and Verification - -### Processing Record Status -```bash -# Check processing record status before removal -check_processing_status() { - local proc_id="$1" - - echo "Checking status of processing: $proc_id" - - # Check if processing is in active flows - if tg-show-flows | grep -q "$proc_id"; then - echo "Status: ACTIVE - Processing is currently running" - return 1 - else - echo "Status: INACTIVE - Processing not found in active flows" - return 0 - fi -} - -# Usage -if check_processing_status "proc_001"; then - echo "Safe to remove processing record" - tg-stop-library-processing --id "proc_001" -else - echo "Processing still active, not removing record" -fi -``` - -### Cleanup Verification -```bash -# Verify successful removal -verify_removal() { - local proc_id="$1" - - echo "Verifying removal of processing record: $proc_id" - - # Check if processing record still exists - # This would require an API to query processing records - - if tg-show-flows | grep -q "$proc_id"; then - echo "✗ Processing record still exists" - return 1 - else - echo "✓ Processing record successfully removed" - return 0 - fi -} - -# Usage -tg-stop-library-processing --id "proc_001" -verify_removal "proc_001" -``` - -## Integration with Processing Workflow - -### Complete Processing Lifecycle -```bash -# Complete processing lifecycle management -processing_lifecycle() { - local doc_id="$1" - local proc_id="$2" - local collection="$3" - - echo "Managing complete processing lifecycle" - echo "Document: $doc_id" - echo "Processing: $proc_id" - echo "Collection: $collection" - - # 1. Start processing - echo "1. Starting processing..." - tg-start-library-processing \ - -d "$doc_id" \ - --id "$proc_id" \ - --collection "$collection" - - # 2. Monitor processing - echo "2. Monitoring processing..." 
- timeout=300 - elapsed=0 - - while [ $elapsed -lt $timeout ]; do - if tg-invoke-document-rag -q "test" -C "$collection" 2>/dev/null | grep -q "$doc_id"; then - echo "✓ Processing completed" - break - fi - - sleep 10 - elapsed=$((elapsed + 10)) - done - - # 3. Verify completion - echo "3. Verifying completion..." - if tg-invoke-document-rag -q "What is this document?" -C "$collection" 2>/dev/null; then - echo "✓ Document is queryable" - - # 4. Clean up processing record - echo "4. Cleaning up processing record..." - tg-stop-library-processing --id "$proc_id" - echo "✓ Processing record removed" - else - echo "✗ Processing verification failed" - echo "Keeping processing record for investigation" - fi -} - -# Usage -processing_lifecycle "doc_123" "proc_test_001" "test-collection" -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-start-library-processing`](tg-start-library-processing.md) - Start document processing -- [`tg-show-library-documents`](tg-show-library-documents.md) - List library documents -- [`tg-show-flows`](tg-show-flows.md) - Monitor active processing flows -- [`tg-invoke-document-rag`](tg-invoke-document-rag.md) - Verify processed documents - -## API Integration - -This command uses the [Library API](../apis/api-librarian.md) to remove processing records from the document processing system. - -## Best Practices - -1. **Verify Completion**: Ensure processing is complete before removing records -2. **Check Dependencies**: Verify no other processes depend on the processing record -3. **Gradual Cleanup**: Remove processing records gradually to avoid system impact -4. **Monitor Impact**: Watch for any effects of record removal on system performance -5. **Documentation**: Log processing record removals for audit purposes -6. **Backup**: Consider backing up processing metadata before removal -7. 
**Testing**: Test cleanup procedures in non-production environments - -## Troubleshooting - -### Record Won't Remove -```bash -# Check if processing is actually complete -tg-show-flows | grep "processing-id" - -# Verify API connectivity -curl -s "$TRUSTGRAPH_URL/api/v1/library/processing" > /dev/null -``` - -### Unexpected Behavior After Removal -```bash -# Check if document is still accessible -tg-invoke-document-rag -q "test" -C "collection" - -# Verify document processing status -tg-show-library-documents | grep "document-id" -``` - -### Permission Issues -```bash -# Check user permissions -tg-show-library-documents -U "your-user" - -# Verify processing record ownership -``` \ No newline at end of file diff --git a/docs/cli/tg-unload-kg-core.md b/docs/cli/tg-unload-kg-core.md deleted file mode 100644 index 2c044906..00000000 --- a/docs/cli/tg-unload-kg-core.md +++ /dev/null @@ -1,335 +0,0 @@ -# tg-unload-kg-core - -Removes a knowledge core from an active flow without deleting the stored core. - -## Synopsis - -```bash -tg-unload-kg-core --id CORE_ID [options] -``` - -## Description - -The `tg-unload-kg-core` command removes a previously loaded knowledge core from an active processing flow, making that knowledge unavailable for queries and processing within that specific flow. The knowledge core remains stored in the system and can be loaded again later or into different flows. - -This is useful for managing flow memory usage, switching knowledge contexts, or temporarily removing knowledge without permanent deletion. 
- -## Options - -### Required Arguments - -- `--id, --identifier CORE_ID`: Identifier of the knowledge core to unload - -### Optional Arguments - -- `-u, --api-url URL`: TrustGraph API URL (default: `$TRUSTGRAPH_URL` or `http://localhost:8088/`) -- `-U, --user USER`: User identifier (default: `trustgraph`) -- `-f, --flow-id FLOW`: Flow ID to unload knowledge from (default: `default`) - -## Examples - -### Unload from Default Flow -```bash -tg-unload-kg-core --id "research-knowledge" -``` - -### Unload from Specific Flow -```bash -tg-unload-kg-core \ - --id "medical-knowledge" \ - --flow-id "medical-analysis" \ - -U medical-team -``` - -### Unload Multiple Cores -```bash -# Unload several knowledge cores from a flow -tg-unload-kg-core --id "core-1" --flow-id "analysis-flow" -tg-unload-kg-core --id "core-2" --flow-id "analysis-flow" -tg-unload-kg-core --id "core-3" --flow-id "analysis-flow" -``` - -### Using Custom API URL -```bash -tg-unload-kg-core \ - --id "production-knowledge" \ - --flow-id "prod-flow" \ - -u http://production:8088/ -``` - -## Prerequisites - -### Knowledge Core Must Be Loaded -The knowledge core must currently be loaded in the specified flow: - -```bash -# Check what's loaded by querying the flow -tg-show-graph -f target-flow | head -10 - -# If no output, core may not be loaded -``` - -### Flow Must Be Running -The target flow must be active: - -```bash -# Check running flows -tg-show-flows - -# Verify the target flow exists -tg-show-flows | grep "target-flow" -``` - -## Unloading Process - -1. **Validation**: Verifies knowledge core is loaded in the specified flow -2. **Query Termination**: Stops any ongoing queries using the knowledge -3. **Index Cleanup**: Removes knowledge indexes from flow context -4. **Memory Release**: Frees memory allocated to the knowledge core -5. 
**Service Update**: Updates flow services to reflect knowledge unavailability - -## Effects of Unloading - -### Knowledge Becomes Unavailable -After unloading, the knowledge is no longer accessible through the flow: - -```bash -# Before unloading - knowledge available -tg-invoke-graph-rag -q "What knowledge is loaded?" -f my-flow - -# Unload the knowledge -tg-unload-kg-core --id "my-knowledge" --flow-id "my-flow" - -# After unloading - reduced knowledge available -tg-invoke-graph-rag -q "What knowledge is loaded?" -f my-flow -``` - -### Memory Recovery -- RAM used by knowledge indexes is freed -- Flow performance may improve -- Other knowledge cores in the flow remain unaffected - -### Core Preservation -- Knowledge core remains stored in the system -- Can be reloaded later -- Available for loading into other flows - -## Output - -Successful unloading typically produces no output: - -```bash -# Unload core (no output expected) -tg-unload-kg-core --id "test-core" --flow-id "test-flow" - -# Verify unloading by checking available knowledge -tg-show-graph -f test-flow | wc -l -# Should show fewer triples if core was successfully unloaded -``` - -## Error Handling - -### Knowledge Core Not Loaded -```bash -Exception: Knowledge core 'my-core' not loaded in flow 'my-flow' -``` -**Solution**: Verify the core is actually loaded using `tg-show-graph` or load it first with `tg-load-kg-core`. - -### Flow Not Found -```bash -Exception: Flow 'invalid-flow' not found -``` -**Solution**: Check running flows with `tg-show-flows` and verify the flow ID. - -### Permission Errors -```bash -Exception: Access denied to unload knowledge core -``` -**Solution**: Verify user permissions for the knowledge core and flow. - -### Connection Errors -```bash -Exception: Connection refused -``` -**Solution**: Check the API URL and ensure TrustGraph is running. 
- -## Verification - -### Check Knowledge Reduction -```bash -# Count triples before unloading -before=$(tg-show-graph -f my-flow | wc -l) - -# Unload knowledge -tg-unload-kg-core --id "my-core" --flow-id "my-flow" - -# Count triples after unloading -after=$(tg-show-graph -f my-flow | wc -l) - -echo "Triples before: $before, after: $after" -``` - -### Test Query Impact -```bash -# Test queries before and after unloading -tg-invoke-graph-rag -q "test query" -f my-flow - -# Should work with loaded knowledge -tg-unload-kg-core --id "relevant-core" --flow-id "my-flow" - -tg-invoke-graph-rag -q "test query" -f my-flow -# May return different results or "no relevant knowledge found" -``` - -## Use Cases - -### Memory Management -```bash -# Free up memory by unloading unused knowledge -tg-unload-kg-core --id "large-historical-data" --flow-id "analysis-flow" - -# Load more relevant knowledge -tg-load-kg-core --id "current-data" --flow-id "analysis-flow" -``` - -### Context Switching -```bash -# Switch from medical to legal knowledge context -tg-unload-kg-core --id "medical-knowledge" --flow-id "analysis-flow" -tg-load-kg-core --id "legal-knowledge" --flow-id "analysis-flow" -``` - -### Selective Knowledge Loading -```bash -# Load only specific knowledge for focused analysis -tg-unload-kg-core --id "general-knowledge" --flow-id "specialized-flow" -tg-load-kg-core --id "domain-specific" --flow-id "specialized-flow" -``` - -### Testing and Development -```bash -# Test flow behavior with different knowledge sets -tg-unload-kg-core --id "production-data" --flow-id "test-flow" -tg-load-kg-core --id "test-data" --flow-id "test-flow" - -# Run tests -./run-knowledge-tests.sh - -# Restore production knowledge -tg-unload-kg-core --id "test-data" --flow-id "test-flow" -tg-load-kg-core --id "production-data" --flow-id "test-flow" -``` - -### Flow Maintenance -```bash -# Prepare flow for maintenance by unloading all knowledge -cores=$(tg-show-kg-cores) -for core in $cores; do - 
tg-unload-kg-core --id "$core" --flow-id "maintenance-flow" 2>/dev/null || true -done - -# Perform maintenance -./flow-maintenance.sh - -# Reload required knowledge -tg-load-kg-core --id "essential-core" --flow-id "maintenance-flow" -``` - -## Knowledge Management Workflow - -### Dynamic Knowledge Loading -```bash -# Function to switch knowledge contexts -switch_knowledge_context() { - local flow_id=$1 - local old_core=$2 - local new_core=$3 - - echo "Switching from $old_core to $new_core in $flow_id" - - # Unload old knowledge - tg-unload-kg-core --id "$old_core" --flow-id "$flow_id" - - # Load new knowledge - tg-load-kg-core --id "$new_core" --flow-id "$flow_id" - - echo "Context switch completed" -} - -# Usage -switch_knowledge_context "analysis-flow" "old-data" "new-data" -``` - -### Bulk Knowledge Management -```bash -# Unload all knowledge from a flow -unload_all_knowledge() { - local flow_id=$1 - - # Get list of potentially loaded cores - tg-show-kg-cores | while read core; do - echo "Attempting to unload $core from $flow_id" - tg-unload-kg-core --id "$core" --flow-id "$flow_id" 2>/dev/null || true - done - - echo "All knowledge unloaded from $flow_id" -} - -# Usage -unload_all_knowledge "cleanup-flow" -``` - -## Environment Variables - -- `TRUSTGRAPH_URL`: Default API URL - -## Related Commands - -- [`tg-load-kg-core`](tg-load-kg-core.md) - Load knowledge core into flow -- [`tg-show-kg-cores`](tg-show-kg-cores.md) - List available knowledge cores -- [`tg-show-graph`](tg-show-graph.md) - View currently loaded knowledge -- [`tg-show-flows`](tg-show-flows.md) - List active flows - -## API Integration - -This command uses the [Knowledge API](../apis/api-knowledge.md) with the `unload-kg-core` operation to remove knowledge from active flows. - -## Best Practices - -1. **Memory Monitoring**: Monitor flow memory usage when loading/unloading knowledge -2. **Graceful Unloading**: Ensure no critical queries are running before unloading -3. 
**Documentation**: Document which knowledge cores are needed for each flow -4. **Testing**: Test flow behavior after unloading knowledge -5. **Backup Strategy**: Keep knowledge cores stored even when not loaded -6. **Performance Optimization**: Unload unused knowledge to improve performance - -## Troubleshooting - -### Knowledge Still Appears in Queries -```bash -# If knowledge still appears after unloading -# Check if multiple cores contain similar data -tg-show-graph -f my-flow | grep "expected-removed-entity" - -# Verify all relevant cores were unloaded -``` - -### Memory Not Released -```bash -# If memory usage doesn't decrease after unloading -# Check system memory usage -free -h - -# Contact system administrator if memory leak suspected -``` - -### Query Performance Issues -```bash -# If queries become slow after unloading -# May need to reload essential knowledge -tg-load-kg-core --id "essential-core" --flow-id "slow-flow" - -# Or restart the flow -tg-stop-flow -i "slow-flow" -tg-start-flow -n "flow-class" -i "slow-flow" -d "Restarted flow" -``` \ No newline at end of file diff --git a/docs/websocket.html b/docs/websocket.html new file mode 100644 index 00000000..859e3c6e --- /dev/null +++ b/docs/websocket.html @@ -0,0 +1,3270 @@ + + + + + + TrustGraph WebSocket API 1.8 documentation + + + + + + +
TrustGraph WebSocket API 1.8

WebSocket API for TrustGraph - providing multiplexed, asynchronous access to all services.

+

Overview

+

The WebSocket API provides access to all TrustGraph services over a single persistent connection:

+
    +
  • Multiplexed: Multiple concurrent requests with ID-based correlation
  • +
  • Asynchronous: Non-blocking request/response pattern
  • +
  • Efficient: Lower overhead than HTTP REST
  • +
  • Streaming: Real-time progressive responses
  • +
+

Protocol Summary

+

All messages are JSON with:

+
    +
  • id: Client-generated unique identifier for request/response correlation
  • +
  • service: Service identifier (e.g., "config", "agent", "document-rag")
  • +
  • flow: Optional flow ID for flow-hosted services
  • +
  • request/response: Service-specific payload (identical to REST API schemas)
  • +
  • error: Error information on failure
  • +
+

Service Types

+

Global Services (no flow parameter):

+
    +
  • config, flow, librarian, knowledge, collection-management
  • +
+

Flow-Hosted Services (require flow parameter):

+
    +
  • agent, text-completion, prompt, document-rag, graph-rag
  • +
  • embeddings, graph-embeddings, document-embeddings
  • +
  • triples, objects, nlp-query, structured-query, structured-diag
  • +
  • text-load, document-load, mcp-tool
  • +
+

Schema Reuse

+

Request and response payloads use identical schemas to the REST API. +See OpenAPI specification for detailed schema documentation.

+

Servers

  • ws://localhost:8088/ws — production

    Local development WebSocket server

    +
    Security:
    • HTTP API key
      • Name: token
      • In: query

      Bearer token authentication when GATEWAY_SECRET is configured. +Include as query parameter: ws://localhost:8088/api/v1/socket?token=<api-key>

      +

Messages

  • #1Service Request MessageServiceRequest

    Request message for any TrustGraph service

    Message IDServiceRequest

    Generic request message that can invoke any TrustGraph service.

    +

    The request field payload varies by service and matches the REST API request body schema.

    +
    oneOf

    Service request envelope with id, service, optional flow, and service-specific request payload

    +
  • #2Service Response MessageServiceResponse

    Successful response from any TrustGraph service

    Message IDServiceResponse

    Generic response message from any TrustGraph service.

    +

    The response field payload varies by service and matches the REST API response body schema.

    +

    For streaming services, multiple messages with the same id may be sent.

    +
    object

    WebSocket response message envelope for successful responses.

    +

    Contains the request ID for correlation and the service-specific response payload.

    +
  • #3Service Error MessageServiceError

    Error response from any TrustGraph service

    Message IDServiceError

    Error message sent when a service request fails.

    +

    Contains the request ID and error details.

    +
    object

    WebSocket error message envelope.

    +

    Sent when a request fails. Contains the request ID and error details.

    +

Schemas

  • object

    WebSocket request message envelope.

    +

    Wraps service-specific request payloads with routing and correlation metadata.

    +
  • object

    WebSocket response message envelope for successful responses.

    +

    Contains the request ID for correlation and the service-specific response payload.

    +
  • object

    WebSocket error message envelope.

    +

    Sent when a request fails. Contains the request ID and error details.

    +
+ + + + + + \ No newline at end of file diff --git a/specs/README.md b/specs/README.md new file mode 100644 index 00000000..ae596c2d --- /dev/null +++ b/specs/README.md @@ -0,0 +1,216 @@ +# TrustGraph API Specifications + +This directory contains formal specifications for the TrustGraph API Gateway. + +## Directory Structure + +``` +specs/ +├── api/ # OpenAPI 3.1.0 specification for REST API +│ ├── openapi.yaml # Main entry point +│ ├── paths/ # Endpoint definitions +│ ├── components/ # Reusable schemas, responses, parameters +│ └── security/ # Security scheme definitions +│ +├── websocket/ # AsyncAPI 3.0.0 specification for WebSocket API +│ ├── asyncapi.yaml # Main entry point +│ ├── channels/ # Channel definitions +│ ├── components/ # Message and schema definitions +│ └── STREAMING.md # Streaming patterns documentation +│ +└── README.md # This file +``` + +## Specifications + +### REST API (OpenAPI 3.1.0) + +Location: `specs/api/openapi.yaml` + +The REST API specification documents: +- **5 Global Services**: config, flow, librarian, knowledge, collection-management +- **16 Flow-Hosted Services**: agent, RAG, embeddings, queries, loading, tools +- **Import/Export**: Bulk data operations +- **Metrics**: Prometheus monitoring + +**Features**: +- Modular structure with $ref to external files +- Comprehensive request/response schemas +- Authentication via Bearer tokens +- Field naming in kebab-case + +### WebSocket API (AsyncAPI 3.0.0) + +Location: `specs/websocket/asyncapi.yaml` + +The WebSocket API specification documents: +- Multiplexed async communication protocol +- Request/response message envelopes with ID correlation +- All services accessible via single WebSocket connection +- Streaming response patterns + +**Features**: +- References REST API schemas (single source of truth) +- Message-based routing (service + optional flow parameters) +- Comprehensive streaming documentation +- Full async/multiplexing behavior + +## Building Documentation + +### 
Prerequisites + +```bash +npm install -g @redocly/cli @asyncapi/cli +``` + +Or use npx (no installation required). + +### Generate REST API Documentation + +**Using Redocly (HTML)**: +```bash +cd specs/api +npx @redocly/cli build-docs openapi.yaml -o ../../docs/api.html +``` + +**Preview in browser**: +```bash +cd specs/api +npx @redocly/cli preview-docs openapi.yaml +``` +Opens interactive documentation at http://localhost:8080 + +**Validate**: +```bash +cd specs/api +npx @redocly/cli lint openapi.yaml +``` + +### Generate WebSocket API Documentation + +**Using AsyncAPI (HTML)**: +```bash +cd specs/websocket +npx -p @asyncapi/cli asyncapi generate fromTemplate asyncapi.yaml @asyncapi/html-template@3.0.0 --use-new-generator -o /tmp/asyncapi-build -p singleFile=true --force-write +mv /tmp/asyncapi-build/index.html ../../docs/websocket.html +rm -rf /tmp/asyncapi-build +``` + +Notes: +- The generator must run from the `specs/websocket` directory to properly resolve relative `$ref` paths to OpenAPI schemas +- `--use-new-generator` flag enables generator v2 compatibility +- `-p singleFile=true` parameter generates a single standalone HTML file + +**Validate**: +```bash +cd specs/websocket +npx @asyncapi/cli validate asyncapi.yaml +``` + +### Build All Documentation + +Use the provided build script: +```bash +./specs/build-docs.sh +``` + +This generates: +- `docs/api.html` - REST API documentation +- `docs/websocket.html` - WebSocket API documentation + +## Viewing Documentation + +After building: + +**REST API**: +```bash +xdg-open docs/api.html +# or +firefox docs/api.html +``` + +**WebSocket API**: +```bash +xdg-open docs/websocket.html +# or +firefox docs/websocket.html +``` + +## Schema Reuse + +The WebSocket API specification **references** the REST API schemas using relative paths: + +```yaml +# In specs/websocket/components/messages/requests/AgentRequest.yaml +request: + $ref: '../../../../api/components/schemas/agent/AgentRequest.yaml' +``` + +This ensures: +- 
**Single source of truth** for all schemas +- **Consistency** between REST and WebSocket APIs +- **Easy maintenance** - update schemas in one place + +## Validation Status + +Both specifications are validated and error-free: + +- ✅ **OpenAPI**: Validated with Redocly CLI +- ✅ **AsyncAPI**: Validated with AsyncAPI CLI + +## Maintenance + +### Adding a New Service + +1. **Create schemas** in `specs/api/components/schemas/{service-name}/` + - `{ServiceName}Request.yaml` + - `{ServiceName}Response.yaml` + +2. **Create path definition** in `specs/api/paths/` or `specs/api/paths/flow/` + +3. **Add path to main spec** in `specs/api/openapi.yaml` + +4. **Create WebSocket message** in `specs/websocket/components/messages/requests/` + - Reference the OpenAPI request schema + +5. **Add to ServiceRequest** message in `specs/websocket/components/messages/ServiceRequest.yaml` + +6. **Validate both specs**: + ```bash + cd specs/api && npx @redocly/cli lint openapi.yaml + cd specs/websocket && npx @asyncapi/cli validate asyncapi.yaml + ``` + +### Modifying an Existing Service + +1. **Update schema** in `specs/api/components/schemas/{service-name}/` + +2. **Changes automatically apply** to WebSocket spec via $ref + +3. 
**Validate both specs** to ensure consistency + +## Tools and Resources + +**OpenAPI Tools**: +- [Redocly CLI](https://redocly.com/docs/cli/) - Linting, docs generation +- [Swagger Editor](https://editor.swagger.io/) - Online editor +- [OpenAPI Generator](https://openapi-generator.tech/) - Client/server code generation + +**AsyncAPI Tools**: +- [AsyncAPI CLI](https://www.asyncapi.com/tools/cli) - Validation, docs generation +- [AsyncAPI Studio](https://studio.asyncapi.com/) - Online editor +- [AsyncAPI Generator](https://www.asyncapi.com/tools/generator) - Template-based generation + +**Online Validators**: +- OpenAPI: https://editor.swagger.io/ +- AsyncAPI: https://studio.asyncapi.com/ + +## API Version + +Current version: **1.8.0** + +Version is specified in both: +- `specs/api/openapi.yaml` → `info.version` +- `specs/websocket/asyncapi.yaml` → `info.version` + +Update both when releasing a new API version. diff --git a/specs/api/openapi.yaml b/specs/api/openapi.yaml index b3258d14..55c05741 100644 --- a/specs/api/openapi.yaml +++ b/specs/api/openapi.yaml @@ -2,7 +2,7 @@ openapi: 3.1.0 info: title: TrustGraph API Gateway - version: 1.8.0 + version: "1.8" description: | REST API for TrustGraph - an AI-powered knowledge graph and RAG system. diff --git a/specs/build-docs.sh b/specs/build-docs.sh new file mode 100755 index 00000000..3425b339 --- /dev/null +++ b/specs/build-docs.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# +# Build documentation from OpenAPI and AsyncAPI specifications +# + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +echo "Building TrustGraph API Documentation..." +echo + +# Create output directory +mkdir -p ../docs + +# Build REST API documentation +echo "Building REST API documentation (OpenAPI)..." 
+cd api +npx --yes @redocly/cli build-docs openapi.yaml -o ../../docs/api.html +echo "✓ REST API docs generated: docs/api.html" +echo + +# Build WebSocket API documentation +echo "Building WebSocket API documentation (AsyncAPI)..." +cd ../websocket +npx --yes -p @asyncapi/cli asyncapi generate fromTemplate asyncapi.yaml @asyncapi/html-template@3.0.0 --use-new-generator -o /tmp/asyncapi-build -p singleFile=true --force-write +mv /tmp/asyncapi-build/index.html ../../docs/websocket.html +rm -rf /tmp/asyncapi-build +echo "✓ WebSocket API docs generated: docs/websocket.html" +echo + +cd "$SCRIPT_DIR" +echo "Documentation build complete!" +echo +echo "View documentation:" +echo " REST API: file://$(realpath ../docs/api.html)" +echo " WebSocket API: file://$(realpath ../docs/websocket.html)" diff --git a/specs/websocket/STREAMING.md b/specs/websocket/STREAMING.md new file mode 100644 index 00000000..2ee59249 --- /dev/null +++ b/specs/websocket/STREAMING.md @@ -0,0 +1,357 @@ +# WebSocket Streaming Message Patterns + +This document describes streaming behavior for TrustGraph WebSocket services. + +## Overview + +Many TrustGraph services support streaming responses, where a single request results in multiple response messages sent progressively over time. 
This enables: +- Real-time output as it's generated +- Lower latency for first results +- Progressive UI updates +- Better user experience for long-running operations + +## Streaming Protocol + +### Request ID Correlation + +All streaming responses for a request share the same `id`: + +```json +// Single request +{"id": "req-1", "service": "agent", "flow": "my-flow", "request": {...}} + +// Multiple responses with same id +{"id": "req-1", "response": {...}} // First chunk +{"id": "req-1", "response": {...}} // Second chunk +{"id": "req-1", "response": {...}} // Final chunk +``` + +### Completion Indicators + +Services use different fields to indicate the final message: + +| Service | Completion Field | Final Value | +|---------|-----------------|-------------| +| agent | `end-of-dialog` | `true` | +| document-rag | `end-of-stream` | `true` | +| graph-rag | `end-of-stream` | `true` | +| text-completion | `end-of-stream` | `true` | +| prompt | `end-of-stream` | `true` | + +## Streaming Services + +### Agent Service + +Agent service streams thought processes, actions, observations, and answers: + +```json +// Request +{ + "id": "agent-1", + "service": "agent", + "flow": "my-flow", + "request": { + "question": "What is quantum computing?", + "streaming": true + } +} + +// Response stream +{ + "id": "agent-1", + "response": { + "chunk-type": "thought", + "content": "I need to explain quantum computing concepts", + "end-of-dialog": false + } +} + +{ + "id": "agent-1", + "response": { + "chunk-type": "answer", + "content": "Quantum computing is a type of computing that uses quantum mechanical phenomena...", + "end-of-dialog": false + } +} + +{ + "id": "agent-1", + "response": { + "chunk-type": "answer", + "content": "Key principles include superposition and entanglement.", + "end-of-dialog": true + } +} +``` + +**Chunk Types**: +- `thought`: Internal reasoning +- `action`: Tool/action being invoked +- `observation`: Result from tool/action +- `answer`: Final answer 
content + +### Document RAG Service + +Document RAG streams answer chunks: + +```json +// Request +{ + "id": "rag-1", + "service": "document-rag", + "flow": "my-flow", + "request": { + "query": "What are the main features?", + "streaming": true, + "doc-limit": 20 + } +} + +// Response stream +{ + "id": "rag-1", + "response": { + "content": "The main features include: 1) ", + "end-of-stream": false + } +} + +{ + "id": "rag-1", + "response": { + "content": "Knowledge graph storage, 2) Vector embeddings, ", + "end-of-stream": false + } +} + +{ + "id": "rag-1", + "response": { + "content": "3) RAG capabilities.", + "end-of-stream": true + } +} +``` + +### Graph RAG Service + +Similar to Document RAG but retrieves from knowledge graph: + +```json +{ + "id": "graph-rag-1", + "service": "graph-rag", + "flow": "my-flow", + "request": { + "query": "What entities are related to quantum computing?", + "streaming": true, + "triple-limit": 100 + } +} +``` + +Response stream has same structure as Document RAG. + +### Text Completion Service + +Streams generated text: + +```json +{ + "id": "complete-1", + "service": "text-completion", + "flow": "my-flow", + "request": { + "prompt": "Once upon a time", + "streaming": true, + "max-output-tokens": 100 + } +} + +// Response stream +{ + "id": "complete-1", + "response": { + "content": " there was a ", + "end-of-stream": false + } +} + +{ + "id": "complete-1", + "response": { + "content": "kingdom far away...", + "end-of-stream": true + } +} +``` + +### Prompt Service + +Streams prompt expansion/generation: + +```json +{ + "id": "prompt-1", + "service": "prompt", + "flow": "my-flow", + "request": { + "template": "default-template", + "variables": {"topic": "quantum"}, + "streaming": true + } +} +``` + +Response stream contains progressive prompt text. 
+ +## Non-Streaming Services + +These services return a single response message: + +- **config**: Configuration operations +- **flow**: Flow lifecycle management +- **librarian**: Library operations +- **knowledge**: Knowledge graph operations +- **collection-management**: Collection metadata +- **embeddings**: Generate embeddings +- **mcp-tool**: Tool invocation +- **triples**: Triple pattern queries +- **objects**: GraphQL queries +- **nlp-query**: NLP-based queries +- **structured-query**: Structured queries +- **structured-diag**: Diagnostics +- **graph-embeddings**: Embedding-based graph search +- **document-embeddings**: Embedding-based document search +- **text-load**: Text loading (returns status) +- **document-load**: Document loading (returns status) + +## Client Implementation Guide + +### Basic Streaming Handler + +```javascript +const pendingRequests = new Map(); + +// Send request +const requestId = generateUniqueId(); +const request = { + id: requestId, + service: 'agent', + flow: 'my-flow', + request: { + question: 'What is quantum computing?', + streaming: true + } +}; + +pendingRequests.set(requestId, { + chunks: [], + complete: false +}); + +websocket.send(JSON.stringify(request)); + +// Handle responses +websocket.onmessage = (event) => { + const message = JSON.parse(event.data); + + if (message.error) { + // Handle error + console.error(`Request ${message.id} failed:`, message.error); + pendingRequests.delete(message.id); + return; + } + + const pending = pendingRequests.get(message.id); + if (!pending) { + console.warn(`Unexpected response for ${message.id}`); + return; + } + + // Accumulate chunk + pending.chunks.push(message.response); + + // Check if complete + const isComplete = + message.response['end-of-stream'] === true || + message.response['end-of-dialog'] === true; + + if (isComplete) { + pending.complete = true; + console.log(`Request ${message.id} complete:`, pending.chunks); + pendingRequests.delete(message.id); + } else { + // 
Process intermediate chunk + console.log(`Chunk for ${message.id}:`, message.response); + } +}; +``` + +## Error Handling in Streaming + +Errors can occur at any point during streaming: + +```json +// Mid-stream error +{ + "id": "req-1", + "response": { + "chunk-type": "thought", + "content": "Processing...", + "end-of-dialog": false + } +} + +// Error interrupts stream +{ + "id": "req-1", + "error": { + "type": "service-error", + "message": "Backend timeout" + } +} +``` + +When an error occurs, no further response messages will be sent for that request ID. The client should: +1. Stop waiting for completion +2. Handle the partial results appropriately +3. Clean up request state + +## Performance Considerations + +### Multiplexing Streaming Requests + +Multiple streaming requests can be active simultaneously: + +```json +{"id": "req-1", "service": "agent", ...} +{"id": "req-2", "service": "document-rag", ...} +{"id": "req-3", "service": "text-completion", ...} + +// Responses may interleave +{"id": "req-2", "response": {...}} +{"id": "req-1", "response": {...}} +{"id": "req-3", "response": {...}} +{"id": "req-1", "response": {...}} +{"id": "req-2", "response": {...}} +``` + +### Backpressure + +If the client is slow to consume streaming responses, the WebSocket connection may experience: +- Buffering on the server side +- Increased latency +- Potential connection issues + +Clients should process streaming chunks efficiently or implement flow control. + +## Best Practices + +1. **Always check completion flags**: Don't assume a fixed number of chunks +2. **Handle partial results**: Be prepared for errors mid-stream +3. **Unique request IDs**: Ensure IDs are unique across active requests +4. **Timeout handling**: Implement client-side timeouts for streaming requests +5. **Memory management**: Don't accumulate unbounded chunks; process incrementally +6. 
**User feedback**: Show progressive results to users as chunks arrive diff --git a/specs/websocket/asyncapi.yaml b/specs/websocket/asyncapi.yaml new file mode 100644 index 00000000..43204aa7 --- /dev/null +++ b/specs/websocket/asyncapi.yaml @@ -0,0 +1,87 @@ +asyncapi: 3.0.0 + +info: + title: TrustGraph WebSocket API + version: "1.8" + description: | + WebSocket API for TrustGraph - providing multiplexed, asynchronous access to all services. + + ## Overview + + The WebSocket API provides access to all TrustGraph services over a single persistent connection: + - **Multiplexed**: Multiple concurrent requests with ID-based correlation + - **Asynchronous**: Non-blocking request/response pattern + - **Efficient**: Lower overhead than HTTP REST + - **Streaming**: Real-time progressive responses + + ## Protocol Summary + + All messages are JSON with: + - `id`: Client-generated unique identifier for request/response correlation + - `service`: Service identifier (e.g., "config", "agent", "document-rag") + - `flow`: Optional flow ID for flow-hosted services + - `request`/`response`: Service-specific payload (identical to REST API schemas) + - `error`: Error information on failure + + ## Service Types + + **Global Services** (no `flow` parameter): + - config, flow, librarian, knowledge, collection-management + + **Flow-Hosted Services** (require `flow` parameter): + - agent, text-completion, prompt, document-rag, graph-rag + - embeddings, graph-embeddings, document-embeddings + - triples, objects, nlp-query, structured-query, structured-diag + - text-load, document-load, mcp-tool + + ## Schema Reuse + + Request and response payloads use identical schemas to the REST API. + See OpenAPI specification for detailed schema documentation. 
+ + contact: + name: TrustGraph Project + url: https://trustgraph.ai + license: + name: Apache 2.0 + url: https://www.apache.org/licenses/LICENSE-2.0.html + +servers: + production: + host: localhost:8088 + protocol: ws + description: Local development WebSocket server + security: + - $ref: '#/components/securitySchemes/bearerAuth' + +defaultContentType: application/json + +channels: + socket: + $ref: './channels/socket.yaml' + +components: + securitySchemes: + bearerAuth: + type: httpApiKey + name: token + in: query + description: | + Bearer token authentication when GATEWAY_SECRET is configured. + Include as query parameter: ws://localhost:8088/api/v1/socket?token= + + messages: + ServiceRequest: + $ref: './components/messages/ServiceRequest.yaml' + ServiceResponse: + $ref: './components/messages/ServiceResponse.yaml' + ServiceError: + $ref: './components/messages/ServiceError.yaml' + + schemas: + RequestEnvelope: + $ref: './components/schemas/RequestEnvelope.yaml' + ResponseEnvelope: + $ref: './components/schemas/ResponseEnvelope.yaml' + ErrorEnvelope: + $ref: './components/schemas/ErrorEnvelope.yaml' diff --git a/specs/websocket/channels/socket.yaml b/specs/websocket/channels/socket.yaml new file mode 100644 index 00000000..1af07eba --- /dev/null +++ b/specs/websocket/channels/socket.yaml @@ -0,0 +1,33 @@ +address: /api/v1/socket +description: | + Primary WebSocket channel for all TrustGraph services. + + This single channel provides multiplexed access to: + - All global services (config, flow, librarian, knowledge, collection-management) + - All flow-hosted services (agent, RAG, embeddings, queries, loading, etc.) + + ## Multiplexing + + Multiple requests can be sent concurrently over this channel. Each request includes + a unique `id` field that is echoed back in responses for correlation. + + ## Message Flow + + 1. Client sends request with unique `id`, `service`, optional `flow`, and `request` payload + 2. Server processes request asynchronously + 3. 
Server sends response(s) with matching `id` and either `response` or `error` + 4. For streaming services, multiple responses may be sent with the same `id` + + ## Service Routing + + Messages are routed to services based on: + - `service`: Service identifier (required) + - `flow`: Flow ID (required for flow-hosted services, omitted for global services) + +messages: + request: + $ref: '../components/messages/ServiceRequest.yaml' + response: + $ref: '../components/messages/ServiceResponse.yaml' + error: + $ref: '../components/messages/ServiceError.yaml' diff --git a/specs/websocket/components/messages/ServiceError.yaml b/specs/websocket/components/messages/ServiceError.yaml new file mode 100644 index 00000000..8dc3ac9e --- /dev/null +++ b/specs/websocket/components/messages/ServiceError.yaml @@ -0,0 +1,27 @@ +name: ServiceError +title: Service Error Message +summary: Error response from any TrustGraph service +description: | + Error message sent when a service request fails. + + Contains the request ID and error details. + +payload: + $ref: '../schemas/ErrorEnvelope.yaml' + +examples: + - name: Flow not found error + summary: Requested flow does not exist + payload: + id: req-2 + error: + type: gateway-error + message: Flow 'my-flow' not found + + - name: Service timeout error + summary: Service processing timeout + payload: + id: req-3 + error: + type: timeout + message: Request exceeded 600s timeout diff --git a/specs/websocket/components/messages/ServiceRequest.yaml b/specs/websocket/components/messages/ServiceRequest.yaml new file mode 100644 index 00000000..8df44caa --- /dev/null +++ b/specs/websocket/components/messages/ServiceRequest.yaml @@ -0,0 +1,55 @@ +name: ServiceRequest +title: Service Request Message +summary: Request message for any TrustGraph service +description: | + Generic request message that can invoke any TrustGraph service. + + The `request` field payload varies by service and matches the REST API request body schema. 
+ +payload: + description: Service request envelope with id, service, optional flow, and service-specific request payload + oneOf: + # Global services (no flow parameter) + - $ref: './requests/ConfigRequest.yaml' + - $ref: './requests/FlowRequest.yaml' + - $ref: './requests/LibrarianRequest.yaml' + - $ref: './requests/KnowledgeRequest.yaml' + - $ref: './requests/CollectionManagementRequest.yaml' + + # Flow-hosted services (require flow parameter) + - $ref: './requests/AgentRequest.yaml' + - $ref: './requests/DocumentRagRequest.yaml' + - $ref: './requests/GraphRagRequest.yaml' + - $ref: './requests/TextCompletionRequest.yaml' + - $ref: './requests/PromptRequest.yaml' + - $ref: './requests/EmbeddingsRequest.yaml' + - $ref: './requests/McpToolRequest.yaml' + - $ref: './requests/TriplesRequest.yaml' + - $ref: './requests/ObjectsRequest.yaml' + - $ref: './requests/NlpQueryRequest.yaml' + - $ref: './requests/StructuredQueryRequest.yaml' + - $ref: './requests/StructuredDiagRequest.yaml' + - $ref: './requests/GraphEmbeddingsRequest.yaml' + - $ref: './requests/DocumentEmbeddingsRequest.yaml' + - $ref: './requests/TextLoadRequest.yaml' + - $ref: './requests/DocumentLoadRequest.yaml' + +examples: + - name: Config service request + summary: List all flow configurations + payload: + id: req-1 + service: config + request: + operation: list + type: flow + + - name: Agent service request + summary: Ask question to agent + payload: + id: req-2 + service: agent + flow: my-flow + request: + question: What is quantum computing? 
+ streaming: true diff --git a/specs/websocket/components/messages/ServiceResponse.yaml b/specs/websocket/components/messages/ServiceResponse.yaml new file mode 100644 index 00000000..8e0ffd3d --- /dev/null +++ b/specs/websocket/components/messages/ServiceResponse.yaml @@ -0,0 +1,32 @@ +name: ServiceResponse +title: Service Response Message +summary: Successful response from any TrustGraph service +description: | + Generic response message from any TrustGraph service. + + The `response` field payload varies by service and matches the REST API response body schema. + + For streaming services, multiple messages with the same `id` may be sent. + +payload: + $ref: '../schemas/ResponseEnvelope.yaml' + +examples: + - name: Config service response + summary: List of flow configurations + payload: + id: req-1 + response: + type: flow + keys: + - my-flow + - production-flow + + - name: Agent streaming response + summary: Agent answer chunk + payload: + id: req-2 + response: + chunk-type: answer + content: Quantum computing uses quantum mechanical phenomena... + end-of-dialog: false diff --git a/specs/websocket/components/messages/requests/AgentRequest.yaml b/specs/websocket/components/messages/requests/AgentRequest.yaml new file mode 100644 index 00000000..c8e69143 --- /dev/null +++ b/specs/websocket/components/messages/requests/AgentRequest.yaml @@ -0,0 +1,28 @@ +type: object +description: WebSocket request for agent service (flow-hosted service) +required: + - id + - service + - flow + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: agent + description: Service identifier for agent service + flow: + type: string + description: Flow ID + request: + $ref: '../../../../api/components/schemas/agent/AgentRequest.yaml' +examples: + - id: req-1 + service: agent + flow: my-flow + request: + question: What is quantum computing?
+ streaming: true + system-prompt: You are a helpful assistant diff --git a/specs/websocket/components/messages/requests/CollectionManagementRequest.yaml b/specs/websocket/components/messages/requests/CollectionManagementRequest.yaml new file mode 100644 index 00000000..6b4764c8 --- /dev/null +++ b/specs/websocket/components/messages/requests/CollectionManagementRequest.yaml @@ -0,0 +1,21 @@ +type: object +description: WebSocket request for collection-management service (global service) +required: + - id + - service + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: collection-management + description: Service identifier for collection-management service + request: + $ref: '../../../../api/components/schemas/collection/CollectionRequest.yaml' +examples: + - id: req-1 + service: collection-management + request: + operation: list diff --git a/specs/websocket/components/messages/requests/ConfigRequest.yaml b/specs/websocket/components/messages/requests/ConfigRequest.yaml new file mode 100644 index 00000000..e503fd91 --- /dev/null +++ b/specs/websocket/components/messages/requests/ConfigRequest.yaml @@ -0,0 +1,29 @@ +type: object +description: WebSocket request for config service (global service) +required: + - id + - service + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: config + description: Service identifier for config service + request: + $ref: '../../../../api/components/schemas/config/ConfigRequest.yaml' +examples: + - id: req-1 + service: config + request: + operation: list + type: flow + - id: req-2 + service: config + request: + operation: get + keys: + - type: flow + key: my-flow diff --git a/specs/websocket/components/messages/requests/DocumentEmbeddingsRequest.yaml b/specs/websocket/components/messages/requests/DocumentEmbeddingsRequest.yaml new file mode 100644 index 00000000..2b738a5c --- /dev/null +++ 
b/specs/websocket/components/messages/requests/DocumentEmbeddingsRequest.yaml @@ -0,0 +1,27 @@ +type: object +description: WebSocket request for document-embeddings service (flow-hosted service) +required: + - id + - service + - flow + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: document-embeddings + description: Service identifier for document-embeddings service + flow: + type: string + description: Flow ID + request: + $ref: '../../../../api/components/schemas/embeddings-query/DocumentEmbeddingsQueryRequest.yaml' +examples: + - id: req-1 + service: document-embeddings + flow: my-flow + request: + text: quantum computing + limit: 10 diff --git a/specs/websocket/components/messages/requests/DocumentLoadRequest.yaml b/specs/websocket/components/messages/requests/DocumentLoadRequest.yaml new file mode 100644 index 00000000..a4981fe6 --- /dev/null +++ b/specs/websocket/components/messages/requests/DocumentLoadRequest.yaml @@ -0,0 +1,27 @@ +type: object +description: WebSocket request for document-load service (flow-hosted service) +required: + - id + - service + - flow + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: document-load + description: Service identifier for document-load service + flow: + type: string + description: Flow ID + request: + $ref: '../../../../api/components/schemas/loading/DocumentLoadRequest.yaml' +examples: + - id: req-1 + service: document-load + flow: my-flow + request: + url: https://example.com/document.pdf + collection: default diff --git a/specs/websocket/components/messages/requests/DocumentRagRequest.yaml b/specs/websocket/components/messages/requests/DocumentRagRequest.yaml new file mode 100644 index 00000000..5a6af993 --- /dev/null +++ b/specs/websocket/components/messages/requests/DocumentRagRequest.yaml @@ -0,0 +1,28 @@ +type: object +description: WebSocket request for document-rag 
service (flow-hosted service) +required: + - id + - service + - flow + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: document-rag + description: Service identifier for document-rag service + flow: + type: string + description: Flow ID + request: + $ref: '../../../../api/components/schemas/rag/DocumentRagRequest.yaml' +examples: + - id: req-1 + service: document-rag + flow: my-flow + request: + query: What are the main features? + streaming: true + doc-limit: 20 diff --git a/specs/websocket/components/messages/requests/EmbeddingsRequest.yaml b/specs/websocket/components/messages/requests/EmbeddingsRequest.yaml new file mode 100644 index 00000000..d0b60ff3 --- /dev/null +++ b/specs/websocket/components/messages/requests/EmbeddingsRequest.yaml @@ -0,0 +1,26 @@ +type: object +description: WebSocket request for embeddings service (flow-hosted service) +required: + - id + - service + - flow + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: embeddings + description: Service identifier for embeddings service + flow: + type: string + description: Flow ID + request: + $ref: '../../../../api/components/schemas/embeddings/EmbeddingsRequest.yaml' +examples: + - id: req-1 + service: embeddings + flow: my-flow + request: + text: What is quantum computing? 
diff --git a/specs/websocket/components/messages/requests/FlowRequest.yaml b/specs/websocket/components/messages/requests/FlowRequest.yaml new file mode 100644 index 00000000..56e38e42 --- /dev/null +++ b/specs/websocket/components/messages/requests/FlowRequest.yaml @@ -0,0 +1,27 @@ +type: object +description: WebSocket request for flow service (global service) +required: + - id + - service + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: flow + description: Service identifier for flow service + request: + $ref: '../../../../api/components/schemas/flow/FlowRequest.yaml' +examples: + - id: req-1 + service: flow + request: + operation: list + - id: req-2 + service: flow + request: + operation: start + flow: my-flow + blueprint: default-blueprint diff --git a/specs/websocket/components/messages/requests/GraphEmbeddingsRequest.yaml b/specs/websocket/components/messages/requests/GraphEmbeddingsRequest.yaml new file mode 100644 index 00000000..6d54bfb6 --- /dev/null +++ b/specs/websocket/components/messages/requests/GraphEmbeddingsRequest.yaml @@ -0,0 +1,27 @@ +type: object +description: WebSocket request for graph-embeddings service (flow-hosted service) +required: + - id + - service + - flow + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: graph-embeddings + description: Service identifier for graph-embeddings service + flow: + type: string + description: Flow ID + request: + $ref: '../../../../api/components/schemas/embeddings-query/GraphEmbeddingsQueryRequest.yaml' +examples: + - id: req-1 + service: graph-embeddings + flow: my-flow + request: + text: quantum computing + limit: 10 diff --git a/specs/websocket/components/messages/requests/GraphRagRequest.yaml b/specs/websocket/components/messages/requests/GraphRagRequest.yaml new file mode 100644 index 00000000..7647d6e5 --- /dev/null +++ 
b/specs/websocket/components/messages/requests/GraphRagRequest.yaml @@ -0,0 +1,28 @@ +type: object +description: WebSocket request for graph-rag service (flow-hosted service) +required: + - id + - service + - flow + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: graph-rag + description: Service identifier for graph-rag service + flow: + type: string + description: Flow ID + request: + $ref: '../../../../api/components/schemas/rag/GraphRagRequest.yaml' +examples: + - id: req-1 + service: graph-rag + flow: my-flow + request: + query: What entities are related to quantum computing? + streaming: true + triple-limit: 100 diff --git a/specs/websocket/components/messages/requests/KnowledgeRequest.yaml b/specs/websocket/components/messages/requests/KnowledgeRequest.yaml new file mode 100644 index 00000000..f798905c --- /dev/null +++ b/specs/websocket/components/messages/requests/KnowledgeRequest.yaml @@ -0,0 +1,31 @@ +type: object +description: WebSocket request for knowledge service (global service) +required: + - id + - service + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: knowledge + description: Service identifier for knowledge service + request: + $ref: '../../../../api/components/schemas/knowledge/KnowledgeRequest.yaml' +examples: + - id: req-1 + service: knowledge + request: + operation: store + triples: + - s: + v: https://example.com/entity1 + e: true + p: + v: https://example.com/relates-to + e: true + o: + v: https://example.com/entity2 + e: true diff --git a/specs/websocket/components/messages/requests/LibrarianRequest.yaml b/specs/websocket/components/messages/requests/LibrarianRequest.yaml new file mode 100644 index 00000000..07134a0c --- /dev/null +++ b/specs/websocket/components/messages/requests/LibrarianRequest.yaml @@ -0,0 +1,22 @@ +type: object +description: WebSocket request for librarian service (global 
service) +required: + - id + - service + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: librarian + description: Service identifier for librarian service + request: + $ref: '../../../../api/components/schemas/librarian/LibrarianRequest.yaml' +examples: + - id: req-1 + service: librarian + request: + operation: list + collection: default diff --git a/specs/websocket/components/messages/requests/McpToolRequest.yaml b/specs/websocket/components/messages/requests/McpToolRequest.yaml new file mode 100644 index 00000000..195dea36 --- /dev/null +++ b/specs/websocket/components/messages/requests/McpToolRequest.yaml @@ -0,0 +1,30 @@ +type: object +description: WebSocket request for mcp-tool service (flow-hosted service) +required: + - id + - service + - flow + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: mcp-tool + description: Service identifier for mcp-tool service + flow: + type: string + description: Flow ID + request: + $ref: '../../../../api/components/schemas/mcp-tool/McpToolRequest.yaml' +examples: + - id: req-1 + service: mcp-tool + flow: my-flow + request: + tool: calculator + arguments: + operation: add + a: 5 + b: 3 diff --git a/specs/websocket/components/messages/requests/NlpQueryRequest.yaml b/specs/websocket/components/messages/requests/NlpQueryRequest.yaml new file mode 100644 index 00000000..d0a9ec42 --- /dev/null +++ b/specs/websocket/components/messages/requests/NlpQueryRequest.yaml @@ -0,0 +1,27 @@ +type: object +description: WebSocket request for nlp-query service (flow-hosted service) +required: + - id + - service + - flow + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: nlp-query + description: Service identifier for nlp-query service + flow: + type: string + description: Flow ID + request: + $ref: 
'../../../../api/components/schemas/query/NlpQueryRequest.yaml' +examples: + - id: req-1 + service: nlp-query + flow: my-flow + request: + query: Show me all entities related to quantum computing + limit: 50 diff --git a/specs/websocket/components/messages/requests/ObjectsRequest.yaml b/specs/websocket/components/messages/requests/ObjectsRequest.yaml new file mode 100644 index 00000000..61c9ef64 --- /dev/null +++ b/specs/websocket/components/messages/requests/ObjectsRequest.yaml @@ -0,0 +1,26 @@ +type: object +description: WebSocket request for objects service (flow-hosted service) +required: + - id + - service + - flow + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: objects + description: Service identifier for objects service + flow: + type: string + description: Flow ID + request: + $ref: '../../../../api/components/schemas/query/ObjectsQueryRequest.yaml' +examples: + - id: req-1 + service: objects + flow: my-flow + request: + query: "{ entity(id: \"https://example.com/entity1\") { properties { key value } } }" diff --git a/specs/websocket/components/messages/requests/PromptRequest.yaml b/specs/websocket/components/messages/requests/PromptRequest.yaml new file mode 100644 index 00000000..24d39bbb --- /dev/null +++ b/specs/websocket/components/messages/requests/PromptRequest.yaml @@ -0,0 +1,29 @@ +type: object +description: WebSocket request for prompt service (flow-hosted service) +required: + - id + - service + - flow + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: prompt + description: Service identifier for prompt service + flow: + type: string + description: Flow ID + request: + $ref: '../../../../api/components/schemas/prompt/PromptRequest.yaml' +examples: + - id: req-1 + service: prompt + flow: my-flow + request: + template: default-template + variables: + topic: quantum computing + style: technical diff --git 
a/specs/websocket/components/messages/requests/StructuredDiagRequest.yaml b/specs/websocket/components/messages/requests/StructuredDiagRequest.yaml new file mode 100644 index 00000000..c47d402a --- /dev/null +++ b/specs/websocket/components/messages/requests/StructuredDiagRequest.yaml @@ -0,0 +1,26 @@ +type: object +description: WebSocket request for structured-diag service (flow-hosted service) +required: + - id + - service + - flow + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: structured-diag + description: Service identifier for structured-diag service + flow: + type: string + description: Flow ID + request: + $ref: '../../../../api/components/schemas/diag/StructuredDiagRequest.yaml' +examples: + - id: req-1 + service: structured-diag + flow: my-flow + request: + operation: status diff --git a/specs/websocket/components/messages/requests/StructuredQueryRequest.yaml b/specs/websocket/components/messages/requests/StructuredQueryRequest.yaml new file mode 100644 index 00000000..d23490d7 --- /dev/null +++ b/specs/websocket/components/messages/requests/StructuredQueryRequest.yaml @@ -0,0 +1,30 @@ +type: object +description: WebSocket request for structured-query service (flow-hosted service) +required: + - id + - service + - flow + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: structured-query + description: Service identifier for structured-query service + flow: + type: string + description: Flow ID + request: + $ref: '../../../../api/components/schemas/query/StructuredQueryRequest.yaml' +examples: + - id: req-1 + service: structured-query + flow: my-flow + request: + query: + type: entity + filters: + - property: type + value: Person diff --git a/specs/websocket/components/messages/requests/TextCompletionRequest.yaml b/specs/websocket/components/messages/requests/TextCompletionRequest.yaml new file mode 100644 index 
00000000..5e2447f5 --- /dev/null +++ b/specs/websocket/components/messages/requests/TextCompletionRequest.yaml @@ -0,0 +1,28 @@ +type: object +description: WebSocket request for text-completion service (flow-hosted service) +required: + - id + - service + - flow + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: text-completion + description: Service identifier for text-completion service + flow: + type: string + description: Flow ID + request: + $ref: '../../../../api/components/schemas/text-completion/TextCompletionRequest.yaml' +examples: + - id: req-1 + service: text-completion + flow: my-flow + request: + prompt: Once upon a time + streaming: true + max-output-tokens: 100 diff --git a/specs/websocket/components/messages/requests/TextLoadRequest.yaml b/specs/websocket/components/messages/requests/TextLoadRequest.yaml new file mode 100644 index 00000000..28b9b8df --- /dev/null +++ b/specs/websocket/components/messages/requests/TextLoadRequest.yaml @@ -0,0 +1,27 @@ +type: object +description: WebSocket request for text-load service (flow-hosted service) +required: + - id + - service + - flow + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: text-load + description: Service identifier for text-load service + flow: + type: string + description: Flow ID + request: + $ref: '../../../../api/components/schemas/loading/TextLoadRequest.yaml' +examples: + - id: req-1 + service: text-load + flow: my-flow + request: + text: This is the document content to be loaded into the knowledge graph. 
+ collection: default diff --git a/specs/websocket/components/messages/requests/TriplesRequest.yaml b/specs/websocket/components/messages/requests/TriplesRequest.yaml new file mode 100644 index 00000000..3ebfae43 --- /dev/null +++ b/specs/websocket/components/messages/requests/TriplesRequest.yaml @@ -0,0 +1,29 @@ +type: object +description: WebSocket request for triples service (flow-hosted service) +required: + - id + - service + - flow + - request +properties: + id: + type: string + description: Unique request identifier + service: + type: string + const: triples + description: Service identifier for triples service + flow: + type: string + description: Flow ID + request: + $ref: '../../../../api/components/schemas/query/TriplesQueryRequest.yaml' +examples: + - id: req-1 + service: triples + flow: my-flow + request: + s: + v: https://example.com/entity1 + e: true + limit: 100 diff --git a/specs/websocket/components/schemas/ErrorEnvelope.yaml b/specs/websocket/components/schemas/ErrorEnvelope.yaml new file mode 100644 index 00000000..6cd9a783 --- /dev/null +++ b/specs/websocket/components/schemas/ErrorEnvelope.yaml @@ -0,0 +1,37 @@ +type: object +description: | + WebSocket error message envelope. + + Sent when a request fails. Contains the request ID and error details. +required: + - id + - error +properties: + id: + type: string + description: | + Request identifier from the original request that failed. 
+ examples: + - req-123 + - request-abc-456 + error: + type: object + description: Error information + required: + - type + - message + properties: + type: + type: string + description: Error type/category + examples: + - gateway-error + - service-error + - timeout + message: + type: string + description: Human-readable error message + examples: + - Flow not found + - Service timeout + - Invalid request format diff --git a/specs/websocket/components/schemas/RequestEnvelope.yaml b/specs/websocket/components/schemas/RequestEnvelope.yaml new file mode 100644 index 00000000..f3d78e49 --- /dev/null +++ b/specs/websocket/components/schemas/RequestEnvelope.yaml @@ -0,0 +1,56 @@ +type: object +description: | + WebSocket request message envelope. + + Wraps service-specific request payloads with routing and correlation metadata. +required: + - id + - service + - request +properties: + id: + type: string + description: | + Client-generated unique identifier for this request within the WebSocket session. + Used to correlate responses with requests in multiplexed async communication. + Can be any string, but must be unique per active request. + examples: + - req-123 + - request-abc-456 + - b5f8d9a2-4c3e-11ef-9c8a-0242ac120002 + service: + type: string + description: | + Service identifier. Same as {kind} in REST API URLs. + + Global services: config, flow, librarian, knowledge, collection-management + Flow-hosted services: agent, text-completion, prompt, document-rag, graph-rag, + embeddings, graph-embeddings, document-embeddings, triples, objects, + nlp-query, structured-query, structured-diag, text-load, document-load, mcp-tool + examples: + - config + - agent + - document-rag + flow: + type: string + description: | + Flow ID for flow-hosted services. Required for services accessed via + /api/v1/flow/{flow}/service/{kind} in REST API. + + Omit this field for global services (config, flow, librarian, knowledge, collection-management). 
+ examples: + - my-flow + - production-flow + request: + type: object + description: | + Service-specific request payload. Structure is identical to the request body + in the corresponding REST API endpoint. + + See OpenAPI specification for detailed schemas per service. + examples: + - operation: list + type: flow + - question: What is quantum computing? + streaming: true + system-prompt: You are a helpful assistant diff --git a/specs/websocket/components/schemas/ResponseEnvelope.yaml b/specs/websocket/components/schemas/ResponseEnvelope.yaml new file mode 100644 index 00000000..1a6cdfd8 --- /dev/null +++ b/specs/websocket/components/schemas/ResponseEnvelope.yaml @@ -0,0 +1,35 @@ +type: object +description: | + WebSocket response message envelope for successful responses. + + Contains the request ID for correlation and the service-specific response payload. +required: + - id + - response +properties: + id: + type: string + description: | + Request identifier from the original request. Client uses this to match + responses to requests in multiplexed communication. + examples: + - req-123 + - request-abc-456 + response: + type: object + description: | + Service-specific response payload. Structure is identical to the response body + in the corresponding REST API endpoint. + + For streaming services, multiple response messages may be sent with the same `id`. + Look for `end-of-stream` or service-specific completion flags to detect the final message. + + See OpenAPI specification for detailed schemas per service. + examples: + - type: flow + keys: + - my-flow + - production-flow + - chunk-type: answer + content: Quantum computing uses quantum bits... 
+ end-of-stream: false From 1c006d5b1417365422e017b06e41a7b9ccd468c7 Mon Sep 17 00:00:00 2001 From: cybermaggedon Date: Thu, 15 Jan 2026 15:12:32 +0000 Subject: [PATCH 9/9] Python API docs (#614) * Python API docs working * Python API doc generation --- docs/README.api-docs.md | 48 + docs/generate-api-docs.py | 351 +++ docs/python-api.md | 2143 +++++++++++++++++ trustgraph-base/trustgraph/api/__init__.py | 52 + trustgraph-base/trustgraph/api/api.py | 388 ++- trustgraph-base/trustgraph/api/async_flow.py | 563 ++++- trustgraph-base/trustgraph/api/bulk_client.py | 257 +- trustgraph-base/trustgraph/api/collection.py | 128 + trustgraph-base/trustgraph/api/config.py | 179 ++ trustgraph-base/trustgraph/api/flow.py | 580 ++++- trustgraph-base/trustgraph/api/knowledge.py | 122 + trustgraph-base/trustgraph/api/library.py | 252 +- .../trustgraph/api/socket_client.py | 381 ++- trustgraph-base/trustgraph/api/types.py | 128 +- 14 files changed, 5508 insertions(+), 64 deletions(-) create mode 100644 docs/README.api-docs.md create mode 100644 docs/generate-api-docs.py create mode 100644 docs/python-api.md diff --git a/docs/README.api-docs.md b/docs/README.api-docs.md new file mode 100644 index 00000000..8f7fecb8 --- /dev/null +++ b/docs/README.api-docs.md @@ -0,0 +1,48 @@ + +# Auto-generating docs + +## REST and WebSocket API Documentation + +- `specs/build-docs.sh` - Builds the REST and websocket documentation from the + OpenAPI and AsyncAPI specs. + +## Python API Documentation + +The Python API documentation is generated from docstrings using a custom Python script that introspects the `trustgraph.api` package. + +### Prerequisites + +The trustgraph package must be importable. If you're working in a development environment: + +```bash +cd trustgraph-base +pip install -e . 
+``` + +### Generating Documentation + +From the docs directory: + +```bash +cd docs +python3 generate-api-docs.py > python-api.md +``` + +This generates a single markdown file with complete API documentation showing: +- Installation and quick start guide +- Import statements for each class/type +- Full docstrings with examples +- Table of contents organized by category + +### Documentation Style + +All docstrings follow Google-style format: +- Brief one-line summary +- Detailed description +- Args section with parameter descriptions +- Returns section +- Raises section (when applicable) +- Example code blocks with proper syntax highlighting + +The generated documentation shows the public API exactly as users import it from `trustgraph.api`, without exposing internal module structure. + diff --git a/docs/generate-api-docs.py b/docs/generate-api-docs.py new file mode 100644 index 00000000..c8d54fda --- /dev/null +++ b/docs/generate-api-docs.py @@ -0,0 +1,351 @@ +#!/usr/bin/env python3 +""" +Generate clean markdown documentation for trustgraph.api + +This script introspects the trustgraph.api package and generates markdown +documentation showing the API as users actually import it. 
+""" + +import sys +import inspect +import importlib +from dataclasses import is_dataclass, fields +from typing import get_type_hints + +# Add parent directory to path +sys.path.insert(0, '../trustgraph-base') + +def parse_docstring(docstring): + """Parse Google-style docstring into sections""" + if not docstring: + return {"description": "", "args": [], "returns": "", "raises": [], "examples": []} + + lines = docstring.split('\n') + result = { + "description": [], + "args": [], + "returns": "", + "raises": [], + "examples": [], + "attributes": [] + } + + current_section = "description" + current_item = None + + for line in lines: + stripped = line.strip() + + # Check for section headers + if stripped in ["Args:", "Arguments:"]: + current_section = "args" + current_item = None + continue + elif stripped in ["Returns:", "Return:"]: + current_section = "returns" + current_item = None + continue + elif stripped in ["Raises:"]: + current_section = "raises" + current_item = None + continue + elif stripped in ["Example:", "Examples:"]: + current_section = "examples" + current_item = None + continue + elif stripped in ["Attributes:"]: + current_section = "attributes" + current_item = None + continue + elif stripped.startswith("Note:"): + current_section = "description" + result["description"].append(line) + continue + + # Process content based on section + if current_section == "description": + result["description"].append(line) + elif current_section == "args": + # Check if this is a new argument (starts with word followed by colon) + if stripped and not line.startswith(' ' * 8) and ':' in stripped: + parts = stripped.split(':', 1) + arg_name = parts[0].strip() + arg_desc = parts[1].strip() if len(parts) > 1 else "" + current_item = {"name": arg_name, "description": arg_desc} + result["args"].append(current_item) + elif current_item and stripped: + # Continuation of previous arg description + current_item["description"] += " " + stripped + elif current_section == 
"returns": + if stripped: + result["returns"] += stripped + " " + elif current_section == "raises": + if stripped and ':' in stripped: + parts = stripped.split(':', 1) + exc_name = parts[0].strip() + exc_desc = parts[1].strip() if len(parts) > 1 else "" + current_item = {"name": exc_name, "description": exc_desc} + result["raises"].append(current_item) + elif current_item and stripped: + current_item["description"] += " " + stripped + elif current_section == "examples": + result["examples"].append(line) + elif current_section == "attributes": + if stripped and '-' in stripped: + parts = stripped.split('-', 1) + if len(parts) == 2: + attr_name = parts[0].strip().strip('`') + attr_desc = parts[1].strip() + result["attributes"].append({"name": attr_name, "description": attr_desc}) + + # Clean up description + result["description"] = '\n'.join(result["description"]).strip() + result["returns"] = result["returns"].strip() + + return result + +def format_signature(name, obj): + """Format function/method signature""" + try: + sig = inspect.signature(obj) + return f"{name}{sig}" + except: + return f"{name}(...)" + +def document_function(name, func, indent=0): + """Generate markdown for a function""" + ind = " " * indent + md = [] + + # Function heading and signature + md.append(f"{ind}### `{format_signature(name, func)}`\n") + + # Parse docstring + doc = inspect.getdoc(func) + if doc: + parsed = parse_docstring(doc) + + # Description + if parsed["description"]: + md.append(f"{ind}{parsed['description']}\n") + + # Arguments + if parsed["args"]: + md.append(f"{ind}**Arguments:**\n") + for arg in parsed["args"]: + md.append(f"{ind}- `{arg['name']}`: {arg['description']}") + md.append("") + + # Returns + if parsed["returns"]: + md.append(f"{ind}**Returns:** {parsed['returns']}\n") + + # Raises + if parsed["raises"]: + md.append(f"{ind}**Raises:**\n") + for exc in parsed["raises"]: + md.append(f"{ind}- `{exc['name']}`: {exc['description']}") + md.append("") + + # Examples + if 
parsed["examples"]: + md.append(f"{ind}**Example:**\n") + + # Strip common leading whitespace from examples + import textwrap + example_text = '\n'.join(parsed["examples"]) + dedented = textwrap.dedent(example_text) + example_lines = dedented.split('\n') + + # Check if examples already contain code fences + if '```' in dedented: + # Already has code fences, don't wrap + for line in example_lines: + md.append(line.rstrip()) + md.append("") + else: + # No code fences, wrap in python block + md.append("```python") + for line in example_lines: + md.append(line.rstrip()) + md.append("```\n") + + return '\n'.join(md) + +def document_class(name, cls): + """Generate markdown for a class""" + md = [] + + # Class heading + md.append(f"## `{name}`\n") + + # Import statement + md.append(f"```python") + md.append(f"from trustgraph.api import {name}") + md.append(f"```\n") + + # Parse class docstring + doc = inspect.getdoc(cls) + if doc: + parsed = parse_docstring(doc) + + # Description + if parsed["description"]: + md.append(f"{parsed['description']}\n") + + # Attributes (for class-level attributes) + if parsed["attributes"]: + md.append(f"**Attributes:**\n") + for attr in parsed["attributes"]: + md.append(f"- `{attr['name']}`: {attr['description']}") + md.append("") + + # For dataclasses, show fields + if is_dataclass(cls): + md.append("**Fields:**\n") + for field in fields(cls): + field_doc = "" + if cls.__doc__: + # Try to extract field description from docstring + pass + md.append(f"- `{field.name}`: {field.type}") + md.append("") + + # Document methods + methods = [] + for method_name, method in inspect.getmembers(cls, predicate=inspect.isfunction): + # Skip private methods except special ones + if method_name.startswith('_') and method_name not in ['__init__', '__enter__', '__exit__', '__aenter__', '__aexit__']: + continue + methods.append((method_name, method)) + + if methods: + md.append("### Methods\n") + for method_name, method in methods: + 
md.append(document_function(method_name, method, indent=0)) + + return '\n'.join(md) + +def document_exception(name, exc): + """Generate markdown for an exception""" + md = [] + + md.append(f"## `{name}`\n") + + # Import statement + md.append(f"```python") + md.append(f"from trustgraph.api import {name}") + md.append(f"```\n") + + doc = inspect.getdoc(exc) + if doc: + md.append(f"{doc}\n") + else: + md.append(f"Exception class for {name.replace('Exception', '').replace('Error', '')} errors.\n") + + return '\n'.join(md) + +def generate_toc(items): + """Generate table of contents""" + md = [] + md.append("# TrustGraph Python API Reference\n") + + # Add introduction + md.append("## Installation\n") + md.append("```bash") + md.append("pip install trustgraph") + md.append("```\n") + + md.append("## Quick Start\n") + md.append("All classes and types are imported from the `trustgraph.api` package:\n") + md.append("```python") + md.append("from trustgraph.api import Api, Triple, ConfigKey") + md.append("") + md.append("# Create API client") + md.append("api = Api(url=\"http://localhost:8088/\")") + md.append("") + md.append("# Get a flow instance") + md.append("flow = api.flow().id(\"default\")") + md.append("") + md.append("# Execute a graph RAG query") + md.append("response = flow.graph_rag(") + md.append(" query=\"What are the main topics?\",") + md.append(" user=\"trustgraph\",") + md.append(" collection=\"default\"") + md.append(")") + md.append("```\n") + + md.append("## Table of Contents\n") + + # Group by category + categories = { + "Core": ["Api"], + "Flow Clients": ["Flow", "FlowInstance", "AsyncFlow", "AsyncFlowInstance"], + "WebSocket Clients": ["SocketClient", "SocketFlowInstance", "AsyncSocketClient", "AsyncSocketFlowInstance"], + "Bulk Operations": ["BulkClient", "AsyncBulkClient"], + "Metrics": ["Metrics", "AsyncMetrics"], + "Data Types": ["Triple", "ConfigKey", "ConfigValue", "DocumentMetadata", "ProcessingMetadata", + "CollectionMetadata", 
"StreamingChunk", "AgentThought", "AgentObservation", + "AgentAnswer", "RAGChunk"], + "Exceptions": [] + } + + # Find exceptions + for item in items: + if "Exception" in item or "Error" in item: + categories["Exceptions"].append(item) + + for category, names in categories.items(): + if not names: + continue + md.append(f"### {category}\n") + for name in names: + if name in items: + md.append(f"- [{name}](#{name.lower()})") + md.append("") + + return '\n'.join(md) + +def main(): + """Generate API documentation""" + + # Import the package + try: + api_module = importlib.import_module('trustgraph.api') + except ImportError as e: + print(f"Error importing trustgraph.api: {e}", file=sys.stderr) + sys.exit(1) + + # Get exported names + if not hasattr(api_module, '__all__'): + print("Error: trustgraph.api has no __all__", file=sys.stderr) + sys.exit(1) + + all_names = api_module.__all__ + + # Generate TOC + print(generate_toc(all_names)) + print("---\n") + + # Document each exported item + for name in all_names: + try: + obj = getattr(api_module, name) + + # Determine what kind of object it is + if inspect.isclass(obj): + if issubclass(obj, Exception): + print(document_exception(name, obj)) + else: + print(document_class(name, obj)) + elif inspect.isfunction(obj): + print(document_function(name, obj)) + + print("\n---\n") + + except Exception as e: + print(f"Error documenting {name}: {e}", file=sys.stderr) + continue + +if __name__ == "__main__": + main() diff --git a/docs/python-api.md b/docs/python-api.md new file mode 100644 index 00000000..47e5843e --- /dev/null +++ b/docs/python-api.md @@ -0,0 +1,2143 @@ +# TrustGraph Python API Reference + +## Installation + +```bash +pip install trustgraph +``` + +## Quick Start + +All classes and types are imported from the `trustgraph.api` package: + +```python +from trustgraph.api import Api, Triple, ConfigKey + +# Create API client +api = Api(url="http://localhost:8088/") + +# Get a flow instance +flow = 
api.flow().id("default") + +# Execute a graph RAG query +response = flow.graph_rag( + query="What are the main topics?", + user="trustgraph", + collection="default" +) +``` + +## Table of Contents + +### Core + +- [Api](#api) + +### Flow Clients + +- [Flow](#flow) +- [FlowInstance](#flowinstance) +- [AsyncFlow](#asyncflow) +- [AsyncFlowInstance](#asyncflowinstance) + +### WebSocket Clients + +- [SocketClient](#socketclient) +- [SocketFlowInstance](#socketflowinstance) +- [AsyncSocketClient](#asyncsocketclient) +- [AsyncSocketFlowInstance](#asyncsocketflowinstance) + +### Bulk Operations + +- [BulkClient](#bulkclient) +- [AsyncBulkClient](#asyncbulkclient) + +### Metrics + +- [Metrics](#metrics) +- [AsyncMetrics](#asyncmetrics) + +### Data Types + +- [Triple](#triple) +- [ConfigKey](#configkey) +- [ConfigValue](#configvalue) +- [DocumentMetadata](#documentmetadata) +- [ProcessingMetadata](#processingmetadata) +- [CollectionMetadata](#collectionmetadata) +- [StreamingChunk](#streamingchunk) +- [AgentThought](#agentthought) +- [AgentObservation](#agentobservation) +- [AgentAnswer](#agentanswer) +- [RAGChunk](#ragchunk) + +### Exceptions + +- [ProtocolException](#protocolexception) +- [TrustGraphException](#trustgraphexception) +- [AgentError](#agenterror) +- [ConfigError](#configerror) +- [DocumentRagError](#documentragerror) +- [FlowError](#flowerror) +- [GatewayError](#gatewayerror) +- [GraphRagError](#graphragerror) +- [LLMError](#llmerror) +- [LoadError](#loaderror) +- [LookupError](#lookuperror) +- [NLPQueryError](#nlpqueryerror) +- [ObjectsQueryError](#objectsqueryerror) +- [RequestError](#requesterror) +- [StructuredQueryError](#structuredqueryerror) +- [UnexpectedError](#unexpectederror) +- [ApplicationException](#applicationexception) + +--- + +## `Api` + +```python +from trustgraph.api import Api +``` + +Main TrustGraph API client for synchronous and asynchronous operations. 
+ +This class provides access to all TrustGraph services including flow management, +knowledge graph operations, document processing, RAG queries, and more. It supports +both REST-based and WebSocket-based communication patterns. + +The client can be used as a context manager for automatic resource cleanup: + ```python + with Api(url="http://localhost:8088/") as api: + result = api.flow().id("default").graph_rag(query="test") + ``` + +### Methods + +### `__aenter__(self)` + +Enter asynchronous context manager. + +### `__aexit__(self, *args)` + +Exit asynchronous context manager and close connections. + +### `__enter__(self)` + +Enter synchronous context manager. + +### `__exit__(self, *args)` + +Exit synchronous context manager and close connections. + +### `__init__(self, url='http://localhost:8088/', timeout=60, token: Optional[str] = None)` + +Initialize the TrustGraph API client. + +**Arguments:** + +- `url`: Base URL for TrustGraph API (default: "http://localhost:8088/") +- `timeout`: Request timeout in seconds (default: 60) +- `token`: Optional bearer token for authentication + +**Example:** + +```python +# Local development +api = Api() + +# Production with authentication +api = Api( + url="https://trustgraph.example.com/", + timeout=120, + token="your-api-token" +) +``` + +### `aclose(self)` + +Close all asynchronous client connections. + +This method closes async WebSocket, bulk operation, and flow connections. +It is automatically called when exiting an async context manager. + +**Example:** + +```python +api = Api() +async_socket = api.async_socket() +# ... use async_socket +await api.aclose() # Clean up connections + +# Or use async context manager (automatic cleanup) +async with Api() as api: + async_socket = api.async_socket() + # ... use async_socket +# Automatically closed +``` + +### `async_bulk(self)` + +Get an asynchronous bulk operations client. 
+ +Provides async/await style bulk import/export operations via WebSocket +for efficient handling of large datasets. + +**Returns:** AsyncBulkClient: Asynchronous bulk operations client + +**Example:** + +```python +async_bulk = api.async_bulk() + +# Export triples asynchronously +async for triple in async_bulk.export_triples(flow="default"): + print(f"{triple.s} {triple.p} {triple.o}") + +# Import with async generator +async def triple_gen(): + yield Triple(s="subj", p="pred", o="obj") + # ... more triples + +await async_bulk.import_triples( + flow="default", + triples=triple_gen() +) +``` + +### `async_flow(self)` + +Get an asynchronous REST-based flow client. + +Provides async/await style access to flow operations. This is preferred +for async Python applications and frameworks (FastAPI, aiohttp, etc.). + +**Returns:** AsyncFlow: Asynchronous flow client + +**Example:** + +```python +async_flow = api.async_flow() + +# List flows +flow_ids = await async_flow.list() + +# Execute operations +instance = async_flow.id("default") +result = await instance.text_completion( + system="You are helpful", + prompt="Hello" +) +``` + +### `async_metrics(self)` + +Get an asynchronous metrics client. + +Provides async/await style access to Prometheus metrics. + +**Returns:** AsyncMetrics: Asynchronous metrics client + +**Example:** + +```python +async_metrics = api.async_metrics() +prometheus_text = await async_metrics.get() +print(prometheus_text) +``` + +### `async_socket(self)` + +Get an asynchronous WebSocket client for streaming operations. + +Provides async/await style WebSocket access with streaming support. +This is the preferred method for async streaming in Python. 
+ +**Returns:** AsyncSocketClient: Asynchronous WebSocket client + +**Example:** + +```python +async_socket = api.async_socket() +flow = async_socket.flow("default") + +# Stream agent responses +async for chunk in flow.agent( + question="Explain quantum computing", + user="trustgraph", + streaming=True +): + if hasattr(chunk, 'content'): + print(chunk.content, end='', flush=True) +``` + +### `bulk(self)` + +Get a synchronous bulk operations client for import/export. + +Bulk operations allow efficient transfer of large datasets via WebSocket +connections, including triples, embeddings, entity contexts, and objects. + +**Returns:** BulkClient: Synchronous bulk operations client + +**Example:** + +```python +bulk = api.bulk() + +# Export triples +for triple in bulk.export_triples(flow="default"): + print(f"{triple.s} {triple.p} {triple.o}") + +# Import triples +def triple_generator(): + yield Triple(s="subj", p="pred", o="obj") + # ... more triples + +bulk.import_triples(flow="default", triples=triple_generator()) +``` + +### `close(self)` + +Close all synchronous client connections. + +This method closes WebSocket and bulk operation connections. +It is automatically called when exiting a context manager. + +**Example:** + +```python +api = Api() +socket = api.socket() +# ... use socket +api.close() # Clean up connections + +# Or use context manager (automatic cleanup) +with Api() as api: + socket = api.socket() + # ... use socket +# Automatically closed +``` + +### `collection(self)` + +Get a Collection client for managing data collections. + +Collections organize documents and knowledge graph data into +logical groupings for isolation and access control. 
+ +**Returns:** Collection: Collection management client + +**Example:** + +```python +collection = api.collection() + +# List collections +colls = collection.list_collections(user="trustgraph") + +# Update collection metadata +collection.update_collection( + user="trustgraph", + collection="default", + name="Default Collection", + description="Main data collection" +) +``` + +### `config(self)` + +Get a Config client for managing configuration settings. + +**Returns:** Config: Configuration management client + +**Example:** + +```python +config = api.config() + +# Get configuration values +values = config.get([ConfigKey(type="llm", key="model")]) + +# Set configuration +config.put([ConfigValue(type="llm", key="model", value="gpt-4")]) +``` + +### `flow(self)` + +Get a Flow client for managing and interacting with flows. + +Flows are the primary execution units in TrustGraph, providing access to +services like agents, RAG queries, embeddings, and document processing. + +**Returns:** Flow: Flow management client + +**Example:** + +```python +flow_client = api.flow() + +# List available blueprints +blueprints = flow_client.list_blueprints() + +# Get a specific flow instance +flow_instance = flow_client.id("default") +response = flow_instance.text_completion( + system="You are helpful", + prompt="Hello" +) +``` + +### `knowledge(self)` + +Get a Knowledge client for managing knowledge graph cores. + +**Returns:** Knowledge: Knowledge graph management client + +**Example:** + +```python +knowledge = api.knowledge() + +# List available KG cores +cores = knowledge.list_kg_cores(user="trustgraph") + +# Load a KG core +knowledge.load_kg_core(id="core-123", user="trustgraph") +``` + +### `library(self)` + +Get a Library client for document management. + +The library provides document storage, metadata management, and +processing workflow coordination. 
+ +**Returns:** Library: Document library management client + +**Example:** + +```python +library = api.library() + +# Add a document +library.add_document( + document=b"Document content", + id="doc-123", + metadata=[], + user="trustgraph", + title="My Document", + comments="Test document" +) + +# List documents +docs = library.get_documents(user="trustgraph") +``` + +### `metrics(self)` + +Get a synchronous metrics client for monitoring. + +Retrieves Prometheus-formatted metrics from the TrustGraph service +for monitoring and observability. + +**Returns:** Metrics: Synchronous metrics client + +**Example:** + +```python +metrics = api.metrics() +prometheus_text = metrics.get() +print(prometheus_text) +``` + +### `request(self, path, request)` + +Make a low-level REST API request. + +This method is primarily for internal use but can be used for direct +API access when needed. + +**Arguments:** + +- `path`: API endpoint path (relative to base URL) +- `request`: Request payload as a dictionary + +**Returns:** dict: Response object + +**Raises:** + +- `ProtocolException`: If the response status is not 200 or response is not JSON +- `ApplicationException`: If the response contains an error + +**Example:** + +```python +response = api.request("flow", { + "operation": "list-flows" +}) +``` + +### `socket(self)` + +Get a synchronous WebSocket client for streaming operations. + +WebSocket connections provide streaming support for real-time responses +from agents, RAG queries, and text completions. This method returns a +synchronous wrapper around the WebSocket protocol. 
+ +**Returns:** SocketClient: Synchronous WebSocket client + +**Example:** + +```python +socket = api.socket() +flow = socket.flow("default") + +# Stream agent responses +for chunk in flow.agent( + question="Explain quantum computing", + user="trustgraph", + streaming=True +): + if hasattr(chunk, 'content'): + print(chunk.content, end='', flush=True) +``` + + +--- + +## `Flow` + +```python +from trustgraph.api import Flow +``` + +Flow management client for blueprint and flow instance operations. + +This class provides methods for managing flow blueprints (templates) and +flow instances (running flows). Blueprints define the structure and +parameters of flows, while instances represent active flows that can +execute services. + +### Methods + +### `__init__(self, api)` + +Initialize Flow client. + +**Arguments:** + +- `api`: Parent Api instance for making requests + +### `delete_blueprint(self, blueprint_name)` + +Delete a flow blueprint. + +**Arguments:** + +- `blueprint_name`: Name of the blueprint to delete + +**Example:** + +```python +api.flow().delete_blueprint("old-blueprint") +``` + +### `get(self, id)` + +Get the definition of a running flow instance. + +**Arguments:** + +- `id`: Flow instance ID + +**Returns:** dict: Flow instance definition + +**Example:** + +```python +flow_def = api.flow().get("default") +print(flow_def) +``` + +### `get_blueprint(self, blueprint_name)` + +Get a flow blueprint definition by name. + +**Arguments:** + +- `blueprint_name`: Name of the blueprint to retrieve + +**Returns:** dict: Blueprint definition as a dictionary + +**Example:** + +```python +blueprint = api.flow().get_blueprint("default") +print(blueprint) # Blueprint configuration +``` + +### `id(self, id='default')` + +Get a FlowInstance for executing operations on a specific flow. 
+ +**Arguments:** + +- `id`: Flow identifier (default: "default") + +**Returns:** FlowInstance: Flow instance for service operations + +**Example:** + +```python +flow = api.flow().id("my-flow") +response = flow.text_completion( + system="You are helpful", + prompt="Hello" +) +``` + +### `list(self)` + +List all active flow instances. + +**Returns:** list[str]: List of flow instance IDs + +**Example:** + +```python +flows = api.flow().list() +print(flows) # ['default', 'flow-1', 'flow-2', ...] +``` + +### `list_blueprints(self)` + +List all available flow blueprints. + +**Returns:** list[str]: List of blueprint names + +**Example:** + +```python +blueprints = api.flow().list_blueprints() +print(blueprints) # ['default', 'custom-flow', ...] +``` + +### `put_blueprint(self, blueprint_name, definition)` + +Create or update a flow blueprint. + +**Arguments:** + +- `blueprint_name`: Name for the blueprint +- `definition`: Blueprint definition dictionary + +**Example:** + +```python +definition = { + "services": ["text-completion", "graph-rag"], + "parameters": {"model": "gpt-4"} +} +api.flow().put_blueprint("my-blueprint", definition) +``` + +### `request(self, path=None, request=None)` + +Make a flow-scoped API request. + +**Arguments:** + +- `path`: Optional path suffix for flow endpoints +- `request`: Request payload dictionary + +**Returns:** dict: Response object + +**Raises:** + +- `RuntimeError`: If request parameter is not specified + +### `start(self, blueprint_name, id, description, parameters=None)` + +Start a new flow instance from a blueprint. 
+ +**Arguments:** + +- `blueprint_name`: Name of the blueprint to instantiate +- `id`: Unique identifier for the flow instance +- `description`: Human-readable description +- `parameters`: Optional parameters dictionary + +**Example:** + +```python +api.flow().start( + blueprint_name="default", + id="my-flow", + description="My custom flow", + parameters={"model": "gpt-4"} +) +``` + +### `stop(self, id)` + +Stop a running flow instance. + +**Arguments:** + +- `id`: Flow instance ID to stop + +**Example:** + +```python +api.flow().stop("my-flow") +``` + + +--- + +## `FlowInstance` + +```python +from trustgraph.api import FlowInstance +``` + +Flow instance client for executing services on a specific flow. + +This class provides access to all TrustGraph services including: +- Text completion and embeddings +- Agent operations with state management +- Graph and document RAG queries +- Knowledge graph operations (triples, objects) +- Document loading and processing +- Natural language to GraphQL query conversion +- Structured data analysis and schema detection +- MCP tool execution +- Prompt templating + +Services are accessed through a running flow instance identified by ID. + +### Methods + +### `__init__(self, api, id)` + +Initialize FlowInstance. + +**Arguments:** + +- `api`: Parent Flow client +- `id`: Flow instance identifier + +### `agent(self, question, user='trustgraph', state=None, group=None, history=None)` + +Execute an agent operation with reasoning and tool use capabilities. + +Agents can perform multi-step reasoning, use tools, and maintain conversation +state across interactions. This is a synchronous non-streaming version. 
+ +**Arguments:** + +- `question`: User question or instruction +- `user`: User identifier (default: "trustgraph") +- `state`: Optional state dictionary for stateful conversations +- `group`: Optional group identifier for multi-user contexts +- `history`: Optional conversation history as list of message dicts + +**Returns:** str: Agent's final answer + +**Example:** + +```python +flow = api.flow().id("default") + +# Simple question +answer = flow.agent( + question="What is the capital of France?", + user="trustgraph" +) + +# With conversation history +history = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi! How can I help?"} +] +answer = flow.agent( + question="Tell me about Paris", + user="trustgraph", + history=history +) +``` + +### `detect_type(self, sample)` + +Detect the data type of a structured data sample. + +**Arguments:** + +- `sample`: Data sample to analyze (string content) + +**Returns:** dict with detected_type, confidence, and optional metadata + +### `diagnose_data(self, sample, schema_name=None, options=None)` + +Perform combined data diagnosis: detect type and generate descriptor. + +**Arguments:** + +- `sample`: Data sample to analyze (string content) +- `schema_name`: Optional target schema name for descriptor generation +- `options`: Optional parameters (e.g., delimiter for CSV) + +**Returns:** dict with detected_type, confidence, descriptor, and metadata + +### `document_rag(self, query, user='trustgraph', collection='default', doc_limit=10)` + +Execute document-based Retrieval-Augmented Generation (RAG) query. + +Document RAG uses vector embeddings to find relevant document chunks, +then generates a response using an LLM with those chunks as context. 
+ +**Arguments:** + +- `query`: Natural language query +- `user`: User/keyspace identifier (default: "trustgraph") +- `collection`: Collection identifier (default: "default") +- `doc_limit`: Maximum document chunks to retrieve (default: 10) + +**Returns:** str: Generated response incorporating document context + +**Example:** + +```python +flow = api.flow().id("default") +response = flow.document_rag( + query="Summarize the key findings", + user="trustgraph", + collection="research-papers", + doc_limit=5 +) +print(response) +``` + +### `embeddings(self, text)` + +Generate vector embeddings for text. + +Converts text into dense vector representations suitable for semantic +search and similarity comparison. + +**Arguments:** + +- `text`: Input text to embed + +**Returns:** list[float]: Vector embedding + +**Example:** + +```python +flow = api.flow().id("default") +vectors = flow.embeddings("quantum computing") +print(f"Embedding dimension: {len(vectors)}") +``` + +### `generate_descriptor(self, sample, data_type, schema_name, options=None)` + +Generate a descriptor for structured data mapping to a specific schema. + +**Arguments:** + +- `sample`: Data sample to analyze (string content) +- `data_type`: Data type (csv, json, xml) +- `schema_name`: Target schema name for descriptor generation +- `options`: Optional parameters (e.g., delimiter for CSV) + +**Returns:** dict with descriptor and metadata + +### `graph_embeddings_query(self, text, user, collection, limit=10)` + +Query knowledge graph entities using semantic similarity. + +Finds entities in the knowledge graph whose descriptions are semantically +similar to the input text, using vector embeddings. 
+ +**Arguments:** + +- `text`: Query text for semantic search +- `user`: User/keyspace identifier +- `collection`: Collection identifier +- `limit`: Maximum number of results (default: 10) + +**Returns:** dict: Query results with similar entities + +**Example:** + +```python +flow = api.flow().id("default") +results = flow.graph_embeddings_query( + text="physicist who discovered radioactivity", + user="trustgraph", + collection="scientists", + limit=5 +) +``` + +### `graph_rag(self, query, user='trustgraph', collection='default', entity_limit=50, triple_limit=30, max_subgraph_size=150, max_path_length=2)` + +Execute graph-based Retrieval-Augmented Generation (RAG) query. + +Graph RAG uses knowledge graph structure to find relevant context by +traversing entity relationships, then generates a response using an LLM. + +**Arguments:** + +- `query`: Natural language query +- `user`: User/keyspace identifier (default: "trustgraph") +- `collection`: Collection identifier (default: "default") +- `entity_limit`: Maximum entities to retrieve (default: 50) +- `triple_limit`: Maximum triples per entity (default: 30) +- `max_subgraph_size`: Maximum total triples in subgraph (default: 150) +- `max_path_length`: Maximum traversal depth (default: 2) + +**Returns:** str: Generated response incorporating graph context + +**Example:** + +```python +flow = api.flow().id("default") +response = flow.graph_rag( + query="Tell me about Marie Curie's discoveries", + user="trustgraph", + collection="scientists", + entity_limit=20, + max_path_length=3 +) +print(response) +``` + +### `load_document(self, document, id=None, metadata=None, user=None, collection=None)` + +Load a binary document for processing. + +Uploads a document (PDF, DOCX, images, etc.) for extraction and +processing through the flow's document pipeline. 
+ +**Arguments:** + +- `document`: Document content as bytes +- `id`: Optional document identifier (auto-generated if None) +- `metadata`: Optional metadata (list of Triples or object with emit method) +- `user`: User/keyspace identifier (optional) +- `collection`: Collection identifier (optional) + +**Returns:** dict: Processing response + +**Raises:** + +- `RuntimeError`: If metadata is provided without id + +**Example:** + +```python +flow = api.flow().id("default") + +# Load a PDF document +with open("research.pdf", "rb") as f: + result = flow.load_document( + document=f.read(), + id="research-001", + user="trustgraph", + collection="papers" + ) +``` + +### `load_text(self, text, id=None, metadata=None, charset='utf-8', user=None, collection=None)` + +Load text content for processing. + +Uploads text content for extraction and processing through the flow's +text pipeline. + +**Arguments:** + +- `text`: Text content as bytes +- `id`: Optional document identifier (auto-generated if None) +- `metadata`: Optional metadata (list of Triples or object with emit method) +- `charset`: Character encoding (default: "utf-8") +- `user`: User/keyspace identifier (optional) +- `collection`: Collection identifier (optional) + +**Returns:** dict: Processing response + +**Raises:** + +- `RuntimeError`: If metadata is provided without id + +**Example:** + +```python +flow = api.flow().id("default") + +# Load text content +text_content = b"This is the document content..." +result = flow.load_text( + text=text_content, + id="text-001", + charset="utf-8", + user="trustgraph", + collection="documents" +) +``` + +### `mcp_tool(self, name, parameters={})` + +Execute a Model Context Protocol (MCP) tool. + +MCP tools provide extensible functionality for agents and workflows, +allowing integration with external systems and services. 
+ +**Arguments:** + +- `name`: Tool name/identifier +- `parameters`: Tool parameters dictionary (default: {}) + +**Returns:** str or dict: Tool execution result + +**Raises:** + +- `ProtocolException`: If response format is invalid + +**Example:** + +```python +flow = api.flow().id("default") + +# Execute a tool +result = flow.mcp_tool( + name="search-web", + parameters={"query": "latest AI news", "limit": 5} +) +``` + +### `nlp_query(self, question, max_results=100)` + +Convert a natural language question to a GraphQL query. + +**Arguments:** + +- `question`: Natural language question +- `max_results`: Maximum number of results to return (default: 100) + +**Returns:** dict with graphql_query, variables, detected_schemas, confidence + +### `objects_query(self, query, user='trustgraph', collection='default', variables=None, operation_name=None)` + +Execute a GraphQL query against structured objects in the knowledge graph. + +Queries structured data using GraphQL syntax, allowing complex queries +with filtering, aggregation, and relationship traversal. + +**Arguments:** + +- `query`: GraphQL query string +- `user`: User/keyspace identifier (default: "trustgraph") +- `collection`: Collection identifier (default: "default") +- `variables`: Optional query variables dictionary +- `operation_name`: Optional operation name for multi-operation documents + +**Returns:** dict: GraphQL response with 'data', 'errors', and/or 'extensions' fields + +**Raises:** + +- `ProtocolException`: If system-level error occurs + +**Example:** + +```python +flow = api.flow().id("default") + +# Simple query +query = ''' +{ + scientists(limit: 10) { + name + field + discoveries + } +} +''' +result = flow.objects_query( + query=query, + user="trustgraph", + collection="scientists" +) + +# Query with variables +query = ''' +query GetScientist($name: String!) 
{ + scientists(name: $name) { + name + nobelPrizes + } +} +''' +result = flow.objects_query( + query=query, + variables={"name": "Marie Curie"} +) +``` + +### `prompt(self, id, variables)` + +Execute a prompt template with variable substitution. + +Prompt templates allow reusable prompt patterns with dynamic variable +substitution, useful for consistent prompt engineering. + +**Arguments:** + +- `id`: Prompt template identifier +- `variables`: Dictionary of variable name to value mappings + +**Returns:** str or dict: Rendered prompt result (text or structured object) + +**Raises:** + +- `ProtocolException`: If response format is invalid + +**Example:** + +```python +flow = api.flow().id("default") + +# Text template +result = flow.prompt( + id="summarize-template", + variables={"topic": "quantum computing", "length": "brief"} +) + +# Structured template +result = flow.prompt( + id="extract-entities", + variables={"text": "Marie Curie won Nobel Prizes"} +) +``` + +### `request(self, path, request)` + +Make a service request on this flow instance. + +**Arguments:** + +- `path`: Service path (e.g., "service/text-completion") +- `request`: Request payload dictionary + +**Returns:** dict: Service response + +### `schema_selection(self, sample, options=None)` + +Select matching schemas for a data sample using prompt analysis. + +**Arguments:** + +- `sample`: Data sample to analyze (string content) +- `options`: Optional parameters + +**Returns:** dict with schema_matches array and metadata + +### `structured_query(self, question, user='trustgraph', collection='default')` + +Execute a natural language question against structured data. +Combines NLP query conversion and GraphQL execution. 
+ +**Arguments:** + +- `question`: Natural language question +- `user`: Cassandra keyspace identifier (default: "trustgraph") +- `collection`: Data collection identifier (default: "default") + +**Returns:** dict with data and optional errors + +### `text_completion(self, system, prompt)` + +Execute text completion using the flow's LLM. + +**Arguments:** + +- `system`: System prompt defining the assistant's behavior +- `prompt`: User prompt/question + +**Returns:** str: Generated response text + +**Example:** + +```python +flow = api.flow().id("default") +response = flow.text_completion( + system="You are a helpful assistant", + prompt="What is quantum computing?" +) +print(response) +``` + +### `triples_query(self, s=None, p=None, o=None, user=None, collection=None, limit=10000)` + +Query knowledge graph triples using pattern matching. + +Searches for RDF triples matching the given subject, predicate, and/or +object patterns. Unspecified parameters act as wildcards. + +**Arguments:** + +- `s`: Subject URI (optional, use None for wildcard) +- `p`: Predicate URI (optional, use None for wildcard) +- `o`: Object URI or Literal (optional, use None for wildcard) +- `user`: User/keyspace identifier (optional) +- `collection`: Collection identifier (optional) +- `limit`: Maximum results to return (default: 10000) + +**Returns:** list[Triple]: List of matching Triple objects + +**Raises:** + +- `RuntimeError`: If s or p is not a Uri, or o is not Uri/Literal + +**Example:** + +```python +from trustgraph.knowledge import Uri, Literal + +flow = api.flow().id("default") + +# Find all triples about a specific subject +triples = flow.triples_query( + s=Uri("http://example.org/person/marie-curie"), + user="trustgraph", + collection="scientists" +) + +# Find all instances of a specific relationship +triples = flow.triples_query( + p=Uri("http://example.org/ontology/discovered"), + limit=100 +) +``` + + +--- + +## `AsyncFlow` + +```python +from trustgraph.api import AsyncFlow +``` + 
+Asynchronous REST-based flow interface + +### Methods + +### `__init__(self, url: str, timeout: int, token: Optional[str]) -> None` + +Initialize self. See help(type(self)) for accurate signature. + +### `aclose(self) -> None` + +Close connection (cleanup handled by aiohttp session) + +### `delete_class(self, class_name: str)` + +Delete flow class + +### `get(self, id: str) -> Dict[str, Any]` + +Get flow definition + +### `get_class(self, class_name: str) -> Dict[str, Any]` + +Get flow class definition + +### `id(self, flow_id: str)` + +Get async flow instance + +### `list(self) -> List[str]` + +List all flows + +### `list_classes(self) -> List[str]` + +List flow classes + +### `put_class(self, class_name: str, definition: Dict[str, Any])` + +Create/update flow class + +### `request(self, path: str, request_data: Dict[str, Any]) -> Dict[str, Any]` + +Make async HTTP request to Gateway API + +### `start(self, class_name: str, id: str, description: str, parameters: Optional[Dict] = None)` + +Start a flow + +### `stop(self, id: str)` + +Stop a flow + + +--- + +## `AsyncFlowInstance` + +```python +from trustgraph.api import AsyncFlowInstance +``` + +Asynchronous REST flow instance + +### Methods + +### `__init__(self, flow: trustgraph.api.async_flow.AsyncFlow, flow_id: str)` + +Initialize self. See help(type(self)) for accurate signature. 
+ +### `agent(self, question: str, user: str, state: Optional[Dict] = None, group: Optional[str] = None, history: Optional[List] = None, **kwargs: Any) -> Dict[str, Any]` + +Execute agent (non-streaming, use async_socket for streaming) + +### `document_rag(self, query: str, user: str, collection: str, doc_limit: int = 10, **kwargs: Any) -> str` + +Document RAG (non-streaming, use async_socket for streaming) + +### `embeddings(self, text: str, **kwargs: Any)` + +Generate text embeddings + +### `graph_embeddings_query(self, text: str, user: str, collection: str, limit: int = 10, **kwargs: Any)` + +Query graph embeddings for semantic search + +### `graph_rag(self, query: str, user: str, collection: str, max_subgraph_size: int = 1000, max_subgraph_count: int = 5, max_entity_distance: int = 3, **kwargs: Any) -> str` + +Graph RAG (non-streaming, use async_socket for streaming) + +### `objects_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None, operation_name: Optional[str] = None, **kwargs: Any)` + +GraphQL query + +### `request(self, service: str, request_data: Dict[str, Any]) -> Dict[str, Any]` + +Make request to flow-scoped service + +### `text_completion(self, system: str, prompt: str, **kwargs: Any) -> str` + +Text completion (non-streaming, use async_socket for streaming) + +### `triples_query(self, s=None, p=None, o=None, user=None, collection=None, limit=100, **kwargs: Any)` + +Triple pattern query + + +--- + +## `SocketClient` + +```python +from trustgraph.api import SocketClient +``` + +Synchronous WebSocket client (wraps async websockets library) + +### Methods + +### `__init__(self, url: str, timeout: int, token: Optional[str]) -> None` + +Initialize self. See help(type(self)) for accurate signature. 
+ +### `close(self) -> None` + +Close WebSocket connection + +### `flow(self, flow_id: str) -> 'SocketFlowInstance'` + +Get flow instance for WebSocket operations + + +--- + +## `SocketFlowInstance` + +```python +from trustgraph.api import SocketFlowInstance +``` + +Synchronous WebSocket flow instance with same interface as REST FlowInstance + +### Methods + +### `__init__(self, client: trustgraph.api.socket_client.SocketClient, flow_id: str) -> None` + +Initialize self. See help(type(self)) for accurate signature. + +### `agent(self, question: str, user: str, state: Optional[Dict[str, Any]] = None, group: Optional[str] = None, history: Optional[List[Dict[str, Any]]] = None, streaming: bool = False, **kwargs: Any) -> Union[Dict[str, Any], Iterator[trustgraph.api.types.StreamingChunk]]` + +Agent with optional streaming + +### `document_rag(self, query: str, user: str, collection: str, doc_limit: int = 10, streaming: bool = False, **kwargs: Any) -> Union[str, Iterator[str]]` + +Document RAG with optional streaming + +### `embeddings(self, text: str, **kwargs: Any) -> Dict[str, Any]` + +Generate text embeddings + +### `graph_embeddings_query(self, text: str, user: str, collection: str, limit: int = 10, **kwargs: Any) -> Dict[str, Any]` + +Query graph embeddings for semantic search + +### `graph_rag(self, query: str, user: str, collection: str, max_subgraph_size: int = 1000, max_subgraph_count: int = 5, max_entity_distance: int = 3, streaming: bool = False, **kwargs: Any) -> Union[str, Iterator[str]]` + +Graph RAG with optional streaming + +### `mcp_tool(self, name: str, parameters: Dict[str, Any], **kwargs: Any) -> Dict[str, Any]` + +Execute MCP tool + +### `objects_query(self, query: str, user: str, collection: str, variables: Optional[Dict[str, Any]] = None, operation_name: Optional[str] = None, **kwargs: Any) -> Dict[str, Any]` + +GraphQL query + +### `prompt(self, id: str, variables: Dict[str, str], streaming: bool = False, **kwargs: Any) -> Union[str, 
Iterator[str]]` + +Execute prompt with optional streaming + +### `text_completion(self, system: str, prompt: str, streaming: bool = False, **kwargs) -> Union[str, Iterator[str]]` + +Text completion with optional streaming + +### `triples_query(self, s: Optional[str] = None, p: Optional[str] = None, o: Optional[str] = None, user: Optional[str] = None, collection: Optional[str] = None, limit: int = 100, **kwargs: Any) -> Dict[str, Any]` + +Triple pattern query + + +--- + +## `AsyncSocketClient` + +```python +from trustgraph.api import AsyncSocketClient +``` + +Asynchronous WebSocket client + +### Methods + +### `__init__(self, url: str, timeout: int, token: Optional[str])` + +Initialize self. See help(type(self)) for accurate signature. + +### `aclose(self)` + +Close WebSocket connection + +### `flow(self, flow_id: str)` + +Get async flow instance for WebSocket operations + + +--- + +## `AsyncSocketFlowInstance` + +```python +from trustgraph.api import AsyncSocketFlowInstance +``` + +Asynchronous WebSocket flow instance + +### Methods + +### `__init__(self, client: trustgraph.api.async_socket_client.AsyncSocketClient, flow_id: str)` + +Initialize self. See help(type(self)) for accurate signature. 
+ +### `agent(self, question: str, user: str, state: Optional[Dict[str, Any]] = None, group: Optional[str] = None, history: Optional[list] = None, streaming: bool = False, **kwargs) -> Union[Dict[str, Any], AsyncIterator]` + +Agent with optional streaming + +### `document_rag(self, query: str, user: str, collection: str, doc_limit: int = 10, streaming: bool = False, **kwargs)` + +Document RAG with optional streaming + +### `embeddings(self, text: str, **kwargs)` + +Generate text embeddings + +### `graph_embeddings_query(self, text: str, user: str, collection: str, limit: int = 10, **kwargs)` + +Query graph embeddings for semantic search + +### `graph_rag(self, query: str, user: str, collection: str, max_subgraph_size: int = 1000, max_subgraph_count: int = 5, max_entity_distance: int = 3, streaming: bool = False, **kwargs)` + +Graph RAG with optional streaming + +### `mcp_tool(self, name: str, parameters: Dict[str, Any], **kwargs)` + +Execute MCP tool + +### `objects_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None, operation_name: Optional[str] = None, **kwargs)` + +GraphQL query + +### `prompt(self, id: str, variables: Dict[str, str], streaming: bool = False, **kwargs)` + +Execute prompt with optional streaming + +### `text_completion(self, system: str, prompt: str, streaming: bool = False, **kwargs)` + +Text completion with optional streaming + +### `triples_query(self, s=None, p=None, o=None, user=None, collection=None, limit=100, **kwargs)` + +Triple pattern query + + +--- + +## `BulkClient` + +```python +from trustgraph.api import BulkClient +``` + +Synchronous bulk operations client + +### Methods + +### `__init__(self, url: str, timeout: int, token: Optional[str]) -> None` + +Initialize self. See help(type(self)) for accurate signature. 
+ +### `close(self) -> None` + +Close connections + +### `export_document_embeddings(self, flow: str, **kwargs: Any) -> Iterator[Dict[str, Any]]` + +Bulk export document embeddings via WebSocket + +### `export_entity_contexts(self, flow: str, **kwargs: Any) -> Iterator[Dict[str, Any]]` + +Bulk export entity contexts via WebSocket + +### `export_graph_embeddings(self, flow: str, **kwargs: Any) -> Iterator[Dict[str, Any]]` + +Bulk export graph embeddings via WebSocket + +### `export_triples(self, flow: str, **kwargs: Any) -> Iterator[trustgraph.api.types.Triple]` + +Bulk export triples via WebSocket + +### `import_document_embeddings(self, flow: str, embeddings: Iterator[Dict[str, Any]], **kwargs: Any) -> None` + +Bulk import document embeddings via WebSocket + +### `import_entity_contexts(self, flow: str, contexts: Iterator[Dict[str, Any]], **kwargs: Any) -> None` + +Bulk import entity contexts via WebSocket + +### `import_graph_embeddings(self, flow: str, embeddings: Iterator[Dict[str, Any]], **kwargs: Any) -> None` + +Bulk import graph embeddings via WebSocket + +### `import_objects(self, flow: str, objects: Iterator[Dict[str, Any]], **kwargs: Any) -> None` + +Bulk import objects via WebSocket + +### `import_triples(self, flow: str, triples: Iterator[trustgraph.api.types.Triple], **kwargs: Any) -> None` + +Bulk import triples via WebSocket + + +--- + +## `AsyncBulkClient` + +```python +from trustgraph.api import AsyncBulkClient +``` + +Asynchronous bulk operations client + +### Methods + +### `__init__(self, url: str, timeout: int, token: Optional[str]) -> None` + +Initialize self. See help(type(self)) for accurate signature. 
+ +### `aclose(self) -> None` + +Close connections + +### `export_document_embeddings(self, flow: str, **kwargs: Any) -> AsyncIterator[Dict[str, Any]]` + +Bulk export document embeddings via WebSocket + +### `export_entity_contexts(self, flow: str, **kwargs: Any) -> AsyncIterator[Dict[str, Any]]` + +Bulk export entity contexts via WebSocket + +### `export_graph_embeddings(self, flow: str, **kwargs: Any) -> AsyncIterator[Dict[str, Any]]` + +Bulk export graph embeddings via WebSocket + +### `export_triples(self, flow: str, **kwargs: Any) -> AsyncIterator[trustgraph.api.types.Triple]` + +Bulk export triples via WebSocket + +### `import_document_embeddings(self, flow: str, embeddings: AsyncIterator[Dict[str, Any]], **kwargs: Any) -> None` + +Bulk import document embeddings via WebSocket + +### `import_entity_contexts(self, flow: str, contexts: AsyncIterator[Dict[str, Any]], **kwargs: Any) -> None` + +Bulk import entity contexts via WebSocket + +### `import_graph_embeddings(self, flow: str, embeddings: AsyncIterator[Dict[str, Any]], **kwargs: Any) -> None` + +Bulk import graph embeddings via WebSocket + +### `import_objects(self, flow: str, objects: AsyncIterator[Dict[str, Any]], **kwargs: Any) -> None` + +Bulk import objects via WebSocket + +### `import_triples(self, flow: str, triples: AsyncIterator[trustgraph.api.types.Triple], **kwargs: Any) -> None` + +Bulk import triples via WebSocket + + +--- + +## `Metrics` + +```python +from trustgraph.api import Metrics +``` + +Synchronous metrics client + +### Methods + +### `__init__(self, url: str, timeout: int, token: Optional[str]) -> None` + +Initialize self. See help(type(self)) for accurate signature. + +### `get(self) -> str` + +Get Prometheus metrics as text + + +--- + +## `AsyncMetrics` + +```python +from trustgraph.api import AsyncMetrics +``` + +Asynchronous metrics client + +### Methods + +### `__init__(self, url: str, timeout: int, token: Optional[str]) -> None` + +Initialize self. 
See help(type(self)) for accurate signature. + +### `aclose(self) -> None` + +Close connections + +### `get(self) -> str` + +Get Prometheus metrics as text + + +--- + +## `Triple` + +```python +from trustgraph.api import Triple +``` + +RDF triple representing a knowledge graph statement. + +**Fields:** + +- `s`: +- `p`: +- `o`: + +### Methods + +### `__init__(self, s: str, p: str, o: str) -> None` + +Initialize self. See help(type(self)) for accurate signature. + + +--- + +## `ConfigKey` + +```python +from trustgraph.api import ConfigKey +``` + +Configuration key identifier. + +**Fields:** + +- `type`: +- `key`: + +### Methods + +### `__init__(self, type: str, key: str) -> None` + +Initialize self. See help(type(self)) for accurate signature. + + +--- + +## `ConfigValue` + +```python +from trustgraph.api import ConfigValue +``` + +Configuration key-value pair. + +**Fields:** + +- `type`: +- `key`: +- `value`: + +### Methods + +### `__init__(self, type: str, key: str, value: str) -> None` + +Initialize self. See help(type(self)) for accurate signature. + + +--- + +## `DocumentMetadata` + +```python +from trustgraph.api import DocumentMetadata +``` + +Metadata for a document in the library. + +**Fields:** + +- `id`: +- `time`: +- `kind`: +- `title`: +- `comments`: +- `metadata`: typing.List[trustgraph.api.types.Triple] +- `user`: +- `tags`: typing.List[str] + +### Methods + +### `__init__(self, id: str, time: datetime.datetime, kind: str, title: str, comments: str, metadata: List[trustgraph.api.types.Triple], user: str, tags: List[str]) -> None` + +Initialize self. See help(type(self)) for accurate signature. + + +--- + +## `ProcessingMetadata` + +```python +from trustgraph.api import ProcessingMetadata +``` + +Metadata for an active document processing job. 
+
+**Fields:**
+
+- `id`:
+- `document_id`:
+- `time`:
+- `flow`:
+- `user`:
+- `collection`:
+- `tags`: typing.List[str]
+
+### Methods
+
+### `__init__(self, id: str, document_id: str, time: datetime.datetime, flow: str, user: str, collection: str, tags: List[str]) -> None`
+
+Initialize self. See help(type(self)) for accurate signature.
+
+
+---
+
+## `CollectionMetadata`
+
+```python
+from trustgraph.api import CollectionMetadata
+```
+
+Metadata for a data collection.
+
+Collections provide logical grouping and isolation for documents and
+knowledge graph data.
+
+**Attributes:**
+
+- `name`: Human-readable collection name
+
+**Fields:**
+
+- `user`:
+- `collection`:
+- `name`:
+- `description`:
+- `tags`: typing.List[str]
+
+### Methods
+
+### `__init__(self, user: str, collection: str, name: str, description: str, tags: List[str]) -> None`
+
+Initialize self. See help(type(self)) for accurate signature.
+
+
+---
+
+## `StreamingChunk`
+
+```python
+from trustgraph.api import StreamingChunk
+```
+
+Base class for streaming response chunks.
+
+Used for WebSocket-based streaming operations where responses are delivered
+incrementally as they are generated.
+
+**Fields:**
+
+- `content`:
+- `end_of_message`:
+
+### Methods
+
+### `__init__(self, content: str, end_of_message: bool = False) -> None`
+
+Initialize self. See help(type(self)) for accurate signature.
+
+
+---
+
+## `AgentThought`
+
+```python
+from trustgraph.api import AgentThought
+```
+
+Agent reasoning/thought process chunk.
+
+Represents the agent's internal reasoning or planning steps during execution.
+These chunks show how the agent is thinking about the problem.
+
+**Fields:**
+
+- `content`:
+- `end_of_message`:
+- `chunk_type`:
+
+### Methods
+
+### `__init__(self, content: str, end_of_message: bool = False, chunk_type: str = 'thought') -> None`
+
+Initialize self. See help(type(self)) for accurate signature.
+
+
+---
+
+## `AgentObservation`
+
+```python
+from trustgraph.api import AgentObservation
+```
+
+Agent tool execution observation chunk.
+
+Represents the result or observation from executing a tool or action.
+These chunks show what the agent learned from using tools.
+
+**Fields:**
+
+- `content`:
+- `end_of_message`:
+- `chunk_type`:
+
+### Methods
+
+### `__init__(self, content: str, end_of_message: bool = False, chunk_type: str = 'observation') -> None`
+
+Initialize self. See help(type(self)) for accurate signature.
+
+
+---
+
+## `AgentAnswer`
+
+```python
+from trustgraph.api import AgentAnswer
+```
+
+Agent final answer chunk.
+
+Represents the agent's final response to the user's query after completing
+its reasoning and tool use.
+
+**Attributes:**
+
+- `chunk_type`: Always "final-answer"
+
+**Fields:**
+
+- `content`:
+- `end_of_message`:
+- `chunk_type`:
+- `end_of_dialog`:
+
+### Methods
+
+### `__init__(self, content: str, end_of_message: bool = False, chunk_type: str = 'final-answer', end_of_dialog: bool = False) -> None`
+
+Initialize self. See help(type(self)) for accurate signature.
+
+
+---
+
+## `RAGChunk`
+
+```python
+from trustgraph.api import RAGChunk
+```
+
+RAG (Retrieval-Augmented Generation) streaming chunk.
+
+Used for streaming responses from graph RAG, document RAG, text completion,
+and other generative services.
+
+**Fields:**
+
+- `content`:
+- `end_of_message`:
+- `chunk_type`:
+- `end_of_stream`:
+- `error`: typing.Optional[typing.Dict[str, str]]
+
+### Methods
+
+### `__init__(self, content: str, end_of_message: bool = False, chunk_type: str = 'rag', end_of_stream: bool = False, error: Optional[Dict[str, str]] = None) -> None`
+
+Initialize self. See help(type(self)) for accurate signature.
+ + +--- + +## `ProtocolException` + +```python +from trustgraph.api import ProtocolException +``` + +Raised when WebSocket protocol errors occur + + +--- + +## `TrustGraphException` + +```python +from trustgraph.api import TrustGraphException +``` + +Base class for all TrustGraph service errors + + +--- + +## `AgentError` + +```python +from trustgraph.api import AgentError +``` + +Agent service error + + +--- + +## `ConfigError` + +```python +from trustgraph.api import ConfigError +``` + +Configuration service error + + +--- + +## `DocumentRagError` + +```python +from trustgraph.api import DocumentRagError +``` + +Document RAG retrieval error + + +--- + +## `FlowError` + +```python +from trustgraph.api import FlowError +``` + +Flow management error + + +--- + +## `GatewayError` + +```python +from trustgraph.api import GatewayError +``` + +API Gateway error + + +--- + +## `GraphRagError` + +```python +from trustgraph.api import GraphRagError +``` + +Graph RAG retrieval error + + +--- + +## `LLMError` + +```python +from trustgraph.api import LLMError +``` + +LLM service error + + +--- + +## `LoadError` + +```python +from trustgraph.api import LoadError +``` + +Data loading error + + +--- + +## `LookupError` + +```python +from trustgraph.api import LookupError +``` + +Lookup/search error + + +--- + +## `NLPQueryError` + +```python +from trustgraph.api import NLPQueryError +``` + +NLP query service error + + +--- + +## `ObjectsQueryError` + +```python +from trustgraph.api import ObjectsQueryError +``` + +Objects query service error + + +--- + +## `RequestError` + +```python +from trustgraph.api import RequestError +``` + +Request processing error + + +--- + +## `StructuredQueryError` + +```python +from trustgraph.api import StructuredQueryError +``` + +Structured query service error + + +--- + +## `UnexpectedError` + +```python +from trustgraph.api import UnexpectedError +``` + +Unexpected/unknown error + + +--- + +## `ApplicationException` + +```python +from 
trustgraph.api import ApplicationException +``` + +Base class for all TrustGraph service errors + + +--- + diff --git a/trustgraph-base/trustgraph/api/__init__.py b/trustgraph-base/trustgraph/api/__init__.py index 0ecb760e..93466cd2 100644 --- a/trustgraph-base/trustgraph/api/__init__.py +++ b/trustgraph-base/trustgraph/api/__init__.py @@ -1,3 +1,55 @@ +""" +TrustGraph API Client Library + +This package provides Python client interfaces for interacting with TrustGraph services. +TrustGraph is a knowledge graph and RAG (Retrieval-Augmented Generation) platform that +combines graph databases, vector embeddings, and LLM capabilities. + +The library offers both synchronous and asynchronous APIs for: +- Flow management and execution +- Knowledge graph operations (triples, entities, embeddings) +- RAG queries (graph-based and document-based) +- Agent interactions with streaming support +- WebSocket-based real-time communication +- Bulk import/export operations +- Configuration and collection management + +Quick Start: + ```python + from trustgraph.api import Api + + # Create API client + api = Api(url="http://localhost:8088/") + + # Get a flow instance + flow = api.flow().id("default") + + # Execute a graph RAG query + response = flow.graph_rag( + query="What are the main topics?", + user="trustgraph", + collection="default" + ) + ``` + +For streaming and async operations: + ```python + # WebSocket streaming + socket = api.socket() + flow = socket.flow("default") + + for chunk in flow.agent(question="Hello", user="trustgraph"): + print(chunk.content) + + # Async operations + async with Api(url="http://localhost:8088/") as api: + async_flow = api.async_flow() + result = await async_flow.id("default").text_completion( + system="You are helpful", + prompt="Hello" + ) + ``` +""" # Core API from .api import Api diff --git a/trustgraph-base/trustgraph/api/api.py b/trustgraph-base/trustgraph/api/api.py index d1f07513..dbdce0a8 100644 --- a/trustgraph-base/trustgraph/api/api.py 
+++ b/trustgraph-base/trustgraph/api/api.py @@ -1,3 +1,8 @@ +""" +TrustGraph API Client + +Core API client for interacting with TrustGraph services via REST and WebSocket protocols. +""" import requests import json @@ -26,8 +31,47 @@ def check_error(response): raise ApplicationException(f"{tp}: {msg}") class Api: + """ + Main TrustGraph API client for synchronous and asynchronous operations. + + This class provides access to all TrustGraph services including flow management, + knowledge graph operations, document processing, RAG queries, and more. It supports + both REST-based and WebSocket-based communication patterns. + + The client can be used as a context manager for automatic resource cleanup: + ```python + with Api(url="http://localhost:8088/") as api: + result = api.flow().id("default").graph_rag(query="test") + ``` + + Attributes: + url: Base URL for the TrustGraph API endpoint + timeout: Request timeout in seconds + token: Optional bearer token for authentication + """ def __init__(self, url="http://localhost:8088/", timeout=60, token: Optional[str] = None): + """ + Initialize the TrustGraph API client. + + Args: + url: Base URL for TrustGraph API (default: "http://localhost:8088/") + timeout: Request timeout in seconds (default: 60) + token: Optional bearer token for authentication + + Example: + ```python + # Local development + api = Api() + + # Production with authentication + api = Api( + url="https://trustgraph.example.com/", + timeout=120, + token="your-api-token" + ) + ``` + """ self.url = url @@ -49,15 +93,97 @@ class Api: self._async_metrics = None def flow(self): + """ + Get a Flow client for managing and interacting with flows. + + Flows are the primary execution units in TrustGraph, providing access to + services like agents, RAG queries, embeddings, and document processing. 
+ + Returns: + Flow: Flow management client + + Example: + ```python + flow_client = api.flow() + + # List available blueprints + blueprints = flow_client.list_blueprints() + + # Get a specific flow instance + flow_instance = flow_client.id("default") + response = flow_instance.text_completion( + system="You are helpful", + prompt="Hello" + ) + ``` + """ return Flow(api=self) def config(self): + """ + Get a Config client for managing configuration settings. + + Returns: + Config: Configuration management client + + Example: + ```python + config = api.config() + + # Get configuration values + values = config.get([ConfigKey(type="llm", key="model")]) + + # Set configuration + config.put([ConfigValue(type="llm", key="model", value="gpt-4")]) + ``` + """ return Config(api=self) def knowledge(self): + """ + Get a Knowledge client for managing knowledge graph cores. + + Returns: + Knowledge: Knowledge graph management client + + Example: + ```python + knowledge = api.knowledge() + + # List available KG cores + cores = knowledge.list_kg_cores(user="trustgraph") + + # Load a KG core + knowledge.load_kg_core(id="core-123", user="trustgraph") + ``` + """ return Knowledge(api=self) def request(self, path, request): + """ + Make a low-level REST API request. + + This method is primarily for internal use but can be used for direct + API access when needed. + + Args: + path: API endpoint path (relative to base URL) + request: Request payload as a dictionary + + Returns: + dict: Response object + + Raises: + ProtocolException: If the response status is not 200 or response is not JSON + ApplicationException: If the response contains an error + + Example: + ```python + response = api.request("flow", { + "operation": "list-flows" + }) + ``` + """ url = f"{self.url}{path}" @@ -83,14 +209,90 @@ class Api: return object def library(self): + """ + Get a Library client for document management. 
+ + The library provides document storage, metadata management, and + processing workflow coordination. + + Returns: + Library: Document library management client + + Example: + ```python + library = api.library() + + # Add a document + library.add_document( + document=b"Document content", + id="doc-123", + metadata=[], + user="trustgraph", + title="My Document", + comments="Test document" + ) + + # List documents + docs = library.get_documents(user="trustgraph") + ``` + """ return Library(self) def collection(self): + """ + Get a Collection client for managing data collections. + + Collections organize documents and knowledge graph data into + logical groupings for isolation and access control. + + Returns: + Collection: Collection management client + + Example: + ```python + collection = api.collection() + + # List collections + colls = collection.list_collections(user="trustgraph") + + # Update collection metadata + collection.update_collection( + user="trustgraph", + collection="default", + name="Default Collection", + description="Main data collection" + ) + ``` + """ return Collection(self) # New synchronous methods def socket(self): - """Synchronous WebSocket-based interface for streaming operations""" + """ + Get a synchronous WebSocket client for streaming operations. + + WebSocket connections provide streaming support for real-time responses + from agents, RAG queries, and text completions. This method returns a + synchronous wrapper around the WebSocket protocol. + + Returns: + SocketClient: Synchronous WebSocket client + + Example: + ```python + socket = api.socket() + flow = socket.flow("default") + + # Stream agent responses + for chunk in flow.agent( + question="Explain quantum computing", + user="trustgraph", + streaming=True + ): + if hasattr(chunk, 'content'): + print(chunk.content, end='', flush=True) + ``` + """ if self._socket_client is None: from . 
socket_client import SocketClient # Extract base URL (remove api/v1/ suffix) @@ -99,7 +301,31 @@ class Api: return self._socket_client def bulk(self): - """Synchronous bulk operations interface for import/export""" + """ + Get a synchronous bulk operations client for import/export. + + Bulk operations allow efficient transfer of large datasets via WebSocket + connections, including triples, embeddings, entity contexts, and objects. + + Returns: + BulkClient: Synchronous bulk operations client + + Example: + ```python + bulk = api.bulk() + + # Export triples + for triple in bulk.export_triples(flow="default"): + print(f"{triple.s} {triple.p} {triple.o}") + + # Import triples + def triple_generator(): + yield Triple(s="subj", p="pred", o="obj") + # ... more triples + + bulk.import_triples(flow="default", triples=triple_generator()) + ``` + """ if self._bulk_client is None: from . bulk_client import BulkClient # Extract base URL (remove api/v1/ suffix) @@ -108,7 +334,22 @@ class Api: return self._bulk_client def metrics(self): - """Synchronous metrics interface""" + """ + Get a synchronous metrics client for monitoring. + + Retrieves Prometheus-formatted metrics from the TrustGraph service + for monitoring and observability. + + Returns: + Metrics: Synchronous metrics client + + Example: + ```python + metrics = api.metrics() + prometheus_text = metrics.get() + print(prometheus_text) + ``` + """ if self._metrics is None: from . metrics import Metrics # Extract base URL (remove api/v1/ suffix) @@ -118,14 +359,60 @@ class Api: # New asynchronous methods def async_flow(self): - """Asynchronous REST-based flow interface""" + """ + Get an asynchronous REST-based flow client. + + Provides async/await style access to flow operations. This is preferred + for async Python applications and frameworks (FastAPI, aiohttp, etc.). 
+ + Returns: + AsyncFlow: Asynchronous flow client + + Example: + ```python + async_flow = api.async_flow() + + # List flows + flow_ids = await async_flow.list() + + # Execute operations + instance = async_flow.id("default") + result = await instance.text_completion( + system="You are helpful", + prompt="Hello" + ) + ``` + """ if self._async_flow is None: from . async_flow import AsyncFlow self._async_flow = AsyncFlow(self.url, self.timeout, self.token) return self._async_flow def async_socket(self): - """Asynchronous WebSocket-based interface for streaming operations""" + """ + Get an asynchronous WebSocket client for streaming operations. + + Provides async/await style WebSocket access with streaming support. + This is the preferred method for async streaming in Python. + + Returns: + AsyncSocketClient: Asynchronous WebSocket client + + Example: + ```python + async_socket = api.async_socket() + flow = async_socket.flow("default") + + # Stream agent responses + async for chunk in flow.agent( + question="Explain quantum computing", + user="trustgraph", + streaming=True + ): + if hasattr(chunk, 'content'): + print(chunk.content, end='', flush=True) + ``` + """ if self._async_socket_client is None: from . async_socket_client import AsyncSocketClient # Extract base URL (remove api/v1/ suffix) @@ -134,7 +421,34 @@ class Api: return self._async_socket_client def async_bulk(self): - """Asynchronous bulk operations interface for import/export""" + """ + Get an asynchronous bulk operations client. + + Provides async/await style bulk import/export operations via WebSocket + for efficient handling of large datasets. 
+ + Returns: + AsyncBulkClient: Asynchronous bulk operations client + + Example: + ```python + async_bulk = api.async_bulk() + + # Export triples asynchronously + async for triple in async_bulk.export_triples(flow="default"): + print(f"{triple.s} {triple.p} {triple.o}") + + # Import with async generator + async def triple_gen(): + yield Triple(s="subj", p="pred", o="obj") + # ... more triples + + await async_bulk.import_triples( + flow="default", + triples=triple_gen() + ) + ``` + """ if self._async_bulk_client is None: from . async_bulk_client import AsyncBulkClient # Extract base URL (remove api/v1/ suffix) @@ -143,7 +457,21 @@ class Api: return self._async_bulk_client def async_metrics(self): - """Asynchronous metrics interface""" + """ + Get an asynchronous metrics client. + + Provides async/await style access to Prometheus metrics. + + Returns: + AsyncMetrics: Asynchronous metrics client + + Example: + ```python + async_metrics = api.async_metrics() + prometheus_text = await async_metrics.get() + print(prometheus_text) + ``` + """ if self._async_metrics is None: from . async_metrics import AsyncMetrics # Extract base URL (remove api/v1/ suffix) @@ -153,14 +481,52 @@ class Api: # Resource management def close(self): - """Close all synchronous connections""" + """ + Close all synchronous client connections. + + This method closes WebSocket and bulk operation connections. + It is automatically called when exiting a context manager. + + Example: + ```python + api = Api() + socket = api.socket() + # ... use socket + api.close() # Clean up connections + + # Or use context manager (automatic cleanup) + with Api() as api: + socket = api.socket() + # ... use socket + # Automatically closed + ``` + """ if self._socket_client: self._socket_client.close() if self._bulk_client: self._bulk_client.close() async def aclose(self): - """Close all asynchronous connections""" + """ + Close all asynchronous client connections. 
+ + This method closes async WebSocket, bulk operation, and flow connections. + It is automatically called when exiting an async context manager. + + Example: + ```python + api = Api() + async_socket = api.async_socket() + # ... use async_socket + await api.aclose() # Clean up connections + + # Or use async context manager (automatic cleanup) + async with Api() as api: + async_socket = api.async_socket() + # ... use async_socket + # Automatically closed + ``` + """ if self._async_socket_client: await self._async_socket_client.aclose() if self._async_bulk_client: @@ -170,13 +536,17 @@ class Api: # Context manager support def __enter__(self): + """Enter synchronous context manager.""" return self def __exit__(self, *args): + """Exit synchronous context manager and close connections.""" self.close() async def __aenter__(self): + """Enter asynchronous context manager.""" return self async def __aexit__(self, *args): + """Exit asynchronous context manager and close connections.""" await self.aclose() diff --git a/trustgraph-base/trustgraph/api/async_flow.py b/trustgraph-base/trustgraph/api/async_flow.py index 5d3cd486..6b28886b 100644 --- a/trustgraph-base/trustgraph/api/async_flow.py +++ b/trustgraph-base/trustgraph/api/async_flow.py @@ -1,3 +1,14 @@ +""" +TrustGraph Asynchronous Flow Management + +This module provides async/await based interfaces for managing and interacting +with TrustGraph flows using REST API calls. Unlike async_socket_client which +provides streaming support, this module is focused on non-streaming operations +that return complete responses. + +For streaming support (e.g., real-time agent responses, streaming RAG), use +AsyncSocketClient instead. +""" import aiohttp import json @@ -18,15 +29,47 @@ def check_error(response): class AsyncFlow: - """Asynchronous REST-based flow interface""" + """ + Asynchronous flow management client using REST API. 
+ + Provides async/await based flow management operations including listing, + starting, stopping flows, and managing flow class definitions. Also provides + access to flow-scoped services like agents, RAG, and queries via non-streaming + REST endpoints. + + Note: For streaming support, use AsyncSocketClient instead. + """ def __init__(self, url: str, timeout: int, token: Optional[str]) -> None: + """ + Initialize async flow client. + + Args: + url: Base URL for TrustGraph API + timeout: Request timeout in seconds + token: Optional bearer token for authentication + """ self.url: str = url self.timeout: int = timeout self.token: Optional[str] = token async def request(self, path: str, request_data: Dict[str, Any]) -> Dict[str, Any]: - """Make async HTTP request to Gateway API""" + """ + Make async HTTP POST request to Gateway API. + + Internal method for making authenticated requests to the TrustGraph API. + + Args: + path: API endpoint path (relative to base URL) + request_data: Request payload dictionary + + Returns: + dict: Response object from API + + Raises: + ProtocolException: If HTTP status is not 200 or response is not valid JSON + ApplicationException: If API returns an error response + """ url = f"{self.url}{path}" headers = {"Content-Type": "application/json"} @@ -49,12 +92,49 @@ class AsyncFlow: return obj async def list(self) -> List[str]: - """List all flows""" + """ + List all flow identifiers. + + Retrieves IDs of all flows currently deployed in the system. + + Returns: + list[str]: List of flow identifiers + + Example: + ```python + async_flow = await api.async_flow() + + # List all flows + flows = await async_flow.list() + print(f"Available flows: {flows}") + ``` + """ result = await self.request("flow", {"operation": "list-flows"}) return result.get("flow-ids", []) async def get(self, id: str) -> Dict[str, Any]: - """Get flow definition""" + """ + Get flow definition. 
+ + Retrieves the complete flow configuration including its class name, + description, and parameters. + + Args: + id: Flow identifier + + Returns: + dict: Flow definition object + + Example: + ```python + async_flow = await api.async_flow() + + # Get flow definition + flow_def = await async_flow.get("default") + print(f"Flow class: {flow_def.get('class-name')}") + print(f"Description: {flow_def.get('description')}") + ``` + """ result = await self.request("flow", { "operation": "get-flow", "flow-id": id @@ -62,7 +142,31 @@ class AsyncFlow: return json.loads(result.get("flow", "{}")) async def start(self, class_name: str, id: str, description: str, parameters: Optional[Dict] = None): - """Start a flow""" + """ + Start a new flow instance. + + Creates and starts a flow from a flow class definition with the specified + parameters. + + Args: + class_name: Flow class name to instantiate + id: Identifier for the new flow instance + description: Human-readable description of the flow + parameters: Optional configuration parameters for the flow + + Example: + ```python + async_flow = await api.async_flow() + + # Start a flow from a class + await async_flow.start( + class_name="default", + id="my-flow", + description="Custom flow instance", + parameters={"model": "claude-3-opus"} + ) + ``` + """ request_data = { "operation": "start-flow", "flow-id": id, @@ -75,19 +179,70 @@ class AsyncFlow: await self.request("flow", request_data) async def stop(self, id: str): - """Stop a flow""" + """ + Stop a running flow. + + Stops and removes a flow instance, freeing its resources. + + Args: + id: Flow identifier to stop + + Example: + ```python + async_flow = await api.async_flow() + + # Stop a flow + await async_flow.stop("my-flow") + ``` + """ await self.request("flow", { "operation": "stop-flow", "flow-id": id }) async def list_classes(self) -> List[str]: - """List flow classes""" + """ + List all flow class names. 
+ + Retrieves names of all flow classes (blueprints) available in the system. + + Returns: + list[str]: List of flow class names + + Example: + ```python + async_flow = await api.async_flow() + + # List available flow classes + classes = await async_flow.list_classes() + print(f"Available flow classes: {classes}") + ``` + """ result = await self.request("flow", {"operation": "list-classes"}) return result.get("class-names", []) async def get_class(self, class_name: str) -> Dict[str, Any]: - """Get flow class definition""" + """ + Get flow class definition. + + Retrieves the blueprint definition for a flow class, including its + configuration schema and service bindings. + + Args: + class_name: Flow class name + + Returns: + dict: Flow class definition object + + Example: + ```python + async_flow = await api.async_flow() + + # Get flow class definition + class_def = await async_flow.get_class("default") + print(f"Services: {class_def.get('services')}") + ``` + """ result = await self.request("flow", { "operation": "get-class", "class-name": class_name @@ -95,7 +250,29 @@ class AsyncFlow: return json.loads(result.get("class-definition", "{}")) async def put_class(self, class_name: str, definition: Dict[str, Any]): - """Create/update flow class""" + """ + Create or update a flow class definition. + + Stores a flow class blueprint that can be used to instantiate flows. 
+ + Args: + class_name: Flow class name + definition: Flow class definition object + + Example: + ```python + async_flow = await api.async_flow() + + # Create a custom flow class + class_def = { + "services": { + "agent": {"module": "agent", "config": {...}}, + "graph-rag": {"module": "graph-rag", "config": {...}} + } + } + await async_flow.put_class("custom-flow", class_def) + ``` + """ await self.request("flow", { "operation": "put-class", "class-name": class_name, @@ -103,35 +280,145 @@ class AsyncFlow: }) async def delete_class(self, class_name: str): - """Delete flow class""" + """ + Delete a flow class definition. + + Removes a flow class blueprint from the system. Does not affect + running flow instances. + + Args: + class_name: Flow class name to delete + + Example: + ```python + async_flow = await api.async_flow() + + # Delete a flow class + await async_flow.delete_class("old-flow-class") + ``` + """ await self.request("flow", { "operation": "delete-class", "class-name": class_name }) def id(self, flow_id: str): - """Get async flow instance""" + """ + Get an async flow instance client. + + Returns a client for interacting with a specific flow's services + (agent, RAG, queries, embeddings, etc.). + + Args: + flow_id: Flow identifier + + Returns: + AsyncFlowInstance: Client for flow-specific operations + + Example: + ```python + async_flow = await api.async_flow() + + # Get flow instance + flow = async_flow.id("default") + + # Use flow services + result = await flow.graph_rag( + query="What is TrustGraph?", + user="trustgraph", + collection="default" + ) + ``` + """ return AsyncFlowInstance(self, flow_id) async def aclose(self) -> None: - """Close connection (cleanup handled by aiohttp session)""" + """ + Close async client and cleanup resources. + + Note: Cleanup is handled automatically by aiohttp session context managers. + This method is provided for consistency with other async clients. 
+ """ pass class AsyncFlowInstance: - """Asynchronous REST flow instance""" + """ + Asynchronous flow instance client. + + Provides async/await access to flow-scoped services including agents, + RAG queries, embeddings, and graph queries. All operations return complete + responses (non-streaming). + + Note: For streaming support, use AsyncSocketFlowInstance instead. + """ def __init__(self, flow: AsyncFlow, flow_id: str): + """ + Initialize async flow instance. + + Args: + flow: Parent AsyncFlow client + flow_id: Flow identifier + """ self.flow = flow self.flow_id = flow_id async def request(self, service: str, request_data: Dict[str, Any]) -> Dict[str, Any]: - """Make request to flow-scoped service""" + """ + Make request to a flow-scoped service. + + Internal method for calling services within this flow instance. + + Args: + service: Service name (e.g., "agent", "graph-rag", "triples") + request_data: Service request payload + + Returns: + dict: Service response object + + Raises: + ProtocolException: If request fails or response is invalid + ApplicationException: If service returns an error + """ return await self.flow.request(f"flow/{self.flow_id}/service/{service}", request_data) async def agent(self, question: str, user: str, state: Optional[Dict] = None, group: Optional[str] = None, history: Optional[List] = None, **kwargs: Any) -> Dict[str, Any]: - """Execute agent (non-streaming, use async_socket for streaming)""" + """ + Execute an agent operation (non-streaming). + + Runs an agent to answer a question, with optional conversation state and + history. Returns the complete response after the agent has finished + processing. + + Note: This method does not support streaming. For real-time agent thoughts + and observations, use AsyncSocketFlowInstance.agent() instead. 
+ + Args: + question: User question or instruction + user: User identifier + state: Optional state dictionary for conversation context + group: Optional group identifier for session management + history: Optional conversation history list + **kwargs: Additional service-specific parameters + + Returns: + dict: Complete agent response including answer and metadata + + Example: + ```python + async_flow = await api.async_flow() + flow = async_flow.id("default") + + # Execute agent + result = await flow.agent( + question="What is the capital of France?", + user="trustgraph" + ) + print(f"Answer: {result.get('response')}") + ``` + """ request_data = { "question": question, "user": user, @@ -148,7 +435,36 @@ class AsyncFlowInstance: return await self.request("agent", request_data) async def text_completion(self, system: str, prompt: str, **kwargs: Any) -> str: - """Text completion (non-streaming, use async_socket for streaming)""" + """ + Generate text completion (non-streaming). + + Generates a text response from an LLM given a system prompt and user prompt. + Returns the complete response text. + + Note: This method does not support streaming. For streaming text generation, + use AsyncSocketFlowInstance.text_completion() instead. + + Args: + system: System prompt defining the LLM's behavior + prompt: User prompt or question + **kwargs: Additional service-specific parameters + + Returns: + str: Complete generated text response + + Example: + ```python + async_flow = await api.async_flow() + flow = async_flow.id("default") + + # Generate text + response = await flow.text_completion( + system="You are a helpful assistant.", + prompt="Explain quantum computing in simple terms." 
+ ) + print(response) + ``` + """ request_data = { "system": system, "prompt": prompt, @@ -162,7 +478,43 @@ class AsyncFlowInstance: async def graph_rag(self, query: str, user: str, collection: str, max_subgraph_size: int = 1000, max_subgraph_count: int = 5, max_entity_distance: int = 3, **kwargs: Any) -> str: - """Graph RAG (non-streaming, use async_socket for streaming)""" + """ + Execute graph-based RAG query (non-streaming). + + Performs Retrieval-Augmented Generation using knowledge graph data. + Identifies relevant entities and their relationships, then generates a + response grounded in the graph structure. Returns complete response. + + Note: This method does not support streaming. For streaming RAG responses, + use AsyncSocketFlowInstance.graph_rag() instead. + + Args: + query: User query text + user: User identifier + collection: Collection identifier containing the knowledge graph + max_subgraph_size: Maximum number of triples per subgraph (default: 1000) + max_subgraph_count: Maximum number of subgraphs to retrieve (default: 5) + max_entity_distance: Maximum graph distance for entity expansion (default: 3) + **kwargs: Additional service-specific parameters + + Returns: + str: Complete generated response grounded in graph data + + Example: + ```python + async_flow = await api.async_flow() + flow = async_flow.id("default") + + # Query knowledge graph + response = await flow.graph_rag( + query="What are the relationships between these entities?", + user="trustgraph", + collection="medical-kb", + max_subgraph_count=3 + ) + print(response) + ``` + """ request_data = { "query": query, "user": user, @@ -179,7 +531,41 @@ class AsyncFlowInstance: async def document_rag(self, query: str, user: str, collection: str, doc_limit: int = 10, **kwargs: Any) -> str: - """Document RAG (non-streaming, use async_socket for streaming)""" + """ + Execute document-based RAG query (non-streaming). + + Performs Retrieval-Augmented Generation using document embeddings. 
+ Retrieves relevant document chunks via semantic search, then generates + a response grounded in the retrieved documents. Returns complete response. + + Note: This method does not support streaming. For streaming RAG responses, + use AsyncSocketFlowInstance.document_rag() instead. + + Args: + query: User query text + user: User identifier + collection: Collection identifier containing documents + doc_limit: Maximum number of document chunks to retrieve (default: 10) + **kwargs: Additional service-specific parameters + + Returns: + str: Complete generated response grounded in document data + + Example: + ```python + async_flow = await api.async_flow() + flow = async_flow.id("default") + + # Query documents + response = await flow.document_rag( + query="What does the documentation say about authentication?", + user="trustgraph", + collection="docs", + doc_limit=5 + ) + print(response) + ``` + """ request_data = { "query": query, "user": user, @@ -193,7 +579,39 @@ class AsyncFlowInstance: return result.get("response", "") async def graph_embeddings_query(self, text: str, user: str, collection: str, limit: int = 10, **kwargs: Any): - """Query graph embeddings for semantic search""" + """ + Query graph embeddings for semantic entity search. + + Performs semantic search over graph entity embeddings to find entities + most relevant to the input text. Returns entities ranked by similarity. 
+ + Args: + text: Query text for semantic search + user: User identifier + collection: Collection identifier containing graph embeddings + limit: Maximum number of results to return (default: 10) + **kwargs: Additional service-specific parameters + + Returns: + dict: Response containing ranked entity matches with similarity scores + + Example: + ```python + async_flow = await api.async_flow() + flow = async_flow.id("default") + + # Find related entities + results = await flow.graph_embeddings_query( + text="machine learning algorithms", + user="trustgraph", + collection="tech-kb", + limit=5 + ) + + for entity in results.get("entities", []): + print(f"{entity['name']}: {entity['score']}") + ``` + """ request_data = { "text": text, "user": user, @@ -205,14 +623,72 @@ class AsyncFlowInstance: return await self.request("graph-embeddings", request_data) async def embeddings(self, text: str, **kwargs: Any): - """Generate text embeddings""" + """ + Generate embeddings for input text. + + Converts text into a numerical vector representation using the flow's + configured embedding model. Useful for semantic search and similarity + comparisons. + + Args: + text: Input text to embed + **kwargs: Additional service-specific parameters + + Returns: + dict: Response containing embedding vector and metadata + + Example: + ```python + async_flow = await api.async_flow() + flow = async_flow.id("default") + + # Generate embeddings + result = await flow.embeddings(text="Sample text to embed") + vector = result.get("embedding") + print(f"Embedding dimension: {len(vector)}") + ``` + """ request_data = {"text": text} request_data.update(kwargs) return await self.request("embeddings", request_data) async def triples_query(self, s=None, p=None, o=None, user=None, collection=None, limit=100, **kwargs: Any): - """Triple pattern query""" + """ + Query RDF triples using pattern matching. + + Searches for triples matching the specified subject, predicate, and/or + object patterns. 
Patterns use None as a wildcard to match any value. + + Args: + s: Subject pattern (None for wildcard) + p: Predicate pattern (None for wildcard) + o: Object pattern (None for wildcard) + user: User identifier (None for all users) + collection: Collection identifier (None for all collections) + limit: Maximum number of triples to return (default: 100) + **kwargs: Additional service-specific parameters + + Returns: + dict: Response containing matching triples + + Example: + ```python + async_flow = await api.async_flow() + flow = async_flow.id("default") + + # Find all triples with a specific predicate + results = await flow.triples_query( + p="knows", + user="trustgraph", + collection="social", + limit=50 + ) + + for triple in results.get("triples", []): + print(f"{triple['s']} knows {triple['o']}") + ``` + """ request_data = {"limit": limit} if s is not None: request_data["s"] = str(s) @@ -230,7 +706,50 @@ class AsyncFlowInstance: async def objects_query(self, query: str, user: str, collection: str, variables: Optional[Dict] = None, operation_name: Optional[str] = None, **kwargs: Any): - """GraphQL query""" + """ + Execute a GraphQL query on stored objects. + + Queries structured data objects using GraphQL syntax. Supports complex + queries with variables and named operations. + + Args: + query: GraphQL query string + user: User identifier + collection: Collection identifier containing objects + variables: Optional GraphQL query variables + operation_name: Optional operation name for multi-operation queries + **kwargs: Additional service-specific parameters + + Returns: + dict: GraphQL response with data and/or errors + + Example: + ```python + async_flow = await api.async_flow() + flow = async_flow.id("default") + + # Execute GraphQL query + query = ''' + query GetUsers($status: String!) 
{ + users(status: $status) { + id + name + email + } + } + ''' + + result = await flow.objects_query( + query=query, + user="trustgraph", + collection="users", + variables={"status": "active"} + ) + + for user in result.get("data", {}).get("users", []): + print(f"{user['name']}: {user['email']}") + ``` + """ request_data = { "query": query, "user": user, diff --git a/trustgraph-base/trustgraph/api/bulk_client.py b/trustgraph-base/trustgraph/api/bulk_client.py index a119668d..a2796332 100644 --- a/trustgraph-base/trustgraph/api/bulk_client.py +++ b/trustgraph-base/trustgraph/api/bulk_client.py @@ -1,3 +1,10 @@ +""" +TrustGraph Synchronous Bulk Operations Client + +This module provides synchronous bulk import/export operations via WebSocket +for efficient transfer of large datasets including triples, embeddings, +entity contexts, and objects. +""" import json import asyncio @@ -9,9 +16,24 @@ from . exceptions import ProtocolException class BulkClient: - """Synchronous bulk operations client""" + """ + Synchronous bulk operations client for import/export. + + Provides efficient bulk data transfer via WebSocket for large datasets. + Wraps async WebSocket operations with synchronous generators for ease of use. + + Note: For true async support, use AsyncBulkClient instead. + """ def __init__(self, url: str, timeout: int, token: Optional[str]) -> None: + """ + Initialize synchronous bulk client. + + Args: + url: Base URL for TrustGraph API (HTTP/HTTPS will be converted to WS/WSS) + timeout: WebSocket timeout in seconds + token: Optional bearer token for authentication + """ self.url: str = self._convert_to_ws_url(url) self.timeout: int = timeout self.token: Optional[str] = token @@ -41,7 +63,32 @@ class BulkClient: return loop.run_until_complete(coro) def import_triples(self, flow: str, triples: Iterator[Triple], **kwargs: Any) -> None: - """Bulk import triples via WebSocket""" + """ + Bulk import RDF triples into a flow. 
+ + Efficiently uploads large numbers of triples via WebSocket streaming. + + Args: + flow: Flow identifier + triples: Iterator yielding Triple objects + **kwargs: Additional parameters (reserved for future use) + + Example: + ```python + from trustgraph.api import Triple + + bulk = api.bulk() + + # Generate triples to import + def triple_generator(): + yield Triple(s="subj1", p="pred", o="obj1") + yield Triple(s="subj2", p="pred", o="obj2") + # ... more triples + + # Import triples + bulk.import_triples(flow="default", triples=triple_generator()) + ``` + """ self._run_async(self._import_triples_async(flow, triples)) async def _import_triples_async(self, flow: str, triples: Iterator[Triple]) -> None: @@ -60,7 +107,27 @@ class BulkClient: await websocket.send(json.dumps(message)) def export_triples(self, flow: str, **kwargs: Any) -> Iterator[Triple]: - """Bulk export triples via WebSocket""" + """ + Bulk export RDF triples from a flow. + + Efficiently downloads all triples via WebSocket streaming. + + Args: + flow: Flow identifier + **kwargs: Additional parameters (reserved for future use) + + Returns: + Iterator[Triple]: Stream of Triple objects + + Example: + ```python + bulk = api.bulk() + + # Export and process triples + for triple in bulk.export_triples(flow="default"): + print(f"{triple.s} -> {triple.p} -> {triple.o}") + ``` + """ async_gen = self._export_triples_async(flow) try: @@ -101,7 +168,32 @@ class BulkClient: ) def import_graph_embeddings(self, flow: str, embeddings: Iterator[Dict[str, Any]], **kwargs: Any) -> None: - """Bulk import graph embeddings via WebSocket""" + """ + Bulk import graph embeddings into a flow. + + Efficiently uploads graph entity embeddings via WebSocket streaming. 
+ + Args: + flow: Flow identifier + embeddings: Iterator yielding embedding dictionaries + **kwargs: Additional parameters (reserved for future use) + + Example: + ```python + bulk = api.bulk() + + # Generate embeddings to import + def embedding_generator(): + yield {"entity": "entity1", "embedding": [0.1, 0.2, ...]} + yield {"entity": "entity2", "embedding": [0.3, 0.4, ...]} + # ... more embeddings + + bulk.import_graph_embeddings( + flow="default", + embeddings=embedding_generator() + ) + ``` + """ self._run_async(self._import_graph_embeddings_async(flow, embeddings)) async def _import_graph_embeddings_async(self, flow: str, embeddings: Iterator[Dict[str, Any]]) -> None: @@ -115,7 +207,29 @@ class BulkClient: await websocket.send(json.dumps(embedding)) def export_graph_embeddings(self, flow: str, **kwargs: Any) -> Iterator[Dict[str, Any]]: - """Bulk export graph embeddings via WebSocket""" + """ + Bulk export graph embeddings from a flow. + + Efficiently downloads all graph entity embeddings via WebSocket streaming. + + Args: + flow: Flow identifier + **kwargs: Additional parameters (reserved for future use) + + Returns: + Iterator[Dict[str, Any]]: Stream of embedding dictionaries + + Example: + ```python + bulk = api.bulk() + + # Export and process embeddings + for embedding in bulk.export_graph_embeddings(flow="default"): + entity = embedding.get("entity") + vector = embedding.get("embedding") + print(f"{entity}: {len(vector)} dimensions") + ``` + """ async_gen = self._export_graph_embeddings_async(flow) try: @@ -151,7 +265,33 @@ class BulkClient: yield json.loads(raw_message) def import_document_embeddings(self, flow: str, embeddings: Iterator[Dict[str, Any]], **kwargs: Any) -> None: - """Bulk import document embeddings via WebSocket""" + """ + Bulk import document embeddings into a flow. + + Efficiently uploads document chunk embeddings via WebSocket streaming + for use in document RAG queries. 
+ + Args: + flow: Flow identifier + embeddings: Iterator yielding embedding dictionaries + **kwargs: Additional parameters (reserved for future use) + + Example: + ```python + bulk = api.bulk() + + # Generate document embeddings to import + def doc_embedding_generator(): + yield {"id": "doc1-chunk1", "embedding": [0.1, 0.2, ...]} + yield {"id": "doc1-chunk2", "embedding": [0.3, 0.4, ...]} + # ... more embeddings + + bulk.import_document_embeddings( + flow="default", + embeddings=doc_embedding_generator() + ) + ``` + """ self._run_async(self._import_document_embeddings_async(flow, embeddings)) async def _import_document_embeddings_async(self, flow: str, embeddings: Iterator[Dict[str, Any]]) -> None: @@ -165,7 +305,29 @@ class BulkClient: await websocket.send(json.dumps(embedding)) def export_document_embeddings(self, flow: str, **kwargs: Any) -> Iterator[Dict[str, Any]]: - """Bulk export document embeddings via WebSocket""" + """ + Bulk export document embeddings from a flow. + + Efficiently downloads all document chunk embeddings via WebSocket streaming. + + Args: + flow: Flow identifier + **kwargs: Additional parameters (reserved for future use) + + Returns: + Iterator[Dict[str, Any]]: Stream of embedding dictionaries + + Example: + ```python + bulk = api.bulk() + + # Export and process document embeddings + for embedding in bulk.export_document_embeddings(flow="default"): + doc_id = embedding.get("id") + vector = embedding.get("embedding") + print(f"{doc_id}: {len(vector)} dimensions") + ``` + """ async_gen = self._export_document_embeddings_async(flow) try: @@ -201,7 +363,34 @@ class BulkClient: yield json.loads(raw_message) def import_entity_contexts(self, flow: str, contexts: Iterator[Dict[str, Any]], **kwargs: Any) -> None: - """Bulk import entity contexts via WebSocket""" + """ + Bulk import entity contexts into a flow. + + Efficiently uploads entity context information via WebSocket streaming. 
+ Entity contexts provide additional textual context about graph entities + for improved RAG performance. + + Args: + flow: Flow identifier + contexts: Iterator yielding context dictionaries + **kwargs: Additional parameters (reserved for future use) + + Example: + ```python + bulk = api.bulk() + + # Generate entity contexts to import + def context_generator(): + yield {"entity": "entity1", "context": "Description of entity1..."} + yield {"entity": "entity2", "context": "Description of entity2..."} + # ... more contexts + + bulk.import_entity_contexts( + flow="default", + contexts=context_generator() + ) + ``` + """ self._run_async(self._import_entity_contexts_async(flow, contexts)) async def _import_entity_contexts_async(self, flow: str, contexts: Iterator[Dict[str, Any]]) -> None: @@ -215,7 +404,29 @@ class BulkClient: await websocket.send(json.dumps(context)) def export_entity_contexts(self, flow: str, **kwargs: Any) -> Iterator[Dict[str, Any]]: - """Bulk export entity contexts via WebSocket""" + """ + Bulk export entity contexts from a flow. + + Efficiently downloads all entity context information via WebSocket streaming. + + Args: + flow: Flow identifier + **kwargs: Additional parameters (reserved for future use) + + Returns: + Iterator[Dict[str, Any]]: Stream of context dictionaries + + Example: + ```python + bulk = api.bulk() + + # Export and process entity contexts + for context in bulk.export_entity_contexts(flow="default"): + entity = context.get("entity") + text = context.get("context") + print(f"{entity}: {text[:100]}...") + ``` + """ async_gen = self._export_entity_contexts_async(flow) try: @@ -251,7 +462,33 @@ class BulkClient: yield json.loads(raw_message) def import_objects(self, flow: str, objects: Iterator[Dict[str, Any]], **kwargs: Any) -> None: - """Bulk import objects via WebSocket""" + """ + Bulk import structured objects into a flow. + + Efficiently uploads structured data objects via WebSocket streaming + for use in GraphQL queries. 
+ + Args: + flow: Flow identifier + objects: Iterator yielding object dictionaries + **kwargs: Additional parameters (reserved for future use) + + Example: + ```python + bulk = api.bulk() + + # Generate objects to import + def object_generator(): + yield {"id": "obj1", "name": "Object 1", "value": 100} + yield {"id": "obj2", "name": "Object 2", "value": 200} + # ... more objects + + bulk.import_objects( + flow="default", + objects=object_generator() + ) + ``` + """ self._run_async(self._import_objects_async(flow, objects)) async def _import_objects_async(self, flow: str, objects: Iterator[Dict[str, Any]]) -> None: diff --git a/trustgraph-base/trustgraph/api/collection.py b/trustgraph-base/trustgraph/api/collection.py index 5a1f0850..414d07db 100644 --- a/trustgraph-base/trustgraph/api/collection.py +++ b/trustgraph-base/trustgraph/api/collection.py @@ -1,3 +1,11 @@ +""" +TrustGraph Collection Management + +This module provides interfaces for managing data collections in TrustGraph. +Collections provide logical grouping and isolation for documents and knowledge +graph data. +""" + import datetime import logging @@ -7,14 +15,71 @@ from . exceptions import * logger = logging.getLogger(__name__) class Collection: + """ + Collection management client. + + Provides methods for managing data collections, including listing, + updating metadata, and deleting collections. Collections organize + documents and knowledge graph data into logical groupings for + isolation and access control. + """ def __init__(self, api): + """ + Initialize Collection client. + + Args: + api: Parent Api instance for making requests + """ self.api = api def request(self, request): + """ + Make a collection-scoped API request. + + Args: + request: Request payload dictionary + + Returns: + dict: Response object + """ return self.api.request(f"collection-management", request) def list_collections(self, user, tag_filter=None): + """ + List all collections for a user. 
+ + Retrieves metadata for all collections owned by the specified user, + with optional filtering by tags. + + Args: + user: User identifier + tag_filter: Optional list of tags to filter collections (default: None) + + Returns: + list[CollectionMetadata]: List of collection metadata objects + + Raises: + ProtocolException: If response format is invalid + + Example: + ```python + collection = api.collection() + + # List all collections + all_colls = collection.list_collections(user="trustgraph") + for coll in all_colls: + print(f"{coll.collection}: {coll.name}") + print(f" Description: {coll.description}") + print(f" Tags: {', '.join(coll.tags)}") + + # List collections with specific tags + research_colls = collection.list_collections( + user="trustgraph", + tag_filter=["research", "published"] + ) + ``` + """ input = { "operation": "list-collections", @@ -50,6 +115,46 @@ class Collection: raise ProtocolException(f"Response not formatted correctly") def update_collection(self, user, collection, name=None, description=None, tags=None): + """ + Update collection metadata. + + Updates the name, description, and/or tags for an existing collection. + Only provided fields are updated; others remain unchanged. 
+ + Args: + user: User identifier + collection: Collection identifier + name: New collection name (optional) + description: New collection description (optional) + tags: New list of tags (optional) + + Returns: + CollectionMetadata: Updated collection metadata, or None if not found + + Raises: + ProtocolException: If response format is invalid + + Example: + ```python + collection_api = api.collection() + + # Update collection metadata + updated = collection_api.update_collection( + user="trustgraph", + collection="default", + name="Default Collection", + description="Main data collection for general use", + tags=["default", "production"] + ) + + # Update only specific fields + updated = collection_api.update_collection( + user="trustgraph", + collection="research", + description="Updated description" + ) + ``` + """ input = { "operation": "update-collection", @@ -82,6 +187,29 @@ class Collection: raise ProtocolException(f"Response not formatted correctly") def delete_collection(self, user, collection): + """ + Delete a collection. + + Removes a collection and all its associated data from the system. + + Args: + user: User identifier + collection: Collection identifier to delete + + Returns: + dict: Empty response object + + Example: + ```python + collection_api = api.collection() + + # Delete a collection + collection_api.delete_collection( + user="trustgraph", + collection="old-collection" + ) + ``` + """ input = { "operation": "delete-collection", diff --git a/trustgraph-base/trustgraph/api/config.py b/trustgraph-base/trustgraph/api/config.py index cd50ca6c..c8c8d5bb 100644 --- a/trustgraph-base/trustgraph/api/config.py +++ b/trustgraph-base/trustgraph/api/config.py @@ -1,3 +1,9 @@ +""" +TrustGraph Configuration Management + +This module provides interfaces for managing TrustGraph configuration settings, +including retrieving, updating, and deleting configuration values. +""" import logging @@ -7,14 +13,67 @@ from . 
types import ConfigValue logger = logging.getLogger(__name__) class Config: + """ + Configuration management client. + + Provides methods for managing TrustGraph configuration settings across + different types (llm, embedding, etc.), with support for get, put, delete, + and list operations. + """ def __init__(self, api): + """ + Initialize Config client. + + Args: + api: Parent Api instance for making requests + """ self.api = api def request(self, request): + """ + Make a configuration-scoped API request. + + Args: + request: Request payload dictionary + + Returns: + dict: Response object + """ return self.api.request("config", request) def get(self, keys): + """ + Get configuration values for specified keys. + + Retrieves the configuration values for one or more configuration keys. + + Args: + keys: List of ConfigKey objects specifying which values to retrieve + + Returns: + list[ConfigValue]: List of configuration values + + Raises: + ProtocolException: If response format is invalid + + Example: + ```python + from trustgraph.api import ConfigKey + + config = api.config() + + # Get specific configuration values + values = config.get([ + ConfigKey(type="llm", key="model"), + ConfigKey(type="llm", key="temperature"), + ConfigKey(type="embedding", key="model") + ]) + + for val in values: + print(f"{val.type}.{val.key} = {val.value}") + ``` + """ # The input consists of system and prompt strings input = { @@ -41,6 +100,28 @@ class Config: raise ProtocolException("Response not formatted correctly") def put(self, values): + """ + Set configuration values. + + Updates or creates configuration values for the specified keys. 
+ + Args: + values: List of ConfigValue objects with type, key, and value + + Example: + ```python + from trustgraph.api import ConfigValue + + config = api.config() + + # Set configuration values + config.put([ + ConfigValue(type="llm", key="model", value="gpt-4"), + ConfigValue(type="llm", key="temperature", value="0.7"), + ConfigValue(type="embedding", key="model", value="text-embedding-3-small") + ]) + ``` + """ # The input consists of system and prompt strings input = { @@ -54,6 +135,27 @@ class Config: self.request(input) def delete(self, keys): + """ + Delete configuration values. + + Removes configuration values for the specified keys. + + Args: + keys: List of ConfigKey objects specifying which values to delete + + Example: + ```python + from trustgraph.api import ConfigKey + + config = api.config() + + # Delete configuration values + config.delete([ + ConfigKey(type="llm", key="old-setting"), + ConfigKey(type="embedding", key="deprecated") + ]) + ``` + """ # The input consists of system and prompt strings input = { @@ -67,6 +169,31 @@ class Config: self.request(input) def list(self, type): + """ + List all configuration keys for a given type. + + Retrieves a list of all configuration key names within a specific + configuration type. + + Args: + type: Configuration type (e.g., "llm", "embedding", "storage") + + Returns: + list[str]: List of configuration key names + + Example: + ```python + config = api.config() + + # List all LLM configuration keys + llm_keys = config.list(type="llm") + print(f"LLM configuration keys: {llm_keys}") + + # List all embedding configuration keys + embedding_keys = config.list(type="embedding") + print(f"Embedding configuration keys: {embedding_keys}") + ``` + """ # The input consists of system and prompt strings input = { @@ -77,6 +204,36 @@ class Config: return self.request(input)["directory"] def get_values(self, type): + """ + Get all configuration values for a given type. 
+ + Retrieves all configuration key-value pairs within a specific + configuration type. + + Args: + type: Configuration type (e.g., "llm", "embedding", "storage") + + Returns: + list[ConfigValue]: List of all configuration values for the type + + Raises: + ProtocolException: If response format is invalid + + Example: + ```python + config = api.config() + + # Get all LLM configuration + llm_config = config.get_values(type="llm") + for val in llm_config: + print(f"{val.key} = {val.value}") + + # Get all embedding configuration + embedding_config = config.get_values(type="embedding") + for val in embedding_config: + print(f"{val.key} = {val.value}") + ``` + """ # The input consists of system and prompt strings input = { @@ -99,6 +256,28 @@ class Config: raise ProtocolException(f"Response not formatted correctly") def all(self): + """ + Get complete configuration and version. + + Retrieves the entire configuration object along with its version number. + + Returns: + tuple: (config_dict, version_string) - Complete configuration and version + + Raises: + ProtocolException: If response format is invalid + + Example: + ```python + config = api.config() + + # Get complete configuration + config_data, version = config.all() + + print(f"Configuration version: {version}") + print(f"Configuration: {config_data}") + ``` + """ # The input consists of system and prompt strings input = { diff --git a/trustgraph-base/trustgraph/api/flow.py b/trustgraph-base/trustgraph/api/flow.py index 142a699b..d06a6327 100644 --- a/trustgraph-base/trustgraph/api/flow.py +++ b/trustgraph-base/trustgraph/api/flow.py @@ -1,3 +1,10 @@ +""" +TrustGraph Flow Management + +This module provides interfaces for managing and executing TrustGraph flows. +Flows are the primary execution units that provide access to various services +including LLM operations, RAG queries, knowledge graph management, and more. 
+""" import json import base64 @@ -11,11 +18,38 @@ def to_value(x): return Literal(x["v"]) class Flow: + """ + Flow management client for blueprint and flow instance operations. + + This class provides methods for managing flow blueprints (templates) and + flow instances (running flows). Blueprints define the structure and + parameters of flows, while instances represent active flows that can + execute services. + """ def __init__(self, api): + """ + Initialize Flow client. + + Args: + api: Parent Api instance for making requests + """ self.api = api def request(self, path=None, request=None): + """ + Make a flow-scoped API request. + + Args: + path: Optional path suffix for flow endpoints + request: Request payload dictionary + + Returns: + dict: Response object + + Raises: + RuntimeError: If request parameter is not specified + """ if request is None: raise RuntimeError("request must be specified") @@ -26,9 +60,39 @@ class Flow: return self.api.request(f"flow", request) def id(self, id="default"): + """ + Get a FlowInstance for executing operations on a specific flow. + + Args: + id: Flow identifier (default: "default") + + Returns: + FlowInstance: Flow instance for service operations + + Example: + ```python + flow = api.flow().id("my-flow") + response = flow.text_completion( + system="You are helpful", + prompt="Hello" + ) + ``` + """ return FlowInstance(api=self, id=id) def list_blueprints(self): + """ + List all available flow blueprints. + + Returns: + list[str]: List of blueprint names + + Example: + ```python + blueprints = api.flow().list_blueprints() + print(blueprints) # ['default', 'custom-flow', ...] + ``` + """ # The input consists of system and prompt strings input = { @@ -38,6 +102,21 @@ class Flow: return self.request(request = input)["blueprint-names"] def get_blueprint(self, blueprint_name): + """ + Get a flow blueprint definition by name. 
+ + Args: + blueprint_name: Name of the blueprint to retrieve + + Returns: + dict: Blueprint definition as a dictionary + + Example: + ```python + blueprint = api.flow().get_blueprint("default") + print(blueprint) # Blueprint configuration + ``` + """ # The input consists of system and prompt strings input = { @@ -48,6 +127,22 @@ class Flow: return json.loads(self.request(request = input)["blueprint-definition"]) def put_blueprint(self, blueprint_name, definition): + """ + Create or update a flow blueprint. + + Args: + blueprint_name: Name for the blueprint + definition: Blueprint definition dictionary + + Example: + ```python + definition = { + "services": ["text-completion", "graph-rag"], + "parameters": {"model": "gpt-4"} + } + api.flow().put_blueprint("my-blueprint", definition) + ``` + """ # The input consists of system and prompt strings input = { @@ -59,6 +154,17 @@ class Flow: self.request(request = input) def delete_blueprint(self, blueprint_name): + """ + Delete a flow blueprint. + + Args: + blueprint_name: Name of the blueprint to delete + + Example: + ```python + api.flow().delete_blueprint("old-blueprint") + ``` + """ # The input consists of system and prompt strings input = { @@ -69,6 +175,18 @@ class Flow: self.request(request = input) def list(self): + """ + List all active flow instances. + + Returns: + list[str]: List of flow instance IDs + + Example: + ```python + flows = api.flow().list() + print(flows) # ['default', 'flow-1', 'flow-2', ...] + ``` + """ # The input consists of system and prompt strings input = { @@ -78,6 +196,21 @@ class Flow: return self.request(request = input)["flow-ids"] def get(self, id): + """ + Get the definition of a running flow instance. 
+ + Args: + id: Flow instance ID + + Returns: + dict: Flow instance definition + + Example: + ```python + flow_def = api.flow().get("default") + print(flow_def) + ``` + """ # The input consists of system and prompt strings input = { @@ -88,6 +221,25 @@ class Flow: return json.loads(self.request(request = input)["flow"]) def start(self, blueprint_name, id, description, parameters=None): + """ + Start a new flow instance from a blueprint. + + Args: + blueprint_name: Name of the blueprint to instantiate + id: Unique identifier for the flow instance + description: Human-readable description + parameters: Optional parameters dictionary + + Example: + ```python + api.flow().start( + blueprint_name="default", + id="my-flow", + description="My custom flow", + parameters={"model": "gpt-4"} + ) + ``` + """ # The input consists of system and prompt strings input = { @@ -103,6 +255,17 @@ class Flow: self.request(request = input) def stop(self, id): + """ + Stop a running flow instance. + + Args: + id: Flow instance ID to stop + + Example: + ```python + api.flow().stop("my-flow") + ``` + """ # The input consists of system and prompt strings input = { @@ -111,18 +274,70 @@ class Flow: } self.request(request = input) - + class FlowInstance: + """ + Flow instance client for executing services on a specific flow. + + This class provides access to all TrustGraph services including: + - Text completion and embeddings + - Agent operations with state management + - Graph and document RAG queries + - Knowledge graph operations (triples, objects) + - Document loading and processing + - Natural language to GraphQL query conversion + - Structured data analysis and schema detection + - MCP tool execution + - Prompt templating + + Services are accessed through a running flow instance identified by ID. + """ def __init__(self, api, id): + """ + Initialize FlowInstance. 
+ + Args: + api: Parent Flow client + id: Flow instance identifier + """ self.api = api self.id = id def request(self, path, request): + """ + Make a service request on this flow instance. + Args: + path: Service path (e.g., "service/text-completion") + request: Request payload dictionary + + Returns: + dict: Service response + """ return self.api.request(path = f"{self.id}/{path}", request = request) def text_completion(self, system, prompt): + """ + Execute text completion using the flow's LLM. + + Args: + system: System prompt defining the assistant's behavior + prompt: User prompt/question + + Returns: + str: Generated response text + + Example: + ```python + flow = api.flow().id("default") + response = flow.text_completion( + system="You are a helpful assistant", + prompt="What is quantum computing?" + ) + print(response) + ``` + """ # The input consists of system and prompt strings input = { @@ -136,6 +351,44 @@ class FlowInstance: )["response"] def agent(self, question, user="trustgraph", state=None, group=None, history=None): + """ + Execute an agent operation with reasoning and tool use capabilities. + + Agents can perform multi-step reasoning, use tools, and maintain conversation + state across interactions. This is a synchronous non-streaming version. + + Args: + question: User question or instruction + user: User identifier (default: "trustgraph") + state: Optional state dictionary for stateful conversations + group: Optional group identifier for multi-user contexts + history: Optional conversation history as list of message dicts + + Returns: + str: Agent's final answer + + Example: + ```python + flow = api.flow().id("default") + + # Simple question + answer = flow.agent( + question="What is the capital of France?", + user="trustgraph" + ) + + # With conversation history + history = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi! 
How can I help?"} + ] + answer = flow.agent( + question="Tell me about Paris", + user="trustgraph", + history=history + ) + ``` + """ # The input consists of a question and optional context input = { @@ -164,6 +417,37 @@ class FlowInstance: entity_limit=50, triple_limit=30, max_subgraph_size=150, max_path_length=2, ): + """ + Execute graph-based Retrieval-Augmented Generation (RAG) query. + + Graph RAG uses knowledge graph structure to find relevant context by + traversing entity relationships, then generates a response using an LLM. + + Args: + query: Natural language query + user: User/keyspace identifier (default: "trustgraph") + collection: Collection identifier (default: "default") + entity_limit: Maximum entities to retrieve (default: 50) + triple_limit: Maximum triples per entity (default: 30) + max_subgraph_size: Maximum total triples in subgraph (default: 150) + max_path_length: Maximum traversal depth (default: 2) + + Returns: + str: Generated response incorporating graph context + + Example: + ```python + flow = api.flow().id("default") + response = flow.graph_rag( + query="Tell me about Marie Curie's discoveries", + user="trustgraph", + collection="scientists", + entity_limit=20, + max_path_length=3 + ) + print(response) + ``` + """ # The input consists of a question input = { @@ -185,6 +469,33 @@ class FlowInstance: self, query, user="trustgraph", collection="default", doc_limit=10, ): + """ + Execute document-based Retrieval-Augmented Generation (RAG) query. + + Document RAG uses vector embeddings to find relevant document chunks, + then generates a response using an LLM with those chunks as context. 
+ + Args: + query: Natural language query + user: User/keyspace identifier (default: "trustgraph") + collection: Collection identifier (default: "default") + doc_limit: Maximum document chunks to retrieve (default: 10) + + Returns: + str: Generated response incorporating document context + + Example: + ```python + flow = api.flow().id("default") + response = flow.document_rag( + query="Summarize the key findings", + user="trustgraph", + collection="research-papers", + doc_limit=5 + ) + print(response) + ``` + """ # The input consists of a question input = { @@ -200,6 +511,25 @@ class FlowInstance: )["response"] def embeddings(self, text): + """ + Generate vector embeddings for text. + + Converts text into dense vector representations suitable for semantic + search and similarity comparison. + + Args: + text: Input text to embed + + Returns: + list[float]: Vector embedding + + Example: + ```python + flow = api.flow().id("default") + vectors = flow.embeddings("quantum computing") + print(f"Embedding dimension: {len(vectors)}") + ``` + """ # The input consists of a text block input = { @@ -212,6 +542,32 @@ class FlowInstance: )["vectors"] def graph_embeddings_query(self, text, user, collection, limit=10): + """ + Query knowledge graph entities using semantic similarity. + + Finds entities in the knowledge graph whose descriptions are semantically + similar to the input text, using vector embeddings. 
+ + Args: + text: Query text for semantic search + user: User/keyspace identifier + collection: Collection identifier + limit: Maximum number of results (default: 10) + + Returns: + dict: Query results with similar entities + + Example: + ```python + flow = api.flow().id("default") + results = flow.graph_embeddings_query( + text="physicist who discovered radioactivity", + user="trustgraph", + collection="scientists", + limit=5 + ) + ``` + """ # Query graph embeddings for semantic search input = { @@ -227,6 +583,39 @@ class FlowInstance: ) def prompt(self, id, variables): + """ + Execute a prompt template with variable substitution. + + Prompt templates allow reusable prompt patterns with dynamic variable + substitution, useful for consistent prompt engineering. + + Args: + id: Prompt template identifier + variables: Dictionary of variable name to value mappings + + Returns: + str or dict: Rendered prompt result (text or structured object) + + Raises: + ProtocolException: If response format is invalid + + Example: + ```python + flow = api.flow().id("default") + + # Text template + result = flow.prompt( + id="summarize-template", + variables={"topic": "quantum computing", "length": "brief"} + ) + + # Structured template + result = flow.prompt( + id="extract-entities", + variables={"text": "Marie Curie won Nobel Prizes"} + ) + ``` + """ input = { "id": id, @@ -252,6 +641,33 @@ class FlowInstance: raise ProtocolException("Response not formatted correctly") def mcp_tool(self, name, parameters={}): + """ + Execute a Model Context Protocol (MCP) tool. + + MCP tools provide extensible functionality for agents and workflows, + allowing integration with external systems and services. 
+ + Args: + name: Tool name/identifier + parameters: Tool parameters dictionary (default: {}) + + Returns: + str or dict: Tool execution result + + Raises: + ProtocolException: If response format is invalid + + Example: + ```python + flow = api.flow().id("default") + + # Execute a tool + result = flow.mcp_tool( + name="search-web", + parameters={"query": "latest AI news", "limit": 5} + ) + ``` + """ # The input consists of name and parameters input = { @@ -281,6 +697,46 @@ class FlowInstance: self, s=None, p=None, o=None, user=None, collection=None, limit=10000 ): + """ + Query knowledge graph triples using pattern matching. + + Searches for RDF triples matching the given subject, predicate, and/or + object patterns. Unspecified parameters act as wildcards. + + Args: + s: Subject URI (optional, use None for wildcard) + p: Predicate URI (optional, use None for wildcard) + o: Object URI or Literal (optional, use None for wildcard) + user: User/keyspace identifier (optional) + collection: Collection identifier (optional) + limit: Maximum results to return (default: 10000) + + Returns: + list[Triple]: List of matching Triple objects + + Raises: + RuntimeError: If s or p is not a Uri, or o is not Uri/Literal + + Example: + ```python + from trustgraph.knowledge import Uri, Literal + + flow = api.flow().id("default") + + # Find all triples about a specific subject + triples = flow.triples_query( + s=Uri("http://example.org/person/marie-curie"), + user="trustgraph", + collection="scientists" + ) + + # Find all instances of a specific relationship + triples = flow.triples_query( + p=Uri("http://example.org/ontology/discovered"), + limit=100 + ) + ``` + """ input = { "limit": limit @@ -325,6 +781,39 @@ class FlowInstance: self, document, id=None, metadata=None, user=None, collection=None, ): + """ + Load a binary document for processing. + + Uploads a document (PDF, DOCX, images, etc.) for extraction and + processing through the flow's document pipeline. 
+ + Args: + document: Document content as bytes + id: Optional document identifier (auto-generated if None) + metadata: Optional metadata (list of Triples or object with emit method) + user: User/keyspace identifier (optional) + collection: Collection identifier (optional) + + Returns: + dict: Processing response + + Raises: + RuntimeError: If metadata is provided without id + + Example: + ```python + flow = api.flow().id("default") + + # Load a PDF document + with open("research.pdf", "rb") as f: + result = flow.load_document( + document=f.read(), + id="research-001", + user="trustgraph", + collection="papers" + ) + ``` + """ if id is None: @@ -372,6 +861,41 @@ class FlowInstance: self, text, id=None, metadata=None, charset="utf-8", user=None, collection=None, ): + """ + Load text content for processing. + + Uploads text content for extraction and processing through the flow's + text pipeline. + + Args: + text: Text content as bytes + id: Optional document identifier (auto-generated if None) + metadata: Optional metadata (list of Triples or object with emit method) + charset: Character encoding (default: "utf-8") + user: User/keyspace identifier (optional) + collection: Collection identifier (optional) + + Returns: + dict: Processing response + + Raises: + RuntimeError: If metadata is provided without id + + Example: + ```python + flow = api.flow().id("default") + + # Load text content + text_content = b"This is the document content..." + result = flow.load_text( + text=text_content, + id="text-001", + charset="utf-8", + user="trustgraph", + collection="documents" + ) + ``` + """ if id is None: @@ -417,6 +941,60 @@ class FlowInstance: self, query, user="trustgraph", collection="default", variables=None, operation_name=None ): + """ + Execute a GraphQL query against structured objects in the knowledge graph. + + Queries structured data using GraphQL syntax, allowing complex queries + with filtering, aggregation, and relationship traversal. 
+ + Args: + query: GraphQL query string + user: User/keyspace identifier (default: "trustgraph") + collection: Collection identifier (default: "default") + variables: Optional query variables dictionary + operation_name: Optional operation name for multi-operation documents + + Returns: + dict: GraphQL response with 'data', 'errors', and/or 'extensions' fields + + Raises: + ProtocolException: If system-level error occurs + + Example: + ```python + flow = api.flow().id("default") + + # Simple query + query = ''' + { + scientists(limit: 10) { + name + field + discoveries + } + } + ''' + result = flow.objects_query( + query=query, + user="trustgraph", + collection="scientists" + ) + + # Query with variables + query = ''' + query GetScientist($name: String!) { + scientists(name: $name) { + name + nobelPrizes + } + } + ''' + result = flow.objects_query( + query=query, + variables={"name": "Marie Curie"} + ) + ``` + """ # The input consists of a GraphQL query and optional variables input = { diff --git a/trustgraph-base/trustgraph/api/knowledge.py b/trustgraph-base/trustgraph/api/knowledge.py index 3c625057..23f6c9f2 100644 --- a/trustgraph-base/trustgraph/api/knowledge.py +++ b/trustgraph-base/trustgraph/api/knowledge.py @@ -1,3 +1,10 @@ +""" +TrustGraph Knowledge Graph Core Management + +This module provides interfaces for managing knowledge graph cores in TrustGraph. +KG cores are pre-built knowledge graph datasets that can be loaded and unloaded +into flows for use in queries and RAG operations. +""" import json import base64 @@ -10,15 +17,56 @@ def to_value(x): return Literal(x["v"]) class Knowledge: + """ + Knowledge graph core management client. + + Provides methods for managing knowledge graph cores, including listing + available cores, loading them into flows, and unloading them. KG cores + are pre-built knowledge graph datasets that enhance RAG capabilities. + """ def __init__(self, api): + """ + Initialize Knowledge client. 
+ + Args: + api: Parent Api instance for making requests + """ self.api = api def request(self, request): + """ + Make a knowledge-scoped API request. + Args: + request: Request payload dictionary + + Returns: + dict: Response object + """ return self.api.request(f"knowledge", request) def list_kg_cores(self, user="trustgraph"): + """ + List all available knowledge graph cores. + + Retrieves the IDs of all KG cores available for the specified user. + + Args: + user: User identifier (default: "trustgraph") + + Returns: + list[str]: List of KG core identifiers + + Example: + ```python + knowledge = api.knowledge() + + # List available KG cores + cores = knowledge.list_kg_cores(user="trustgraph") + print(f"Available KG cores: {cores}") + ``` + """ # The input consists of system and prompt strings input = { @@ -29,6 +77,24 @@ class Knowledge: return self.request(request = input)["ids"] def delete_kg_core(self, id, user="trustgraph"): + """ + Delete a knowledge graph core. + + Removes a KG core from storage. This does not affect currently loaded + cores in flows. + + Args: + id: KG core identifier to delete + user: User identifier (default: "trustgraph") + + Example: + ```python + knowledge = api.knowledge() + + # Delete a KG core + knowledge.delete_kg_core(id="medical-kb-v1", user="trustgraph") + ``` + """ # The input consists of system and prompt strings input = { @@ -41,6 +107,39 @@ class Knowledge: def load_kg_core(self, id, user="trustgraph", flow="default", collection="default"): + """ + Load a knowledge graph core into a flow. + + Makes a KG core available for use in queries and RAG operations within + the specified flow and collection. 
+ + Args: + id: KG core identifier to load + user: User identifier (default: "trustgraph") + flow: Flow instance to load into (default: "default") + collection: Collection to associate with (default: "default") + + Example: + ```python + knowledge = api.knowledge() + + # Load a medical knowledge base into the default flow + knowledge.load_kg_core( + id="medical-kb-v1", + user="trustgraph", + flow="default", + collection="medical" + ) + + # Now the flow can use this KG core for RAG queries + flow = api.flow().id("default") + response = flow.graph_rag( + query="What are the symptoms of diabetes?", + user="trustgraph", + collection="medical" + ) + ``` + """ # The input consists of system and prompt strings input = { @@ -54,6 +153,29 @@ class Knowledge: self.request(request = input) def unload_kg_core(self, id, user="trustgraph", flow="default"): + """ + Unload a knowledge graph core from a flow. + + Removes a KG core from active use in the specified flow, freeing + resources while keeping the core available in storage. + + Args: + id: KG core identifier to unload + user: User identifier (default: "trustgraph") + flow: Flow instance to unload from (default: "default") + + Example: + ```python + knowledge = api.knowledge() + + # Unload a KG core when no longer needed + knowledge.unload_kg_core( + id="medical-kb-v1", + user="trustgraph", + flow="default" + ) + ``` + """ # The input consists of system and prompt strings input = { diff --git a/trustgraph-base/trustgraph/api/library.py b/trustgraph-base/trustgraph/api/library.py index a08a9546..b068f627 100644 --- a/trustgraph-base/trustgraph/api/library.py +++ b/trustgraph-base/trustgraph/api/library.py @@ -1,3 +1,9 @@ +""" +TrustGraph Document Library Management + +This module provides interfaces for managing documents in the TrustGraph library, +including document storage, metadata management, and processing workflow coordination. 
+""" import datetime import time @@ -15,17 +21,79 @@ def to_value(x): return Literal(x["v"]) class Library: + """ + Document library management client. + + Provides methods for managing documents in the TrustGraph library, including + adding, retrieving, updating, and removing documents, as well as managing + document processing workflows. + """ def __init__(self, api): + """ + Initialize Library client. + + Args: + api: Parent Api instance for making requests + """ self.api = api def request(self, request): + """ + Make a library-scoped API request. + + Args: + request: Request payload dictionary + + Returns: + dict: Response object + """ return self.api.request(f"librarian", request) def add_document( self, document, id, metadata, user, title, comments, - kind="text/plain", tags=[], + kind="text/plain", tags=[], ): + """ + Add a document to the library. + + Stores a document with associated metadata in the library for + retrieval and processing. + + Args: + document: Document content as bytes + id: Document identifier (auto-generated if None) + metadata: Document metadata as list of Triple objects or object with emit method + user: User/owner identifier + title: Document title + comments: Document description or comments + kind: MIME type of the document (default: "text/plain") + tags: List of tags for categorization (default: []) + + Returns: + dict: Response from the add operation + + Raises: + RuntimeError: If metadata is provided without an id + + Example: + ```python + library = api.library() + + # Add a PDF document + with open("research.pdf", "rb") as f: + library.add_document( + document=f.read(), + id="research-001", + metadata=[], + user="trustgraph", + title="Research Paper", + comments="Key findings in quantum computing", + kind="application/pdf", + tags=["research", "physics"] + ) + ``` + """ if id is None: @@ -85,6 +153,31 @@ class Library: return self.request(input) def get_documents(self, user): + """ + List all documents for a user. 
+ + Retrieves metadata for all documents owned by the specified user. + + Args: + user: User identifier + + Returns: + list[DocumentMetadata]: List of document metadata objects + + Raises: + ProtocolException: If response format is invalid + + Example: + ```python + library = api.library() + docs = library.get_documents(user="trustgraph") + + for doc in docs: + print(f"{doc.id}: {doc.title} ({doc.kind})") + print(f" Uploaded: {doc.time}") + print(f" Tags: {', '.join(doc.tags)}") + ``` + """ input = { "operation": "list-documents", @@ -119,6 +212,29 @@ class Library: raise ProtocolException(f"Response not formatted correctly") def get_document(self, user, id): + """ + Get metadata for a specific document. + + Retrieves the metadata for a single document by ID. + + Args: + user: User identifier + id: Document identifier + + Returns: + DocumentMetadata: Document metadata object + + Raises: + ProtocolException: If response format is invalid + + Example: + ```python + library = api.library() + doc = library.get_document(user="trustgraph", id="doc-123") + print(f"Title: {doc.title}") + print(f"Comments: {doc.comments}") + ``` + """ input = { "operation": "get-document", @@ -152,6 +268,42 @@ class Library: raise ProtocolException(f"Response not formatted correctly") def update_document(self, user, id, metadata): + """ + Update document metadata. + + Updates the metadata for an existing document in the library. 
+ + Args: + user: User identifier + id: Document identifier + metadata: Updated DocumentMetadata object + + Returns: + DocumentMetadata: Updated document metadata + + Raises: + ProtocolException: If response format is invalid + + Example: + ```python + library = api.library() + + # Get existing document + doc = library.get_document(user="trustgraph", id="doc-123") + + # Update metadata + doc.title = "Updated Title" + doc.comments = "Updated description" + doc.tags.append("reviewed") + + # Save changes + updated_doc = library.update_document( + user="trustgraph", + id="doc-123", + metadata=doc + ) + ``` + """ input = { "operation": "update-document", @@ -199,6 +351,24 @@ class Library: raise ProtocolException(f"Response not formatted correctly") def remove_document(self, user, id): + """ + Remove a document from the library. + + Deletes a document and its metadata from the library. + + Args: + user: User identifier + id: Document identifier to remove + + Returns: + dict: Empty response object + + Example: + ```python + library = api.library() + library.remove_document(user="trustgraph", id="doc-123") + ``` + """ input = { "operation": "remove-document", @@ -214,6 +384,38 @@ class Library: self, id, document_id, flow="default", user="trustgraph", collection="default", tags=[], ): + """ + Start a document processing workflow. + + Initiates processing of a document through a specified flow, tracking + the processing job with metadata. 
+ + Args: + id: Unique processing job identifier + document_id: ID of the document to process + flow: Flow instance to use for processing (default: "default") + user: User identifier (default: "trustgraph") + collection: Target collection for processed data (default: "default") + tags: List of tags for the processing job (default: []) + + Returns: + dict: Empty response object + + Example: + ```python + library = api.library() + + # Start processing a document + library.start_processing( + id="proc-001", + document_id="doc-123", + flow="default", + user="trustgraph", + collection="research", + tags=["automated", "extract"] + ) + ``` + """ input = { "operation": "add-processing", @@ -233,8 +435,26 @@ class Library: return {} def stop_processing( - self, id, user="trustgraph", + self, id, user="trustgraph", ): + """ + Stop a running document processing job. + + Terminates an active document processing workflow and removes its metadata. + + Args: + id: Processing job identifier to stop + user: User identifier (default: "trustgraph") + + Returns: + dict: Empty response object + + Example: + ```python + library = api.library() + library.stop_processing(id="proc-001", user="trustgraph") + ``` + """ input = { "operation": "remove-processing", @@ -247,6 +467,34 @@ class Library: return {} def get_processings(self, user="trustgraph"): + """ + List all active document processing jobs. + + Retrieves metadata for all currently running document processing workflows + for the specified user. 
+ + Args: + user: User identifier (default: "trustgraph") + + Returns: + list[ProcessingMetadata]: List of processing job metadata objects + + Raises: + ProtocolException: If response format is invalid + + Example: + ```python + library = api.library() + jobs = library.get_processings(user="trustgraph") + + for job in jobs: + print(f"Job {job.id}:") + print(f" Document: {job.document_id}") + print(f" Flow: {job.flow}") + print(f" Collection: {job.collection}") + print(f" Started: {job.time}") + ``` + """ input = { "operation": "list-processing", diff --git a/trustgraph-base/trustgraph/api/socket_client.py b/trustgraph-base/trustgraph/api/socket_client.py index 23e3dbc0..c712f808 100644 --- a/trustgraph-base/trustgraph/api/socket_client.py +++ b/trustgraph-base/trustgraph/api/socket_client.py @@ -1,3 +1,9 @@ +""" +TrustGraph Synchronous WebSocket Client + +This module provides synchronous WebSocket-based access to TrustGraph services with +streaming support for real-time responses from agents, RAG queries, and text completions. +""" import json import asyncio @@ -10,9 +16,26 @@ from . exceptions import ProtocolException, raise_from_error_dict class SocketClient: - """Synchronous WebSocket client (wraps async websockets library)""" + """ + Synchronous WebSocket client for streaming operations. + + Provides a synchronous interface to WebSocket-based TrustGraph services, + wrapping async websockets library with synchronous generators for ease of use. + Supports streaming responses from agents, RAG queries, and text completions. + + Note: This is a synchronous wrapper around async WebSocket operations. For + true async support, use AsyncSocketClient instead. + """ def __init__(self, url: str, timeout: int, token: Optional[str]) -> None: + """ + Initialize synchronous WebSocket client. 
+ + Args: + url: Base URL for TrustGraph API (HTTP/HTTPS will be converted to WS/WSS) + timeout: WebSocket timeout in seconds + token: Optional bearer token for authentication + """ self.url: str = self._convert_to_ws_url(url) self.timeout: int = timeout self.token: Optional[str] = token @@ -22,7 +45,15 @@ class SocketClient: self._loop: Optional[asyncio.AbstractEventLoop] = None def _convert_to_ws_url(self, url: str) -> str: - """Convert HTTP URL to WebSocket URL""" + """ + Convert HTTP URL to WebSocket URL. + + Args: + url: HTTP/HTTPS or WS/WSS URL + + Returns: + str: WebSocket URL (ws:// or wss://) + """ if url.startswith("http://"): return url.replace("http://", "ws://", 1) elif url.startswith("https://"): @@ -34,7 +65,25 @@ class SocketClient: return f"ws://{url}" def flow(self, flow_id: str) -> "SocketFlowInstance": - """Get flow instance for WebSocket operations""" + """ + Get a flow instance for WebSocket streaming operations. + + Args: + flow_id: Flow identifier + + Returns: + SocketFlowInstance: Flow instance with streaming methods + + Example: + ```python + socket = api.socket() + flow = socket.flow("default") + + # Stream agent responses + for chunk in flow.agent(question="Hello", user="trustgraph", streaming=True): + print(chunk.content, end='', flush=True) + ``` + """ return SocketFlowInstance(self, flow_id) def _send_request_sync( @@ -242,15 +291,32 @@ class SocketClient: ) def close(self) -> None: - """Close WebSocket connection""" + """ + Close WebSocket connections. + + Note: Cleanup is handled automatically by context managers in async code. + """ # Cleanup handled by context manager in async code pass class SocketFlowInstance: - """Synchronous WebSocket flow instance with same interface as REST FlowInstance""" + """ + Synchronous WebSocket flow instance for streaming operations. + + Provides the same interface as REST FlowInstance but with WebSocket-based + streaming support for real-time responses. 
All methods support an optional + `streaming` parameter to enable incremental result delivery. + """ def __init__(self, client: SocketClient, flow_id: str) -> None: + """ + Initialize socket flow instance. + + Args: + client: Parent SocketClient + flow_id: Flow identifier + """ self.client: SocketClient = client self.flow_id: str = flow_id @@ -264,7 +330,44 @@ class SocketFlowInstance: streaming: bool = False, **kwargs: Any ) -> Union[Dict[str, Any], Iterator[StreamingChunk]]: - """Agent with optional streaming""" + """ + Execute an agent operation with streaming support. + + Agents can perform multi-step reasoning with tool use. This method always + returns streaming chunks (thoughts, observations, answers) even when + streaming=False, to show the agent's reasoning process. + + Args: + question: User question or instruction + user: User identifier + state: Optional state dictionary for stateful conversations + group: Optional group identifier for multi-user contexts + history: Optional conversation history as list of message dicts + streaming: Enable streaming mode (default: False) + **kwargs: Additional parameters passed to the agent service + + Returns: + Iterator[StreamingChunk]: Stream of agent thoughts, observations, and answers + + Example: + ```python + socket = api.socket() + flow = socket.flow("default") + + # Stream agent reasoning + for chunk in flow.agent( + question="What is quantum computing?", + user="trustgraph", + streaming=True + ): + if isinstance(chunk, AgentThought): + print(f"[Thinking] {chunk.content}") + elif isinstance(chunk, AgentObservation): + print(f"[Observation] {chunk.content}") + elif isinstance(chunk, AgentAnswer): + print(f"[Answer] {chunk.content}") + ``` + """ request = { "question": question, "user": user, @@ -283,7 +386,40 @@ class SocketFlowInstance: return self.client._send_request_sync("agent", self.flow_id, request, streaming=True) def text_completion(self, system: str, prompt: str, streaming: bool = False, **kwargs) -> 
Union[str, Iterator[str]]: - """Text completion with optional streaming""" + """ + Execute text completion with optional streaming. + + Args: + system: System prompt defining the assistant's behavior + prompt: User prompt/question + streaming: Enable streaming mode (default: False) + **kwargs: Additional parameters passed to the service + + Returns: + Union[str, Iterator[str]]: Complete response or stream of text chunks + + Example: + ```python + socket = api.socket() + flow = socket.flow("default") + + # Non-streaming + response = flow.text_completion( + system="You are helpful", + prompt="Explain quantum computing", + streaming=False + ) + print(response) + + # Streaming + for chunk in flow.text_completion( + system="You are helpful", + prompt="Explain quantum computing", + streaming=True + ): + print(chunk, end='', flush=True) + ``` + """ request = { "system": system, "prompt": prompt, @@ -316,7 +452,40 @@ class SocketFlowInstance: streaming: bool = False, **kwargs: Any ) -> Union[str, Iterator[str]]: - """Graph RAG with optional streaming""" + """ + Execute graph-based RAG query with optional streaming. + + Uses knowledge graph structure to find relevant context, then generates + a response using an LLM. Streaming mode delivers results incrementally. 
+ + Args: + query: Natural language query + user: User/keyspace identifier + collection: Collection identifier + max_subgraph_size: Maximum total triples in subgraph (default: 1000) + max_subgraph_count: Maximum number of subgraphs (default: 5) + max_entity_distance: Maximum traversal depth (default: 3) + streaming: Enable streaming mode (default: False) + **kwargs: Additional parameters passed to the service + + Returns: + Union[str, Iterator[str]]: Complete response or stream of text chunks + + Example: + ```python + socket = api.socket() + flow = socket.flow("default") + + # Streaming graph RAG + for chunk in flow.graph_rag( + query="Tell me about Marie Curie", + user="trustgraph", + collection="scientists", + streaming=True + ): + print(chunk, end='', flush=True) + ``` + """ request = { "query": query, "user": user, @@ -344,7 +513,39 @@ class SocketFlowInstance: streaming: bool = False, **kwargs: Any ) -> Union[str, Iterator[str]]: - """Document RAG with optional streaming""" + """ + Execute document-based RAG query with optional streaming. + + Uses vector embeddings to find relevant document chunks, then generates + a response using an LLM. Streaming mode delivers results incrementally. 
+ + Args: + query: Natural language query + user: User/keyspace identifier + collection: Collection identifier + doc_limit: Maximum document chunks to retrieve (default: 10) + streaming: Enable streaming mode (default: False) + **kwargs: Additional parameters passed to the service + + Returns: + Union[str, Iterator[str]]: Complete response or stream of text chunks + + Example: + ```python + socket = api.socket() + flow = socket.flow("default") + + # Streaming document RAG + for chunk in flow.document_rag( + query="Summarize the key findings", + user="trustgraph", + collection="research-papers", + doc_limit=5, + streaming=True + ): + print(chunk, end='', flush=True) + ``` + """ request = { "query": query, "user": user, @@ -374,7 +575,32 @@ class SocketFlowInstance: streaming: bool = False, **kwargs: Any ) -> Union[str, Iterator[str]]: - """Execute prompt with optional streaming""" + """ + Execute a prompt template with optional streaming. + + Args: + id: Prompt template identifier + variables: Dictionary of variable name to value mappings + streaming: Enable streaming mode (default: False) + **kwargs: Additional parameters passed to the service + + Returns: + Union[str, Iterator[str]]: Complete response or stream of text chunks + + Example: + ```python + socket = api.socket() + flow = socket.flow("default") + + # Streaming prompt execution + for chunk in flow.prompt( + id="summarize-template", + variables={"topic": "quantum computing", "length": "brief"}, + streaming=True + ): + print(chunk, end='', flush=True) + ``` + """ request = { "id": id, "variables": variables, @@ -397,7 +623,32 @@ class SocketFlowInstance: limit: int = 10, **kwargs: Any ) -> Dict[str, Any]: - """Query graph embeddings for semantic search""" + """ + Query knowledge graph entities using semantic similarity. 
+ + Args: + text: Query text for semantic search + user: User/keyspace identifier + collection: Collection identifier + limit: Maximum number of results (default: 10) + **kwargs: Additional parameters passed to the service + + Returns: + dict: Query results with similar entities + + Example: + ```python + socket = api.socket() + flow = socket.flow("default") + + results = flow.graph_embeddings_query( + text="physicist who discovered radioactivity", + user="trustgraph", + collection="scientists", + limit=5 + ) + ``` + """ request = { "text": text, "user": user, @@ -409,7 +660,25 @@ class SocketFlowInstance: return self.client._send_request_sync("graph-embeddings", self.flow_id, request, False) def embeddings(self, text: str, **kwargs: Any) -> Dict[str, Any]: - """Generate text embeddings""" + """ + Generate vector embeddings for text. + + Args: + text: Input text to embed + **kwargs: Additional parameters passed to the service + + Returns: + dict: Response containing vectors + + Example: + ```python + socket = api.socket() + flow = socket.flow("default") + + result = flow.embeddings("quantum computing") + vectors = result.get("vectors", []) + ``` + """ request = {"text": text} request.update(kwargs) @@ -425,7 +694,34 @@ class SocketFlowInstance: limit: int = 100, **kwargs: Any ) -> Dict[str, Any]: - """Triple pattern query""" + """ + Query knowledge graph triples using pattern matching. 
+ + Args: + s: Subject URI (optional, use None for wildcard) + p: Predicate URI (optional, use None for wildcard) + o: Object URI or Literal (optional, use None for wildcard) + user: User/keyspace identifier (optional) + collection: Collection identifier (optional) + limit: Maximum results to return (default: 100) + **kwargs: Additional parameters passed to the service + + Returns: + dict: Query results with matching triples + + Example: + ```python + socket = api.socket() + flow = socket.flow("default") + + # Find all triples about a specific subject + result = flow.triples_query( + s="http://example.org/person/marie-curie", + user="trustgraph", + collection="scientists" + ) + ``` + """ request = {"limit": limit} if s is not None: request["s"] = str(s) @@ -450,7 +746,41 @@ class SocketFlowInstance: operation_name: Optional[str] = None, **kwargs: Any ) -> Dict[str, Any]: - """GraphQL query""" + """ + Execute a GraphQL query against structured objects. + + Args: + query: GraphQL query string + user: User/keyspace identifier + collection: Collection identifier + variables: Optional query variables dictionary + operation_name: Optional operation name for multi-operation documents + **kwargs: Additional parameters passed to the service + + Returns: + dict: GraphQL response with data, errors, and/or extensions + + Example: + ```python + socket = api.socket() + flow = socket.flow("default") + + query = ''' + { + scientists(limit: 10) { + name + field + discoveries + } + } + ''' + result = flow.objects_query( + query=query, + user="trustgraph", + collection="scientists" + ) + ``` + """ request = { "query": query, "user": user, @@ -470,7 +800,28 @@ class SocketFlowInstance: parameters: Dict[str, Any], **kwargs: Any ) -> Dict[str, Any]: - """Execute MCP tool""" + """ + Execute a Model Context Protocol (MCP) tool. 
+ + Args: + name: Tool name/identifier + parameters: Tool parameters dictionary + **kwargs: Additional parameters passed to the service + + Returns: + dict: Tool execution result + + Example: + ```python + socket = api.socket() + flow = socket.flow("default") + + result = flow.mcp_tool( + name="search-web", + parameters={"query": "latest AI news", "limit": 5} + ) + ``` + """ request = { "name": name, "parameters": parameters diff --git a/trustgraph-base/trustgraph/api/types.py b/trustgraph-base/trustgraph/api/types.py index a8608853..3b4e476e 100644 --- a/trustgraph-base/trustgraph/api/types.py +++ b/trustgraph-base/trustgraph/api/types.py @@ -1,3 +1,9 @@ +""" +TrustGraph API Type Definitions + +Data classes and type definitions for TrustGraph API objects including knowledge +graph elements, metadata structures, and streaming response chunks. +""" import dataclasses import datetime @@ -6,23 +12,59 @@ from .. knowledge import hash, Uri, Literal @dataclasses.dataclass class Triple: + """ + RDF triple representing a knowledge graph statement. + + Attributes: + s: Subject (entity URI or value) + p: Predicate (relationship URI) + o: Object (entity URI, literal value, or typed value) + """ s : str p : str o : str @dataclasses.dataclass class ConfigKey: + """ + Configuration key identifier. + + Attributes: + type: Configuration type/category (e.g., "llm", "embedding") + key: Specific configuration key within the type + """ type : str key : str @dataclasses.dataclass class ConfigValue: + """ + Configuration key-value pair. + + Attributes: + type: Configuration type/category + key: Specific configuration key + value: Configuration value as string + """ type : str key : str value : str @dataclasses.dataclass class DocumentMetadata: + """ + Metadata for a document in the library. 
+ + Attributes: + id: Unique document identifier + time: Document creation/upload timestamp + kind: Document MIME type (e.g., "application/pdf", "text/plain") + title: Document title + comments: Additional comments or description + metadata: List of RDF triples providing structured metadata + user: User/owner identifier + tags: List of tags for categorization + """ id : str time : datetime.datetime kind : str @@ -34,6 +76,18 @@ class DocumentMetadata: @dataclasses.dataclass class ProcessingMetadata: + """ + Metadata for an active document processing job. + + Attributes: + id: Unique processing job identifier + document_id: ID of the document being processed + time: Processing start timestamp + flow: Flow instance handling the processing + user: User identifier + collection: Target collection for processed data + tags: List of tags for categorization + """ id : str document_id : str time : datetime.datetime @@ -44,6 +98,19 @@ class ProcessingMetadata: @dataclasses.dataclass class CollectionMetadata: + """ + Metadata for a data collection. + + Collections provide logical grouping and isolation for documents and + knowledge graph data. + + Attributes: + user: User/owner identifier + collection: Collection identifier + name: Human-readable collection name + description: Collection description + tags: List of tags for categorization + """ user : str collection : str name : str @@ -54,29 +121,80 @@ class CollectionMetadata: @dataclasses.dataclass class StreamingChunk: - """Base class for streaming chunks""" + """ + Base class for streaming response chunks. + + Used for WebSocket-based streaming operations where responses are delivered + incrementally as they are generated. 
+ + Attributes: + content: The text content of this chunk + end_of_message: True if this is the final chunk of a message segment + """ content: str end_of_message: bool = False @dataclasses.dataclass class AgentThought(StreamingChunk): - """Agent reasoning chunk""" + """ + Agent reasoning/thought process chunk. + + Represents the agent's internal reasoning or planning steps during execution. + These chunks show how the agent is thinking about the problem. + + Attributes: + content: Agent's thought text + end_of_message: True if this completes the current thought + chunk_type: Always "thought" + """ chunk_type: str = "thought" @dataclasses.dataclass class AgentObservation(StreamingChunk): - """Agent tool observation chunk""" + """ + Agent tool execution observation chunk. + + Represents the result or observation from executing a tool or action. + These chunks show what the agent learned from using tools. + + Attributes: + content: Observation text describing tool results + end_of_message: True if this completes the current observation + chunk_type: Always "observation" + """ chunk_type: str = "observation" @dataclasses.dataclass class AgentAnswer(StreamingChunk): - """Agent final answer chunk""" + """ + Agent final answer chunk. + + Represents the agent's final response to the user's query after completing + its reasoning and tool use. + + Attributes: + content: Answer text + end_of_message: True if this completes the current answer segment + end_of_dialog: True if this completes the entire agent interaction + chunk_type: Always "final-answer" + """ chunk_type: str = "final-answer" end_of_dialog: bool = False @dataclasses.dataclass class RAGChunk(StreamingChunk): - """RAG streaming chunk""" + """ + RAG (Retrieval-Augmented Generation) streaming chunk. + + Used for streaming responses from graph RAG, document RAG, text completion, + and other generative services. 
+ + Attributes: + content: Generated text content + end_of_stream: True if this is the final chunk of the stream + error: Optional error information if an error occurred + chunk_type: Always "rag" + """ chunk_type: str = "rag" end_of_stream: bool = False error: Optional[Dict[str, str]] = None