From 6dfa47aac8661dc50944f166e687a9c99ca90448 Mon Sep 17 00:00:00 2001
From: Jack Colquitt <126733989+JackColquitt@users.noreply.github.com>
Date: Sat, 30 May 2026 17:07:19 -0700
Subject: [PATCH 1/3] Revise README for semantic infrastructure terminology
(#962)
Updated the README to reflect changes in terminology and improve clarity regarding the platform's features.
---
README.md | 32 +++++++++++++++-----------------
1 file changed, 15 insertions(+), 17 deletions(-)
diff --git a/README.md b/README.md
index c366a3d9..1edccff6 100644
--- a/README.md
+++ b/README.md
@@ -11,11 +11,11 @@
-# The agent runtime platform
+# The semantic infrastructure for agents
-TrustGraph is an agent runtime platform built around context graphs — structured, queryable representations of your domain knowledge that ground every agent query in verified, explainable facts in private deployments with sovereign control. The platform is the full stack for agentic systems: context graphs, memory, retrieval, orchestration, and inference for precision-critical agent workloads.
+TrustGraph is a comprehensive semantic infrastructure for agents built around context graphs — structured, queryable representations of your domain knowledge that ground every agent query in verified, explainable facts in private deployments with sovereign control. The platform is the full stack for agentic systems: context graphs, memory, retrieval, orchestration, and inference for deterministic agent workloads.
The platform:
- [x] Multi-model and multimodal database system
@@ -99,23 +99,21 @@ For a browser based configuration, try the [Configuration Terminal](https://conf
- [**Developer APIs and CLI**](https://docs.trustgraph.ai/reference)
- [**Deployment Guides**](https://docs.trustgraph.ai/deployment)
-## Workbench
+## Context Graph UI
-The **Workbench** provides tools for all major features of TrustGraph. The **Workbench** is on port `8888` by default.
+
-- **Vector Search**: Search the installed knowledge bases
-- **Agentic, GraphRAG and LLM Chat**: Chat interface for agents, GraphRAG queries, or direct to LLMs
-- **Relationships**: Analyze deep relationships in the installed knowledge bases
-- **Graph Visualizer**: 3D GraphViz of the installed knowledge bases
-- **Library**: Staging area for installing knowledge bases
-- **Flow Classes**: Workflow preset configurations
-- **Flows**: Create custom workflows and adjust LLM parameters during runtime
-- **Knowledge Cores**: Manage resuable knowledge bases
-- **Prompts**: Manage and adjust prompts during runtime
-- **Schemas**: Define custom schemas for structured data knowledge bases
-- **Ontologies**: Define custom ontologies for unstructured data knowledge bases
-- **Agent Tools**: Define tools with collections, knowledge cores, MCP connections, and tool groups
-- **MCP Tools**: Connect to MCP servers
+The UI provides tools for all major features of TrustGraph. The UI deploys on port `8888` by default.
+
+- **Agent Console** — Query your agents directly with streaming responses and live explainability event tracking, so you can watch reasoning unfold in real time
+- **GraphRAG View** — Interactive graph RAG queries with a visual explainability DAG and inline provenance display, making it easy to see exactly where answers came from
+- **Context Explorer** — An interactive 3D context graph explorer with dynamic graph loading, BFS neighborhood extraction, edge pulse animation, and multiple navigation views
+- **Document Ingestion** — A complete upload and submission workflow with page and chunk inspection and document structure browsing
+- **Ontology Workbench** — A full ontology editor with class and property trees, OWL/XML and Turtle import/export with round-trip fidelity, circular dependency detection, and safe-delete confirmation dialogs
+- **Schema Workbench** — Interactive schema management with list, create, edit, and delete operations including field and index management
+- **Flow Management** — Flow creation and detail views with configurable parameters, temperature controls, and grouped storage layout
+- **Workspace UX** — Workspace selection and management surfaced directly in the interface
+- **Prompt Editor** — A dedicated prompt editing workflow
## TypeScript Library for UIs
From 97453d9b8319910c4f9d1860a6752b5a9c9f53ed Mon Sep 17 00:00:00 2001
From: Jack Colquitt <126733989+JackColquitt@users.noreply.github.com>
Date: Mon, 1 Jun 2026 14:08:30 -0700
Subject: [PATCH 2/3] Change project title to 'The semantic deployment
platform' (#968)
Updated the project title in the README.
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 1edccff6..b66edc70 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@
-# The semantic infrastructure for agents
+# The semantic deployment platform
From 28a51c244f60c9fe189aa972e1fd58940cd44c64 Mon Sep 17 00:00:00 2001
From: Jacob Molz
Date: Tue, 9 Jun 2026 11:37:10 -0400
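Subject: [PATCH 3/3] fix: reject invalid PDF decoder input (#977)
Decode the document content before creating the temporary file, and skip (with an error log) any document whose decoded bytes do not start with a `%PDF-` header after leading whitespace, instead of passing it to PyPDFLoader. Adds a unit test covering non-PDF content returned by the librarian.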
---
tests/unit/test_decoding/test_pdf_decoder.py | 48 ++++++++++++++-
.../trustgraph/decoding/pdf/pdf_decoder.py | 60 +++++++++++--------
2 files changed, 79 insertions(+), 29 deletions(-)
diff --git a/tests/unit/test_decoding/test_pdf_decoder.py b/tests/unit/test_decoding/test_pdf_decoder.py
index 04807b20..641a9d78 100644
--- a/tests/unit/test_decoding/test_pdf_decoder.py
+++ b/tests/unit/test_decoding/test_pdf_decoder.py
@@ -49,7 +49,7 @@ class TestPdfDecoderProcessor(IsolatedAsyncioTestCase):
async def test_on_message_success(self, mock_pdf_loader_class, mock_producer, mock_consumer):
"""Test successful PDF processing"""
# Mock PDF content
- pdf_content = b"fake pdf content"
+ pdf_content = b"%PDF-1.7\nfake pdf content"
pdf_base64 = base64.b64encode(pdf_content).decode('utf-8')
# Mock PyPDFLoader
@@ -88,13 +88,55 @@ class TestPdfDecoderProcessor(IsolatedAsyncioTestCase):
# Verify triples were sent for each page (provenance)
assert mock_triples_flow.send.call_count == 2
+ @patch('trustgraph.base.librarian_client.Consumer')
+ @patch('trustgraph.base.librarian_client.Producer')
+ @patch('trustgraph.decoding.pdf.pdf_decoder.PyPDFLoader')
+ @patch('trustgraph.base.async_processor.AsyncProcessor', MockAsyncProcessor)
+ async def test_on_message_rejects_librarian_content_that_is_not_pdf(self, mock_pdf_loader_class, mock_producer, mock_consumer):
+ """Test rejecting non-PDF content before invoking the PDF loader"""
+ html_content = b"Not found"
+ html_base64 = base64.b64encode(html_content)
+
+ mock_metadata = Metadata(id="test-doc")
+ mock_document = Document(metadata=mock_metadata, document_id="doc-123")
+ mock_msg = MagicMock()
+ mock_msg.value.return_value = mock_document
+
+ mock_output_flow = AsyncMock()
+ mock_triples_flow = AsyncMock()
+ mock_flow = MagicMock(side_effect=lambda name: {
+ "output": mock_output_flow,
+ "triples": mock_triples_flow,
+ }.get(name))
+ mock_flow.librarian.fetch_document_metadata = AsyncMock(
+ return_value=MagicMock(kind="application/pdf")
+ )
+ mock_flow.librarian.fetch_document_content = AsyncMock(
+ return_value=html_base64
+ )
+ mock_flow.librarian.save_child_document = AsyncMock()
+
+ config = {
+ 'id': 'test-pdf-decoder',
+ 'taskgroup': AsyncMock()
+ }
+
+ processor = Processor(**config)
+
+ await processor.on_message(mock_msg, None, mock_flow)
+
+ mock_pdf_loader_class.assert_not_called()
+ mock_output_flow.send.assert_not_called()
+ mock_triples_flow.send.assert_not_called()
+ mock_flow.librarian.save_child_document.assert_not_called()
+
@patch('trustgraph.base.librarian_client.Consumer')
@patch('trustgraph.base.librarian_client.Producer')
@patch('trustgraph.decoding.pdf.pdf_decoder.PyPDFLoader')
@patch('trustgraph.base.async_processor.AsyncProcessor', MockAsyncProcessor)
async def test_on_message_empty_pdf(self, mock_pdf_loader_class, mock_producer, mock_consumer):
"""Test handling of empty PDF"""
- pdf_content = b"fake pdf content"
+ pdf_content = b"%PDF-1.7\nfake pdf content"
pdf_base64 = base64.b64encode(pdf_content).decode('utf-8')
mock_loader = MagicMock()
@@ -126,7 +168,7 @@ class TestPdfDecoderProcessor(IsolatedAsyncioTestCase):
@patch('trustgraph.base.async_processor.AsyncProcessor', MockAsyncProcessor)
async def test_on_message_unicode_content(self, mock_pdf_loader_class, mock_producer, mock_consumer):
"""Test handling of unicode content in PDF"""
- pdf_content = b"fake pdf content"
+ pdf_content = b"%PDF-1.7\nfake pdf content"
pdf_base64 = base64.b64encode(pdf_content).decode('utf-8')
mock_loader = MagicMock()
diff --git a/trustgraph-flow/trustgraph/decoding/pdf/pdf_decoder.py b/trustgraph-flow/trustgraph/decoding/pdf/pdf_decoder.py
index ca242265..ae393028 100755
--- a/trustgraph-flow/trustgraph/decoding/pdf/pdf_decoder.py
+++ b/trustgraph-flow/trustgraph/decoding/pdf/pdf_decoder.py
@@ -32,6 +32,10 @@ logger = logging.getLogger(__name__)
default_ident = "document-decoder"
+def _looks_like_pdf(content):
+ return content.lstrip().startswith(b"%PDF-")
+
+
class Processor(FlowProcessor):
def __init__(self, **params):
@@ -94,33 +98,37 @@ class Processor(FlowProcessor):
)
return
- with tempfile.NamedTemporaryFile(delete_on_close=False, suffix='.pdf') as fp:
+ # Check if we should fetch from librarian or use inline data
+ if v.document_id:
+ # Fetch from librarian via Pulsar
+ logger.info(f"Fetching document {v.document_id} from librarian...")
+
+ content = await flow.librarian.fetch_document_content(
+ document_id=v.document_id,
+
+ )
+
+ # Content is base64 encoded
+ if isinstance(content, str):
+ content = content.encode('utf-8')
+ decoded_content = base64.b64decode(content)
+
+ logger.info(f"Fetched {len(decoded_content)} bytes from librarian")
+ else:
+ # Use inline data (backward compatibility)
+ decoded_content = base64.b64decode(v.data)
+
+ if not _looks_like_pdf(decoded_content):
+ logger.error(
+ f"Document {v.metadata.id} is not valid PDF content. "
+ f"Ignoring document."
+ )
+ return
+
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as fp:
temp_path = fp.name
-
- # Check if we should fetch from librarian or use inline data
- if v.document_id:
- # Fetch from librarian via Pulsar
- logger.info(f"Fetching document {v.document_id} from librarian...")
- fp.close()
-
- content = await flow.librarian.fetch_document_content(
- document_id=v.document_id,
-
- )
-
- # Content is base64 encoded
- if isinstance(content, str):
- content = content.encode('utf-8')
- decoded_content = base64.b64decode(content)
-
- with open(temp_path, 'wb') as f:
- f.write(decoded_content)
-
- logger.info(f"Fetched {len(decoded_content)} bytes from librarian")
- else:
- # Use inline data (backward compatibility)
- fp.write(base64.b64decode(v.data))
- fp.close()
+ fp.write(decoded_content)
+ fp.close()
global PyPDFLoader
if PyPDFLoader is None: