// Mirror of https://github.com/trustgraph-ai/trustgraph.git
// Synced 2026-04-25 08:26:21 +02:00 (552 lines, 21 KiB, JavaScript)

// ============================================================================
// TrustGraph Explainability API Demo
// ============================================================================
//
// This example demonstrates how to use the TrustGraph streaming agent API
// with explainability events. It shows how to:
//
//   1. Send an agent query and receive streaming thinking/observation/answer
//   2. Receive and parse explainability events as they arrive
//   3. Resolve the provenance chain for knowledge graph edges:
//        subgraph -> chunk -> page -> document
//   4. Fetch source text from the librarian using chunk IDs
//
// Explainability events use RDF triples (W3C PROV ontology + TrustGraph
// namespace) to describe the retrieval pipeline. The key event types are:
//
//   - AgentQuestion: The initial user query
//   - Analysis/ToolUse: Agent deciding which tool to invoke
//   - GraphRagQuestion: A sub-query sent to the Graph RAG pipeline
//   - Grounding: Concepts extracted from the query for graph traversal
//   - Exploration: Entities discovered during knowledge graph traversal
//   - Focus: The selected knowledge graph edges (triples) used for context
//   - Synthesis: The RAG answer synthesised from retrieved context
//   - Observation: The tool result returned to the agent
//   - Conclusion/Answer: The agent's final answer
//
// Each event carries RDF triples that link back through the provenance chain,
// allowing full traceability from answer back to source documents.
// ============================================================================

import { createTrustGraphSocket } from '@trustgraph/client';

// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------

// User identifier passed as the first argument to createTrustGraphSocket().
const USER = "trustgraph";

// Simple question
const QUESTION = "Tell me about the author of the document";

// Likely to trigger the deep research plan-and-execute pattern
//const QUESTION = "Do deep research and explain the risks posed by globalisation in the modern world";

// WebSocket URL the client connects to.
const SOCKET_URL = "ws://localhost:8088/api/v1/socket";

// ---------------------------------------------------------------------------
// RDF predicates and TrustGraph namespace constants
// ---------------------------------------------------------------------------

// Standard RDF / RDFS / PROV predicates.
const RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
const RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label";
const PROV_DERIVED = "http://www.w3.org/ns/prov#wasDerivedFrom";

// TrustGraph event types (capitalised) and the predicates (lower case) that
// hang the event's payload off the event ID.
const TG_GROUNDING = "https://trustgraph.ai/ns/Grounding";      // event type
const TG_CONCEPT = "https://trustgraph.ai/ns/concept";          // grounding -> concept
const TG_EXPLORATION = "https://trustgraph.ai/ns/Exploration";  // event type
const TG_ENTITY = "https://trustgraph.ai/ns/entity";            // exploration -> entity
const TG_FOCUS = "https://trustgraph.ai/ns/Focus";              // event type
const TG_EDGE = "https://trustgraph.ai/ns/edge";                // -> selected edge triple term
const TG_CONTAINS = "https://trustgraph.ai/ns/contains";        // subgraph -> edge triple term

// ---------------------------------------------------------------------------
// Utility: check whether a set of triples assigns a given RDF type to an ID
// ---------------------------------------------------------------------------

/**
 * Report whether `triples` contains an rdf:type statement for `id`.
 *
 * Each triple is read as `{ s, p, o }` where the IRI of a term is held in
 * its `.i` property.
 *
 * @param {Array<object>} triples - Triples to search.
 * @param {string} id - Subject IRI to look for.
 * @param {string} type - Type IRI the subject must be assigned.
 * @returns {boolean} True when a matching `id rdf:type type` triple exists.
 */
const isType = (triples, id, type) =>
  triples.some(t => t.s.i === id && t.p.i === RDF_TYPE && t.o.i === type);

// ---------------------------------------------------------------------------
// Utility: word-wrap text for display
// ---------------------------------------------------------------------------

/**
 * Collapse whitespace in `text` and wrap it into at most `maxLines` lines of
 * at most `width` characters, each prefixed with `indent`.
 *
 * Lines break at the last space at or before `width`; a run with no space is
 * hard-split at `width`. When text is left over after `maxLines` lines, " ..."
 * is appended to the final line (so that line may exceed `width`).
 *
 * @param {string} text - Text to wrap.
 * @param {number} width - Maximum characters per line, excluding the indent.
 * @param {string} indent - Prefix added to every output line.
 * @param {number} maxLines - Maximum number of lines to emit.
 * @returns {string} The wrapped lines joined with "\n" ("" for empty input).
 */
const wrapText = (text, width, indent, maxLines) => {
  const clean = text.replace(/\s+/g, " ").trim();
  const lines = [];
  let remaining = clean;
  while (remaining.length > 0 && lines.length < maxLines) {
    if (remaining.length <= width) {
      lines.push(remaining);
      // Everything has been emitted; clear the remainder so the truncation
      // marker below is not added to text that fitted completely.
      remaining = "";
      break;
    }
    let breakAt = remaining.lastIndexOf(" ", width);
    // No usable space (or only a leading one): hard-split at the width.
    if (breakAt <= 0) breakAt = width;
    lines.push(remaining.substring(0, breakAt));
    remaining = remaining.substring(breakAt).trimStart();
  }
  // Text is only left over here when the line budget ran out.
  if (remaining.length > 0 && lines.length > 0) {
    lines[lines.length - 1] += " ...";
  }
  return lines.map(l => indent + l).join("\n");
};

// ---------------------------------------------------------------------------
// Connect to TrustGraph
// ---------------------------------------------------------------------------

// Print a banner showing the endpoint and the question about to be asked.
console.log("=".repeat(80));
console.log("TrustGraph Explainability API Demo");
console.log("=".repeat(80));
console.log(`Connecting to: ${SOCKET_URL}`);
console.log(`Question: ${QUESTION}`);
console.log("=".repeat(80));

const client = createTrustGraphSocket(USER, undefined, SOCKET_URL);

console.log("Connected, sending query...\n");

// Get a flow handle. Flows provide access to AI operations (agent, RAG,
// text completion, etc.) as well as knowledge graph queries.
const flow = client.flow("default");

// Get a librarian handle for fetching source document text.
const librarian = client.librarian();

// ---------------------------------------------------------------------------
// Inline explain event printing
// ---------------------------------------------------------------------------
// Explain events arrive during streaming alongside thinking/observation/
// answer chunks. We print a summary immediately and store them for
// post-processing (label resolution and provenance lookups require async
// queries that can't run inside the synchronous callback).

// Every explain event received so far, in arrival order.
const explainEvents = [];

/**
 * Print a one-line summary of an explain event, plus extra detail for
 * Grounding and Exploration events.
 *
 * @param {{explainId: string, explainTriples?: Array<object>}} explainEvent
 *   Event to summarise; events without triples are ignored.
 * @returns {void}
 */
const printExplainInline = (explainEvent) => {
  const { explainId, explainTriples } = explainEvent;
  if (!explainTriples) return;

  // Extract the RDF types assigned to the explain event's own ID.
  // Every explain event has rdf:type triples that identify what kind
  // of pipeline step it represents (Grounding, Exploration, Focus, etc.)
  const types = explainTriples
    .filter(t => t.s.i === explainId && t.p.i === RDF_TYPE)
    .map(t => t.o.i);

  // Show short type names (e.g. "Grounding" instead of full URI)
  const shortTypes = types
    .map(t => t.split("/").pop().split("#").pop())
    .join(", ");
  console.log(` [explain] ${shortTypes}`);

  // Grounding events contain the concepts extracted from the query.
  // These are the seed terms used to begin knowledge graph traversal.
  if (isType(explainTriples, explainId, TG_GROUNDING)) {
    const concepts = explainTriples
      .filter(t => t.s.i === explainId && t.p.i === TG_CONCEPT)
      .map(t => t.o.v);
    console.log(` Grounding concepts: ${concepts.join(", ")}`);
  }

  // Exploration events list the entities found during graph traversal.
  // We show the count here; labelled names are printed after resolution.
  if (isType(explainTriples, explainId, TG_EXPLORATION)) {
    const count = explainTriples
      .filter(t => t.s.i === explainId && t.p.i === TG_ENTITY).length;
    console.log(` Entities: ${count} found (see below)`);
  }
};

/**
 * Handle one explain event: print its inline summary, then append it to
 * `explainEvents` for later post-processing.
 *
 * @param {object} explainEvent - Event delivered by the explain callback.
 * @returns {void}
 */
const collectExplain = (explainEvent) => {
  printExplainInline(explainEvent);
  explainEvents.push(explainEvent);
};

// ---------------------------------------------------------------------------
// Label resolution
// ---------------------------------------------------------------------------
// Many explain triples reference entities and predicates by URI. We query
// the knowledge graph for rdfs:label to get human-readable names.

/**
 * Look up the rdfs:label of each URI, issuing all queries concurrently.
 *
 * Resolution is best-effort: a URI with no label, or whose query fails, is
 * simply absent from the result so callers can fall back to the URI itself.
 * Query failures are reported with console.warn rather than silently dropped,
 * so a broken connection is distinguishable from "no label".
 *
 * @param {string[]} uris - URIs to resolve.
 * @returns {Promise<Map<string, string>>} Map from URI to the value of the
 *   first label returned for it.
 */
const resolveLabels = async (uris) => {
  const labels = new Map();
  await Promise.all(uris.map(async (uri) => {
    try {
      const results = await flow.triplesQuery(
        { t: "i", i: uri },
        { t: "i", i: RDFS_LABEL },
      );
      if (results.length > 0) {
        labels.set(uri, results[0].o.v);
      }
    } catch (e) {
      // Keep going: the caller falls back to the raw URI for this entry.
      console.warn(`Label lookup failed for ${uri}: ${e?.message ?? e}`);
    }
  }));
  return labels;
};

// ---------------------------------------------------------------------------
// Provenance resolution for knowledge graph edges
// ---------------------------------------------------------------------------
// Focus events contain the knowledge graph triples (edges) that were selected
// as context for the RAG answer. Each edge can be traced back through the
// provenance chain to the original source document:
//
//   subgraph --contains--> <<edge triple>>      (RDF-star triple term)
//   subgraph --wasDerivedFrom--> chunk          (text chunk)
//   chunk --wasDerivedFrom--> page              (document page)
//   page --wasDerivedFrom--> document           (original document)
//
// The chunk URI also serves as the content ID in the librarian, so it can
// be used to fetch the actual source text.

/**
 * Trace each edge triple back along its provenance chain.
 *
 * All edges are resolved concurrently. An edge with no containing subgraph,
 * or whose lookup throws, gets no entry in the result; lookup failures are
 * reported with console.warn. A chain that stops early yields a partial
 * record holding only the links that were found.
 *
 * @param {Array<{s: object, p: object, o: object}>} edgeTriples
 *   Edge triples taken from Focus events.
 * @returns {Promise<Map<string, {subgraph: string, chunk?: string,
 *   page?: string, document?: string}>>} Map keyed by JSON.stringify(edge).
 */
const resolveEdgeSources = async (edgeTriples) => {
  const iri = (uri) => ({ t: "i", i: uri });

  // Follow one wasDerivedFrom hop; undefined when there is no target IRI.
  const derivedFrom = async (uri) => {
    const results = await flow.triplesQuery(iri(uri), iri(PROV_DERIVED));
    return results.length > 0 ? results[0].o.i : undefined;
  };

  const sources = new Map();

  await Promise.all(edgeTriples.map(async (tr) => {
    const key = JSON.stringify(tr);
    try {
      // Step 1: Find the subgraph that contains this edge triple.
      // The query uses an RDF-star triple term as the object: the
      // knowledge graph stores subgraph -> contains -> <<s, p, o>>.
      const subgraphResults = await flow.triplesQuery(
        undefined,
        iri(TG_CONTAINS),
        { t: "t", tr },
      );
      if (subgraphResults.length === 0) {
        // Only report misses for edges whose object is a literal or an IRI.
        if (tr.o.t === "l" || tr.o.t === "i") {
          console.log(` No source match for triple:`);
          console.log(` s: ${tr.s.i}`);
          console.log(` p: ${tr.p.i}`);
          console.log(` o: ${JSON.stringify(tr.o)}`);
        }
        return;
      }
      const subgraph = subgraphResults[0].s.i;

      // Step 2: Walk wasDerivedFrom chain: subgraph -> chunk
      const chunk = await derivedFrom(subgraph);
      if (chunk === undefined) {
        sources.set(key, { subgraph });
        return;
      }

      // Step 3: chunk -> page
      const page = await derivedFrom(chunk);
      if (page === undefined) {
        sources.set(key, { subgraph, chunk });
        return;
      }

      // Step 4: page -> document (may be undefined if the chain ends here)
      const document = await derivedFrom(page);

      sources.set(key, { subgraph, chunk, page, document });
    } catch (e) {
      // Best-effort: skip this edge, but make the failure visible.
      console.warn(`Provenance lookup failed for edge ${key}: ${e?.message ?? e}`);
    }
  }));

  return sources;
};

// ---------------------------------------------------------------------------
// Collect URIs that need label resolution
// ---------------------------------------------------------------------------
// Scans explain events for entity URIs (from Exploration events) and edge
// term URIs (from Focus events) so we can batch-resolve their labels.

/**
 * Gather every URI in the explain events that should be shown with a label.
 *
 * @param {Array<{explainId: string, explainTriples?: Array<object>}>} events
 *   Explain events; events without triples are skipped.
 * @returns {Set<string>} Entity URIs from Exploration events plus the IRI
 *   terms (subject, predicate, object) of Focus edge triples.
 */
const collectUris = (events) => {
  const uris = new Set();
  for (const { explainId, explainTriples } of events) {
    if (!explainTriples) continue;

    // Entity URIs from exploration
    if (isType(explainTriples, explainId, TG_EXPLORATION)) {
      for (const t of explainTriples) {
        if (t.s.i === explainId && t.p.i === TG_ENTITY) {
          uris.add(t.o.i);
        }
      }
    }

    // Subject, predicate, and object URIs from focus edge triples
    if (isType(explainTriples, explainId, TG_FOCUS)) {
      for (const t of explainTriples) {
        if (t.p.i === TG_EDGE && t.o.t === "t") {
          const tr = t.o.tr;
          // Literal terms have no URI to resolve, so only IRIs are added.
          if (tr.s.t === "i") uris.add(tr.s.i);
          if (tr.p.t === "i") uris.add(tr.p.i);
          if (tr.o.t === "i") uris.add(tr.o.i);
        }
      }
    }
  }
  return uris;
};

// ---------------------------------------------------------------------------
// Collect edge triples from Focus events
// ---------------------------------------------------------------------------
// Focus events contain selectedEdge -> edge relationships. Each edge's
// object is an RDF-star triple term ({t: "t", tr: {s, p, o}}) representing
// the actual knowledge graph triple used as RAG context.

/**
 * Extract the knowledge graph triples referenced by Focus events.
 *
 * @param {Array<{explainId: string, explainTriples?: Array<object>}>} events
 *   Explain events; events without triples or of other types are skipped.
 * @returns {Array<object>} The `tr` payload of every edge triple term, in
 *   event order (duplicates are kept).
 */
const collectEdgeTriples = (events) => {
  const edges = [];
  for (const { explainId, explainTriples } of events) {
    if (!explainTriples) continue;
    if (isType(explainTriples, explainId, TG_FOCUS)) {
      for (const t of explainTriples) {
        if (t.p.i === TG_EDGE && t.o.t === "t") {
          edges.push(t.o.tr);
        }
      }
    }
  }
  return edges;
};

// ---------------------------------------------------------------------------
// Print knowledge graph edges with provenance
// ---------------------------------------------------------------------------
// Displays each edge triple with resolved labels and its source location
// (chunk -> page -> document).

/**
 * Print the edges of every Focus event, with their provenance when known.
 *
 * @param {Array<{explainId: string, explainTriples?: Array<object>}>} events
 *   Explain events; only Focus events are printed.
 * @param {Map<string, string>} labels - URI -> label; unlabelled URIs are
 *   printed as-is.
 * @param {Map<string, object>} edgeSources - Provenance records keyed by
 *   JSON.stringify(edge triple).
 * @param {number} [maxEdges=20] - Maximum edges printed per Focus event; the
 *   rest are summarised as a count.
 * @returns {void}
 */
const printFocusEdges = (events, labels, edgeSources, maxEdges = 20) => {
  const label = (uri) => labels.get(uri) || uri;

  // IRIs are shown by label; any other term by its value, or "?" if empty.
  const termValue = (term) =>
    term.t === "i" ? label(term.i) : (term.v || "?");

  for (const { explainId, explainTriples } of events) {
    if (!explainTriples) continue;
    if (!isType(explainTriples, explainId, TG_FOCUS)) continue;

    const edges = explainTriples
      .filter(t => t.p.i === TG_EDGE && t.o.t === "t")
      .map(t => t.o.tr);

    for (const tr of edges.slice(0, maxEdges)) {
      console.log(` ${termValue(tr.s)} -> ${termValue(tr.p)} -> ${termValue(tr.o)}`);
      const src = edgeSources.get(JSON.stringify(tr));
      if (!src) continue;

      // Only the links that were actually resolved are shown.
      const parts = [src.chunk, src.page, src.document]
        .filter(Boolean)
        .map(uri => label(uri));
      if (parts.length > 0) {
        console.log(` Source: ${parts.join(" -> ")}`);
      }
    }
    if (edges.length > maxEdges) {
      console.log(` ... and ${edges.length - maxEdges} more`);
    }
  }
};

// ---------------------------------------------------------------------------
// Fetch chunk text from the librarian
// ---------------------------------------------------------------------------
// The chunk URI (e.g. urn:chunk:UUID) serves as a universal ID that ties
// together provenance metadata, embeddings, and the source text content.
// The librarian stores the original text keyed by this same URI, so we
// can retrieve it with streamDocument(chunkUri).

/**
 * Stream a document from the librarian and collect it into one string.
 *
 * Adapts the callback-style librarian.streamDocument() to a Promise.
 * NOTE(review): the caller base64-decodes the concatenated result, which
 * presumes the stream is a single base64 payload split across callbacks —
 * confirm against the client library.
 *
 * @param {string} chunkUri - Librarian content ID to fetch.
 * @returns {Promise<string>} Concatenated content, resolved when a callback
 *   reports completion; rejects with the error passed to the error callback.
 */
const fetchChunkText = (chunkUri) => {
  return new Promise((resolve, reject) => {
    let text = "";
    librarian.streamDocument(
      chunkUri,
      (content, chunkIndex, totalChunks, complete) => {
        // Guard against a completion-only callback carrying no content,
        // which would otherwise append the string "undefined".
        text += content ?? "";
        if (complete) resolve(text);
      },
      (error) => reject(error),
    );
  });
};

// ===========================================================================
// Send the agent query
// ===========================================================================
// The agent callback receives four types of streaming content:
//   - think: the agent's reasoning (chain-of-thought)
//   - observe: tool results returned to the agent
//   - answer: the final answer being generated
//   - error: any errors during processing
//
// The onExplain callback fires for each explainability event, delivering
// RDF triples that describe what happened at each pipeline stage.

// Accumulators for the streamed chunks of each message kind. Each one is
// printed and reset when its callback reports completion.
let thought = "";
let obs = "";
let ans = "";

await flow.agent(

  QUESTION,

  // Think callback: agent reasoning / chain-of-thought
  (chunk, complete, messageId, metadata) => {
    thought += chunk;
    if (complete) {
      console.log("\nThinking:", thought, "\n");
      thought = "";
    }
  },

  // Observe callback: tool results returned to the agent
  (chunk, complete, messageId, metadata) => {
    obs += chunk;
    if (complete) {
      console.log("\nObservation:", obs, "\n");
      obs = "";
    }
  },

  // Answer callback: the agent's final response
  (chunk, complete, messageId, metadata) => {
    ans += chunk;
    if (complete) {
      console.log("\nAnswer:", ans, "\n");
      ans = "";
    }
  },

  // Error callback
  (error) => {
    console.log(JSON.stringify({ type: "error", error }, null, 2));
  },

  // Explain callback: explainability events with RDF triples
  (explainEvent) => {
    collectExplain(explainEvent);
  }

);

// ===========================================================================
// Post-processing: resolve labels, provenance, and source text
// ===========================================================================
// After the agent query completes, we have all the explain events. Now we
// can make async queries to:
//   1. Trace each edge back to its source document (provenance chain)
//   2. Resolve URIs to human-readable labels
//   3. Fetch the original text for each source chunk

console.log("Resolving provenance...\n");

// Resolve the provenance chain for each knowledge graph edge
const edgeTriples = collectEdgeTriples(explainEvents);
const edgeSources = await resolveEdgeSources(edgeTriples);

// Collect all URIs that need labels: entities, edge terms, and source URIs
const uris = collectUris(explainEvents);
for (const src of edgeSources.values()) {
  if (src.chunk) uris.add(src.chunk);
  if (src.page) uris.add(src.page);
  if (src.document) uris.add(src.document);
}
const labels = await resolveLabels([...uris]);

// Display helper: the resolved label, or the URI itself when none was found.
const label = (uri) => labels.get(uri) || uri;

// ---------------------------------------------------------------------------
// Display: Entities retrieved during graph exploration
// ---------------------------------------------------------------------------

// One section per Exploration event: the total entity count followed by the
// labels of the first ten entities.
for (const { explainId, explainTriples } of explainEvents) {
  if (!explainTriples) continue;
  if (!isType(explainTriples, explainId, TG_EXPLORATION)) continue;
  const entities = explainTriples
    .filter(t => t.s.i === explainId && t.p.i === TG_ENTITY)
    .map(t => label(t.o.i));
  const display = entities.slice(0, 10);
  console.log("=".repeat(80));
  console.log("Entities Retrieved");
  console.log("=".repeat(80));
  console.log(` ${entities.length} entities: ${display.join(", ")}${entities.length > 10 ? ", ..." : ""}`);
}

// ---------------------------------------------------------------------------
// Display: Knowledge graph edges with provenance
// ---------------------------------------------------------------------------

console.log("\n" + "=".repeat(80));
console.log("Knowledge Graph Edges");
console.log("=".repeat(80));
printFocusEdges(explainEvents, labels, edgeSources);

// ---------------------------------------------------------------------------
// Display: Source text for each chunk referenced by the edges
// ---------------------------------------------------------------------------

// Several edges can share a chunk, so fetch and print each chunk only once.
const uniqueChunks = new Set();
for (const src of edgeSources.values()) {
  if (src.chunk) uniqueChunks.add(src.chunk);
}

console.log(`\nFetching text for ${uniqueChunks.size} source chunks...`);
const chunkTexts = new Map();
await Promise.all([...uniqueChunks].map(async (chunkUri) => {
  try {
    // streamDocument returns base64-encoded content
    const text = await fetchChunkText(chunkUri);
    chunkTexts.set(chunkUri, text);
  } catch (e) {
    // Best-effort: the chunk is still listed below, just without its text.
    console.warn(`Failed to fetch text for ${chunkUri}: ${e?.message ?? e}`);
  }
}));

console.log("\n" + "=".repeat(80));
console.log("Sources");
console.log("=".repeat(80));

let sourceIndex = 0;
for (const chunkUri of uniqueChunks) {
  sourceIndex++;
  const chunkLabel = labels.get(chunkUri) || chunkUri;

  // Find the page and document labels for this chunk, taken from the first
  // provenance record that references it.
  let pageLabel, docLabel;
  for (const src of edgeSources.values()) {
    if (src.chunk === chunkUri) {
      if (src.page) pageLabel = labels.get(src.page) || src.page;
      if (src.document) docLabel = labels.get(src.document) || src.document;
      break;
    }
  }

  console.log(`\n [${sourceIndex}] ${docLabel || "?"} / ${pageLabel || "?"} / ${chunkLabel}`);
  console.log(" " + "-".repeat(70));

  // Decode the base64 content and display a wrapped snippet
  const b64 = chunkTexts.get(chunkUri);
  if (b64) {
    const text = Buffer.from(b64, "base64").toString("utf-8");
    console.log(wrapText(text, 76, " ", 6));
  }
}

// ---------------------------------------------------------------------------
// Clean up
// ---------------------------------------------------------------------------

console.log("\n" + "=".repeat(80));
console.log("Query complete");
console.log("=".repeat(80));

// Close the socket, then exit explicitly with a success status.
client.close();
process.exit(0);