mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-06-09 06:45:13 +02:00
Add unified explainability support and librarian storage for (#693)
Add unified explainability support and librarian storage for all retrieval engines Implements consistent explainability/provenance tracking across GraphRAG, DocumentRAG, and Agent retrieval engines. All large content (answers, thoughts, observations) is now stored in librarian rather than as inline literals in the knowledge graph. Explainability API: - New explainability.py module with entity classes (Question, Exploration, Focus, Synthesis, Analysis, Conclusion) and ExplainabilityClient - Quiescence-based eventual consistency handling for trace fetching - Content fetching from librarian with retry logic CLI updates: - tg-invoke-graph-rag -x/--explainable flag returns explain_id - tg-invoke-document-rag -x/--explainable flag returns explain_id - tg-invoke-agent -x/--explainable flag returns explain_id - tg-list-explain-traces uses new explainability API - tg-show-explain-trace handles all three trace types Agent provenance: - Records session, iterations (think/act/observe), and conclusion - Stores thoughts and observations in librarian with document references - New predicates: tg:thoughtDocument, tg:observationDocument DocumentRAG provenance: - Records question, exploration (chunk retrieval), and synthesis - Stores answers in librarian with document references Schema changes: - AgentResponse: added explain_id, explain_graph fields - RetrievalResponse: added explain_id, explain_graph fields - agent_iteration_triples: supports thought_document_id, observation_document_id Update tests.
This commit is contained in:
parent
aecf00f040
commit
35128ff019
24 changed files with 2736 additions and 846 deletions
|
|
@ -2,6 +2,8 @@
|
|||
Simple agent infrastructure broadly implements the ReAct flow.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
|
|
@ -17,9 +19,13 @@ from ... base import AgentService, TextCompletionClientSpec, PromptClientSpec
|
|||
from ... base import GraphRagClientSpec, ToolClientSpec, StructuredQueryClientSpec
|
||||
from ... base import RowEmbeddingsQueryClientSpec, EmbeddingsClientSpec
|
||||
from ... base import ProducerSpec
|
||||
from ... base import Consumer, Producer
|
||||
from ... base import ConsumerMetrics, ProducerMetrics
|
||||
|
||||
from ... schema import AgentRequest, AgentResponse, AgentStep, Error
|
||||
from ... schema import Triples, Metadata
|
||||
from ... schema import LibrarianRequest, LibrarianResponse, DocumentMetadata
|
||||
from ... schema import librarian_request_queue, librarian_response_queue
|
||||
|
||||
# Provenance imports for agent explainability
|
||||
from trustgraph.provenance import (
|
||||
|
|
@ -41,6 +47,8 @@ from . types import Final, Action, Tool, Argument
|
|||
|
||||
default_ident = "agent-manager"
|
||||
default_max_iterations = 10
|
||||
default_librarian_request_queue = librarian_request_queue
|
||||
default_librarian_response_queue = librarian_response_queue
|
||||
|
||||
class Processor(AgentService):
|
||||
|
||||
|
|
@ -129,6 +137,115 @@ class Processor(AgentService):
|
|||
)
|
||||
)
|
||||
|
||||
# Librarian client for storing answer content
|
||||
librarian_request_q = params.get(
|
||||
"librarian_request_queue", default_librarian_request_queue
|
||||
)
|
||||
librarian_response_q = params.get(
|
||||
"librarian_response_queue", default_librarian_response_queue
|
||||
)
|
||||
|
||||
librarian_request_metrics = ProducerMetrics(
|
||||
processor=id, flow=None, name="librarian-request"
|
||||
)
|
||||
|
||||
self.librarian_request_producer = Producer(
|
||||
backend=self.pubsub,
|
||||
topic=librarian_request_q,
|
||||
schema=LibrarianRequest,
|
||||
metrics=librarian_request_metrics,
|
||||
)
|
||||
|
||||
librarian_response_metrics = ConsumerMetrics(
|
||||
processor=id, flow=None, name="librarian-response"
|
||||
)
|
||||
|
||||
self.librarian_response_consumer = Consumer(
|
||||
taskgroup=self.taskgroup,
|
||||
backend=self.pubsub,
|
||||
flow=None,
|
||||
topic=librarian_response_q,
|
||||
subscriber=f"{id}-librarian",
|
||||
schema=LibrarianResponse,
|
||||
handler=self.on_librarian_response,
|
||||
metrics=librarian_response_metrics,
|
||||
)
|
||||
|
||||
# Pending librarian requests: request_id -> asyncio.Future
|
||||
self.pending_librarian_requests = {}
|
||||
|
||||
async def start(self):
|
||||
await super(Processor, self).start()
|
||||
await self.librarian_request_producer.start()
|
||||
await self.librarian_response_consumer.start()
|
||||
|
||||
async def on_librarian_response(self, msg, consumer, flow):
|
||||
"""Handle responses from the librarian service."""
|
||||
response = msg.value()
|
||||
request_id = msg.properties().get("id")
|
||||
|
||||
if request_id in self.pending_librarian_requests:
|
||||
future = self.pending_librarian_requests.pop(request_id)
|
||||
future.set_result(response)
|
||||
else:
|
||||
logger.warning(f"Received unexpected librarian response: {request_id}")
|
||||
|
||||
async def save_answer_content(self, doc_id, user, content, title=None, timeout=120):
|
||||
"""
|
||||
Save answer content to the librarian.
|
||||
|
||||
Args:
|
||||
doc_id: ID for the answer document
|
||||
user: User ID
|
||||
content: Answer text content
|
||||
title: Optional title
|
||||
timeout: Request timeout in seconds
|
||||
|
||||
Returns:
|
||||
The document ID on success
|
||||
"""
|
||||
request_id = str(uuid.uuid4())
|
||||
|
||||
doc_metadata = DocumentMetadata(
|
||||
id=doc_id,
|
||||
user=user,
|
||||
kind="text/plain",
|
||||
title=title or "Agent Answer",
|
||||
document_type="answer",
|
||||
)
|
||||
|
||||
request = LibrarianRequest(
|
||||
operation="add-document",
|
||||
document_id=doc_id,
|
||||
document_metadata=doc_metadata,
|
||||
content=base64.b64encode(content.encode("utf-8")).decode("utf-8"),
|
||||
user=user,
|
||||
)
|
||||
|
||||
# Create future for response
|
||||
future = asyncio.get_event_loop().create_future()
|
||||
self.pending_librarian_requests[request_id] = future
|
||||
|
||||
try:
|
||||
# Send request
|
||||
await self.librarian_request_producer.send(
|
||||
request, properties={"id": request_id}
|
||||
)
|
||||
|
||||
# Wait for response
|
||||
response = await asyncio.wait_for(future, timeout=timeout)
|
||||
|
||||
if response.error:
|
||||
raise RuntimeError(
|
||||
f"Librarian error saving answer: {response.error.type}: {response.error.message}"
|
||||
)
|
||||
|
||||
return doc_id
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
self.pending_librarian_requests.pop(request_id, None)
|
||||
raise RuntimeError(f"Timeout saving answer document {doc_id}")
|
||||
|
||||
async def on_tools_config(self, config, version):
|
||||
|
||||
logger.info(f"Loading configuration version {version}")
|
||||
|
|
@ -347,6 +464,15 @@ class Processor(AgentService):
|
|||
))
|
||||
logger.debug(f"Emitted session triples for {session_uri}")
|
||||
|
||||
# Send explain event for session
|
||||
if streaming:
|
||||
await respond(AgentResponse(
|
||||
chunk_type="explain",
|
||||
content="",
|
||||
explain_id=session_uri,
|
||||
explain_graph=GRAPH_RETRIEVAL,
|
||||
))
|
||||
|
||||
logger.info(f"Question: {request.question}")
|
||||
|
||||
if len(history) >= self.max_iterations:
|
||||
|
|
@ -504,8 +630,28 @@ class Processor(AgentService):
|
|||
else:
|
||||
parent_uri = session_uri
|
||||
|
||||
# Save answer to librarian
|
||||
answer_doc_id = None
|
||||
if f:
|
||||
answer_doc_id = f"urn:trustgraph:agent:{session_id}/answer"
|
||||
try:
|
||||
await self.save_answer_content(
|
||||
doc_id=answer_doc_id,
|
||||
user=request.user,
|
||||
content=f,
|
||||
title=f"Agent Answer: {request.question[:50]}...",
|
||||
)
|
||||
logger.debug(f"Saved answer to librarian: {answer_doc_id}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to save answer to librarian: {e}")
|
||||
answer_doc_id = None # Fall back to inline content
|
||||
|
||||
final_triples = set_graph(
|
||||
agent_final_triples(final_uri, parent_uri, f),
|
||||
agent_final_triples(
|
||||
final_uri, parent_uri,
|
||||
answer="" if answer_doc_id else f,
|
||||
document_id=answer_doc_id,
|
||||
),
|
||||
GRAPH_RETRIEVAL
|
||||
)
|
||||
await flow("explainability").send(Triples(
|
||||
|
|
@ -518,6 +664,15 @@ class Processor(AgentService):
|
|||
))
|
||||
logger.debug(f"Emitted final triples for {final_uri}")
|
||||
|
||||
# Send explain event for conclusion
|
||||
if streaming:
|
||||
await respond(AgentResponse(
|
||||
chunk_type="explain",
|
||||
content="",
|
||||
explain_id=final_uri,
|
||||
explain_graph=GRAPH_RETRIEVAL,
|
||||
))
|
||||
|
||||
if streaming:
|
||||
# Streaming format - send end-of-dialog marker
|
||||
# Answer chunks were already sent via answer() callback during parsing
|
||||
|
|
@ -558,14 +713,48 @@ class Processor(AgentService):
|
|||
else:
|
||||
parent_uri = session_uri
|
||||
|
||||
# Save thought to librarian
|
||||
thought_doc_id = None
|
||||
if act.thought:
|
||||
thought_doc_id = f"urn:trustgraph:agent:{session_id}/i{iteration_num}/thought"
|
||||
try:
|
||||
await self.save_answer_content(
|
||||
doc_id=thought_doc_id,
|
||||
user=request.user,
|
||||
content=act.thought,
|
||||
title=f"Agent Thought: {act.name}",
|
||||
)
|
||||
logger.debug(f"Saved thought to librarian: {thought_doc_id}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to save thought to librarian: {e}")
|
||||
thought_doc_id = None
|
||||
|
||||
# Save observation to librarian
|
||||
observation_doc_id = None
|
||||
if act.observation:
|
||||
observation_doc_id = f"urn:trustgraph:agent:{session_id}/i{iteration_num}/observation"
|
||||
try:
|
||||
await self.save_answer_content(
|
||||
doc_id=observation_doc_id,
|
||||
user=request.user,
|
||||
content=act.observation,
|
||||
title=f"Agent Observation: {act.name}",
|
||||
)
|
||||
logger.debug(f"Saved observation to librarian: {observation_doc_id}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to save observation to librarian: {e}")
|
||||
observation_doc_id = None
|
||||
|
||||
iter_triples = set_graph(
|
||||
agent_iteration_triples(
|
||||
iteration_uri,
|
||||
parent_uri,
|
||||
act.thought,
|
||||
act.name,
|
||||
act.arguments,
|
||||
act.observation,
|
||||
thought="" if thought_doc_id else act.thought,
|
||||
action=act.name,
|
||||
arguments=act.arguments,
|
||||
observation="" if observation_doc_id else act.observation,
|
||||
thought_document_id=thought_doc_id,
|
||||
observation_document_id=observation_doc_id,
|
||||
),
|
||||
GRAPH_RETRIEVAL
|
||||
)
|
||||
|
|
@ -579,6 +768,15 @@ class Processor(AgentService):
|
|||
))
|
||||
logger.debug(f"Emitted iteration triples for {iteration_uri}")
|
||||
|
||||
# Send explain event for iteration
|
||||
if streaming:
|
||||
await respond(AgentResponse(
|
||||
chunk_type="explain",
|
||||
content="",
|
||||
explain_id=iteration_uri,
|
||||
explain_graph=GRAPH_RETRIEVAL,
|
||||
))
|
||||
|
||||
history.append(act)
|
||||
|
||||
# Handle state transitions if tool execution was successful
|
||||
|
|
|
|||
|
|
@ -109,7 +109,7 @@ class DocumentRag:
|
|||
async def query(
|
||||
self, query, user="trustgraph", collection="default",
|
||||
doc_limit=20, streaming=False, chunk_callback=None,
|
||||
explain_callback=None,
|
||||
explain_callback=None, save_answer_callback=None,
|
||||
):
|
||||
"""
|
||||
Execute a Document RAG query with optional explainability tracking.
|
||||
|
|
@ -122,6 +122,7 @@ class DocumentRag:
|
|||
streaming: Enable streaming LLM response
|
||||
chunk_callback: async def callback(chunk, end_of_stream) for streaming
|
||||
explain_callback: async def callback(triples, explain_id) for explainability
|
||||
save_answer_callback: async def callback(doc_id, answer_text) to save answer to librarian
|
||||
|
||||
Returns:
|
||||
str: The synthesized answer text
|
||||
|
|
@ -192,9 +193,28 @@ class DocumentRag:
|
|||
|
||||
# Emit synthesis explainability after answer generated
|
||||
if explain_callback:
|
||||
synthesis_doc_id = None
|
||||
answer_text = resp if resp else ""
|
||||
|
||||
# Save answer to librarian if callback provided
|
||||
if save_answer_callback and answer_text:
|
||||
# Generate document ID as URN matching query-time provenance format
|
||||
synthesis_doc_id = f"urn:trustgraph:docrag:{session_id}/answer"
|
||||
try:
|
||||
await save_answer_callback(synthesis_doc_id, answer_text)
|
||||
if self.verbose:
|
||||
logger.debug(f"Saved answer to librarian: {synthesis_doc_id}")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to save answer to librarian: {e}")
|
||||
synthesis_doc_id = None # Fall back to inline content
|
||||
|
||||
# Generate triples with document reference or inline content
|
||||
syn_triples = set_graph(
|
||||
docrag_synthesis_triples(syn_uri, exp_uri, answer_text),
|
||||
docrag_synthesis_triples(
|
||||
syn_uri, exp_uri,
|
||||
answer_text="" if synthesis_doc_id else answer_text,
|
||||
document_id=synthesis_doc_id,
|
||||
),
|
||||
GRAPH_RETRIEVAL
|
||||
)
|
||||
await explain_callback(syn_triples, syn_uri)
|
||||
|
|
|
|||
|
|
@ -8,8 +8,10 @@ import asyncio
|
|||
import base64
|
||||
import logging
|
||||
|
||||
import uuid
|
||||
|
||||
from ... schema import DocumentRagQuery, DocumentRagResponse, Error
|
||||
from ... schema import LibrarianRequest, LibrarianResponse
|
||||
from ... schema import LibrarianRequest, LibrarianResponse, DocumentMetadata
|
||||
from ... schema import librarian_request_queue, librarian_response_queue
|
||||
from ... schema import Triples, Metadata
|
||||
from ... provenance import GRAPH_RETRIEVAL
|
||||
|
|
@ -179,6 +181,62 @@ class Processor(FlowProcessor):
|
|||
self.pending_requests.pop(request_id, None)
|
||||
raise RuntimeError(f"Timeout fetching chunk {chunk_id}")
|
||||
|
||||
async def save_answer_content(self, doc_id, user, content, title=None, timeout=120):
|
||||
"""
|
||||
Save answer content to the librarian.
|
||||
|
||||
Args:
|
||||
doc_id: ID for the answer document
|
||||
user: User ID
|
||||
content: Answer text content
|
||||
title: Optional title
|
||||
timeout: Request timeout in seconds
|
||||
|
||||
Returns:
|
||||
The document ID on success
|
||||
"""
|
||||
request_id = str(uuid.uuid4())
|
||||
|
||||
doc_metadata = DocumentMetadata(
|
||||
id=doc_id,
|
||||
user=user,
|
||||
kind="text/plain",
|
||||
title=title or "DocumentRAG Answer",
|
||||
document_type="answer",
|
||||
)
|
||||
|
||||
request = LibrarianRequest(
|
||||
operation="add-document",
|
||||
document_id=doc_id,
|
||||
document_metadata=doc_metadata,
|
||||
content=base64.b64encode(content.encode("utf-8")).decode("utf-8"),
|
||||
user=user,
|
||||
)
|
||||
|
||||
# Create future for response
|
||||
future = asyncio.get_event_loop().create_future()
|
||||
self.pending_requests[request_id] = future
|
||||
|
||||
try:
|
||||
# Send request
|
||||
await self.librarian_request_producer.send(
|
||||
request, properties={"id": request_id}
|
||||
)
|
||||
|
||||
# Wait for response
|
||||
response = await asyncio.wait_for(future, timeout=timeout)
|
||||
|
||||
if response.error:
|
||||
raise RuntimeError(
|
||||
f"Librarian error saving answer: {response.error.type}: {response.error.message}"
|
||||
)
|
||||
|
||||
return doc_id
|
||||
|
||||
except asyncio.TimeoutError:
|
||||
self.pending_requests.pop(request_id, None)
|
||||
raise RuntimeError(f"Timeout saving answer document {doc_id}")
|
||||
|
||||
async def on_request(self, msg, consumer, flow):
|
||||
|
||||
try:
|
||||
|
|
@ -222,10 +280,20 @@ class Processor(FlowProcessor):
|
|||
response=None,
|
||||
explain_id=explain_id,
|
||||
explain_graph=GRAPH_RETRIEVAL,
|
||||
message_type="explain",
|
||||
),
|
||||
properties={"id": id}
|
||||
)
|
||||
|
||||
# Callback to save answer content to librarian
|
||||
async def save_answer(doc_id, answer_text):
|
||||
await self.save_answer_content(
|
||||
doc_id=doc_id,
|
||||
user=v.user,
|
||||
content=answer_text,
|
||||
title=f"DocumentRAG Answer: {v.query[:50]}...",
|
||||
)
|
||||
|
||||
# Check if streaming is requested
|
||||
if v.streaming:
|
||||
# Define async callback for streaming chunks
|
||||
|
|
@ -235,6 +303,7 @@ class Processor(FlowProcessor):
|
|||
DocumentRagResponse(
|
||||
response=chunk,
|
||||
end_of_stream=end_of_stream,
|
||||
message_type="chunk",
|
||||
error=None
|
||||
),
|
||||
properties={"id": id}
|
||||
|
|
@ -250,6 +319,17 @@ class Processor(FlowProcessor):
|
|||
streaming=True,
|
||||
chunk_callback=send_chunk,
|
||||
explain_callback=send_explainability,
|
||||
save_answer_callback=save_answer,
|
||||
)
|
||||
|
||||
# Send end_of_session to signal entire session is complete
|
||||
await flow("response").send(
|
||||
DocumentRagResponse(
|
||||
response=None,
|
||||
end_of_session=True,
|
||||
message_type="end",
|
||||
),
|
||||
properties={"id": id}
|
||||
)
|
||||
else:
|
||||
# Non-streaming path (existing behavior)
|
||||
|
|
@ -259,6 +339,7 @@ class Processor(FlowProcessor):
|
|||
collection=v.collection,
|
||||
doc_limit=doc_limit,
|
||||
explain_callback=send_explainability,
|
||||
save_answer_callback=save_answer,
|
||||
)
|
||||
|
||||
await flow("response").send(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue