mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
Refactor agent provenance so that the decision (thought + tool selection) and the result (observation) are separate DAG entities:

    Question ← Analysis+ToolUse ← Observation ← ... ← Conclusion

Analysis gains tg:ToolUse as a mixin RDF type and is emitted before tool execution via an on_action callback in react(). This ensures sub-traces (e.g. GraphRAG) appear after their parent Analysis in the streaming event order.

Observation becomes a standalone prov:Entity with tg:Observation type, emitted after tool execution. The linear DAG chain runs through Observation — subsequent iterations and the Conclusion derive from it, not from the Analysis.

message_id is populated on streaming AgentResponse for thought and observation chunks, using the provenance URI of the entity being built. This lets clients group streamed chunks by entity.

Wire changes:
- provenance/agent.py: Add ToolUse type, new agent_observation_triples(), remove observation from iteration
- agent_manager.py: Add on_action callback between reason() and tool execution
- orchestrator/pattern_base.py: Split emit, wire message_id, chain through observation URIs
- orchestrator/react_pattern.py: Emit Analysis via on_action before tool runs
- agent/react/service.py: Same for non-orchestrator path
- api/explainability.py: New Observation class, updated dispatch and chain walker
- api/types.py: Add message_id to AgentThought/AgentObservation
- cli: Render Observation separately, [analysis: tool] labels
261 lines
7.9 KiB
Python
261 lines
7.9 KiB
Python
"""
Provenance module for extraction-time provenance support.

Provides helpers for:
- URI generation for documents, pages, chunks, activities, subgraphs
- PROV-O triple building for provenance metadata
- Vocabulary bootstrap for per-collection initialization

In addition to the extraction-time helpers above, this package re-exports
the URI generators, namespace constants and triple builders used for
query-time (GraphRAG and Document RAG), agent and orchestrator provenance.
``__all__`` at the bottom of this file lists the complete public surface.

Usage example:

    from trustgraph.provenance import (
        document_uri, page_uri, chunk_uri,
        document_triples, derived_entity_triples,
        get_vocabulary_triples,
    )

    # Generate URIs
    doc_uri = document_uri("my-doc-123")
    pg_uri = page_uri()

    # Build provenance triples
    triples = document_triples(
        doc_uri,
        title="My Document",
        mime_type="application/pdf",
        page_count=10,
    )

    # Get vocabulary bootstrap triples (once per collection)
    vocab_triples = get_vocabulary_triples()
"""
|
|
|
|
# URI generation
|
|
from . uris import (
|
|
TRUSTGRAPH_BASE,
|
|
document_uri,
|
|
page_uri,
|
|
section_uri,
|
|
chunk_uri,
|
|
image_uri,
|
|
activity_uri,
|
|
subgraph_uri,
|
|
agent_uri,
|
|
# Query-time provenance URIs (GraphRAG)
|
|
question_uri,
|
|
grounding_uri,
|
|
exploration_uri,
|
|
focus_uri,
|
|
synthesis_uri,
|
|
# Agent provenance URIs
|
|
agent_session_uri,
|
|
agent_iteration_uri,
|
|
agent_thought_uri,
|
|
agent_observation_uri,
|
|
agent_final_uri,
|
|
# Orchestrator provenance URIs
|
|
agent_decomposition_uri,
|
|
agent_finding_uri,
|
|
agent_plan_uri,
|
|
agent_step_result_uri,
|
|
agent_synthesis_uri,
|
|
# Document RAG provenance URIs
|
|
docrag_question_uri,
|
|
docrag_grounding_uri,
|
|
docrag_exploration_uri,
|
|
docrag_synthesis_uri,
|
|
)
|
|
|
|
# Namespace constants
|
|
from . namespaces import (
|
|
# PROV-O
|
|
PROV, PROV_ENTITY, PROV_ACTIVITY, PROV_AGENT,
|
|
PROV_WAS_DERIVED_FROM, PROV_WAS_GENERATED_BY,
|
|
PROV_USED, PROV_WAS_ASSOCIATED_WITH, PROV_STARTED_AT_TIME,
|
|
# Dublin Core
|
|
DC, DC_TITLE, DC_SOURCE, DC_DATE, DC_CREATOR,
|
|
# RDF/RDFS
|
|
RDF, RDF_TYPE, RDFS, RDFS_LABEL,
|
|
# TrustGraph
|
|
TG, TG_CONTAINS, TG_PAGE_COUNT, TG_MIME_TYPE, TG_PAGE_NUMBER,
|
|
TG_CHUNK_INDEX, TG_CHAR_OFFSET, TG_CHAR_LENGTH,
|
|
TG_CHUNK_SIZE, TG_CHUNK_OVERLAP, TG_COMPONENT_VERSION,
|
|
TG_LLM_MODEL, TG_ONTOLOGY, TG_EMBEDDING_MODEL,
|
|
TG_SOURCE_TEXT, TG_SOURCE_CHAR_OFFSET, TG_SOURCE_CHAR_LENGTH,
|
|
TG_ELEMENT_TYPES, TG_TABLE_COUNT, TG_IMAGE_COUNT,
|
|
# Extraction provenance entity types
|
|
TG_DOCUMENT_TYPE, TG_PAGE_TYPE, TG_SECTION_TYPE, TG_CHUNK_TYPE,
|
|
TG_IMAGE_TYPE, TG_SUBGRAPH_TYPE,
|
|
# Query-time provenance predicates (GraphRAG)
|
|
TG_QUERY, TG_CONCEPT, TG_ENTITY,
|
|
TG_EDGE_COUNT, TG_SELECTED_EDGE, TG_REASONING,
|
|
# Query-time provenance predicates (DocumentRAG)
|
|
TG_CHUNK_COUNT, TG_SELECTED_CHUNK,
|
|
# Explainability entity types
|
|
TG_QUESTION, TG_GROUNDING, TG_EXPLORATION, TG_FOCUS, TG_SYNTHESIS,
|
|
TG_ANALYSIS, TG_CONCLUSION,
|
|
# Unifying types
|
|
TG_ANSWER_TYPE, TG_REFLECTION_TYPE, TG_THOUGHT_TYPE, TG_OBSERVATION_TYPE,
|
|
TG_TOOL_USE,
|
|
# Question subtypes (to distinguish retrieval mechanism)
|
|
TG_GRAPH_RAG_QUESTION, TG_DOC_RAG_QUESTION, TG_AGENT_QUESTION,
|
|
# Agent provenance predicates
|
|
TG_THOUGHT, TG_ACTION, TG_ARGUMENTS, TG_OBSERVATION,
|
|
TG_SUBAGENT_GOAL, TG_PLAN_STEP,
|
|
# Orchestrator entity types
|
|
TG_DECOMPOSITION, TG_FINDING, TG_PLAN_TYPE, TG_STEP_RESULT,
|
|
# Document reference predicate
|
|
TG_DOCUMENT,
|
|
# Named graphs
|
|
GRAPH_DEFAULT, GRAPH_SOURCE, GRAPH_RETRIEVAL,
|
|
)
|
|
|
|
# Triple builders
|
|
from . triples import (
|
|
document_triples,
|
|
derived_entity_triples,
|
|
subgraph_provenance_triples,
|
|
# Query-time provenance triple builders (GraphRAG)
|
|
question_triples,
|
|
grounding_triples,
|
|
exploration_triples,
|
|
focus_triples,
|
|
synthesis_triples,
|
|
# Query-time provenance triple builders (DocumentRAG)
|
|
docrag_question_triples,
|
|
docrag_exploration_triples,
|
|
docrag_synthesis_triples,
|
|
# Utility
|
|
set_graph,
|
|
)
|
|
|
|
# Agent provenance triple builders
|
|
from . agent import (
|
|
agent_session_triples,
|
|
agent_iteration_triples,
|
|
agent_observation_triples,
|
|
agent_final_triples,
|
|
# Orchestrator provenance triple builders
|
|
agent_decomposition_triples,
|
|
agent_finding_triples,
|
|
agent_plan_triples,
|
|
agent_step_result_triples,
|
|
agent_synthesis_triples,
|
|
)
|
|
|
|
# Vocabulary bootstrap
|
|
from . vocabulary import (
|
|
get_vocabulary_triples,
|
|
PROV_CLASS_LABELS,
|
|
PROV_PREDICATE_LABELS,
|
|
DC_PREDICATE_LABELS,
|
|
TG_CLASS_LABELS,
|
|
TG_PREDICATE_LABELS,
|
|
)
|
|
|
|
# Public API of the provenance package.
#
# Every entry names something imported earlier in this file from one of the
# sibling submodules (uris, namespaces, triples, agent, vocabulary); the
# groups below follow the same order as those imports.  When a new helper is
# imported above, add its name here as well so that
# ``from trustgraph.provenance import *`` keeps exposing the full surface.
__all__ = [
    # URIs
    "TRUSTGRAPH_BASE",
    "document_uri",
    "page_uri",
    "section_uri",
    "chunk_uri",
    "image_uri",
    "activity_uri",
    "subgraph_uri",
    "agent_uri",
    # Query-time provenance URIs
    "question_uri",
    "grounding_uri",
    "exploration_uri",
    "focus_uri",
    "synthesis_uri",
    # Agent provenance URIs
    "agent_session_uri",
    "agent_iteration_uri",
    "agent_thought_uri",
    "agent_observation_uri",
    "agent_final_uri",
    # Orchestrator provenance URIs
    "agent_decomposition_uri",
    "agent_finding_uri",
    "agent_plan_uri",
    "agent_step_result_uri",
    "agent_synthesis_uri",
    # Document RAG provenance URIs
    "docrag_question_uri",
    "docrag_grounding_uri",
    "docrag_exploration_uri",
    "docrag_synthesis_uri",
    # Namespaces
    "PROV", "PROV_ENTITY", "PROV_ACTIVITY", "PROV_AGENT",
    "PROV_WAS_DERIVED_FROM", "PROV_WAS_GENERATED_BY",
    "PROV_USED", "PROV_WAS_ASSOCIATED_WITH", "PROV_STARTED_AT_TIME",
    "DC", "DC_TITLE", "DC_SOURCE", "DC_DATE", "DC_CREATOR",
    "RDF", "RDF_TYPE", "RDFS", "RDFS_LABEL",
    "TG", "TG_CONTAINS", "TG_PAGE_COUNT", "TG_MIME_TYPE", "TG_PAGE_NUMBER",
    "TG_CHUNK_INDEX", "TG_CHAR_OFFSET", "TG_CHAR_LENGTH",
    "TG_CHUNK_SIZE", "TG_CHUNK_OVERLAP", "TG_COMPONENT_VERSION",
    "TG_LLM_MODEL", "TG_ONTOLOGY", "TG_EMBEDDING_MODEL",
    "TG_SOURCE_TEXT", "TG_SOURCE_CHAR_OFFSET", "TG_SOURCE_CHAR_LENGTH",
    "TG_ELEMENT_TYPES", "TG_TABLE_COUNT", "TG_IMAGE_COUNT",
    # Extraction provenance entity types
    "TG_DOCUMENT_TYPE", "TG_PAGE_TYPE", "TG_SECTION_TYPE",
    "TG_CHUNK_TYPE", "TG_IMAGE_TYPE", "TG_SUBGRAPH_TYPE",
    # Query-time provenance predicates (GraphRAG)
    "TG_QUERY", "TG_CONCEPT", "TG_ENTITY",
    "TG_EDGE_COUNT", "TG_SELECTED_EDGE", "TG_REASONING",
    # Query-time provenance predicates (DocumentRAG)
    "TG_CHUNK_COUNT", "TG_SELECTED_CHUNK",
    # Explainability entity types
    "TG_QUESTION", "TG_GROUNDING", "TG_EXPLORATION", "TG_FOCUS", "TG_SYNTHESIS",
    "TG_ANALYSIS", "TG_CONCLUSION",
    # Unifying types
    "TG_ANSWER_TYPE", "TG_REFLECTION_TYPE", "TG_THOUGHT_TYPE", "TG_OBSERVATION_TYPE",
    "TG_TOOL_USE",
    # Question subtypes
    "TG_GRAPH_RAG_QUESTION", "TG_DOC_RAG_QUESTION", "TG_AGENT_QUESTION",
    # Agent provenance predicates
    "TG_THOUGHT", "TG_ACTION", "TG_ARGUMENTS", "TG_OBSERVATION",
    "TG_SUBAGENT_GOAL", "TG_PLAN_STEP",
    # Orchestrator entity types
    "TG_DECOMPOSITION", "TG_FINDING", "TG_PLAN_TYPE", "TG_STEP_RESULT",
    # Document reference predicate
    "TG_DOCUMENT",
    # Named graphs
    "GRAPH_DEFAULT", "GRAPH_SOURCE", "GRAPH_RETRIEVAL",
    # Triple builders
    "document_triples",
    "derived_entity_triples",
    "subgraph_provenance_triples",
    # Query-time provenance triple builders (GraphRAG)
    "question_triples",
    "grounding_triples",
    "exploration_triples",
    "focus_triples",
    "synthesis_triples",
    # Query-time provenance triple builders (DocumentRAG)
    "docrag_question_triples",
    "docrag_exploration_triples",
    "docrag_synthesis_triples",
    # Agent provenance triple builders
    "agent_session_triples",
    "agent_iteration_triples",
    "agent_observation_triples",
    "agent_final_triples",
    # Orchestrator provenance triple builders
    "agent_decomposition_triples",
    "agent_finding_triples",
    "agent_plan_triples",
    "agent_step_result_triples",
    "agent_synthesis_triples",
    # Utility
    "set_graph",
    # Vocabulary
    "get_vocabulary_triples",
    "PROV_CLASS_LABELS",
    "PROV_PREDICATE_LABELS",
    "DC_PREDICATE_LABELS",
    "TG_CLASS_LABELS",
    "TG_PREDICATE_LABELS",
]