Split Analysis into Analysis+ToolUse and Observation, add message_id (#747)

Refactor agent provenance so that the decision (thought + tool
selection) and the result (observation) are separate DAG entities:

    Question ← Analysis+ToolUse ← Observation ← ... ← Conclusion

Analysis gains tg:ToolUse as a mixin RDF type and is emitted before
tool execution via an on_action callback in react(). This ensures
sub-traces (e.g. GraphRAG) appear after their parent Analysis in the
streaming event order.

Observation becomes a standalone prov:Entity with tg:Observation type,
emitted after tool execution. The linear DAG chain runs through
Observation — subsequent iterations and the Conclusion derive from it,
not from the Analysis.

message_id is populated on streaming AgentResponse for thought and
observation chunks, using the provenance URI of the entity being built.
This lets clients group streamed chunks by entity.

Wire changes:
- provenance/agent.py: Add ToolUse type, new agent_observation_triples(),
  remove observation from iteration
- agent_manager.py: Add on_action callback between reason() and tool
  execution
- orchestrator/pattern_base.py: Split emit, wire message_id, chain
  through observation URIs
- orchestrator/react_pattern.py: Emit Analysis via on_action before
  tool runs
- agent/react/service.py: Same for non-orchestrator path
- api/explainability.py: New Observation class, updated dispatch and
  chain walker
- api/types.py: Add message_id to AgentThought/AgentObservation
- cli: Render Observation separately, [analysis: tool] labels
2026-04-25 00:16:23 +02:00 · 2026-03-31 17:51:22 +01:00 · 2026-03-31 17:51:22 +01:00 · 153ae9ad30
commit 153ae9ad30
parent 89e13a756a
28 changed files with 661 additions and 350 deletions
--- a/trustgraph-base/trustgraph/api/__init__.py
+++ b/trustgraph-base/trustgraph/api/__init__.py
@ -81,6 +81,7 @@ from .explainability import (
    Synthesis,
    Reflection,
    Analysis,
+    Observation,
    Conclusion,
    Decomposition,
    Finding,
@ -164,6 +165,7 @@ __all__ = [
    "Focus",
    "Synthesis",
    "Analysis",
+    "Observation",
    "Conclusion",
    "EdgeSelection",
    "wire_triples_to_tuples",
--- a/trustgraph-base/trustgraph/api/explainability.py
+++ b/trustgraph-base/trustgraph/api/explainability.py
@ -40,6 +40,7 @@ TG_ANSWER_TYPE = TG + "Answer"
 TG_REFLECTION_TYPE = TG + "Reflection"
 TG_THOUGHT_TYPE = TG + "Thought"
 TG_OBSERVATION_TYPE = TG + "Observation"
+TG_TOOL_USE = TG + "ToolUse"
 TG_GRAPH_RAG_QUESTION = TG + "GraphRagQuestion"
 TG_DOC_RAG_QUESTION = TG + "DocRagQuestion"
 TG_AGENT_QUESTION = TG + "AgentQuestion"
@ -58,7 +59,6 @@ TG_PLAN_STEP = TG + "planStep"
 PROV = "http://www.w3.org/ns/prov#"
 PROV_STARTED_AT_TIME = PROV + "startedAtTime"
 PROV_WAS_DERIVED_FROM = PROV + "wasDerivedFrom"
-PROV_WAS_GENERATED_BY = PROV + "wasGeneratedBy"

 RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
 RDFS_LABEL = "http://www.w3.org/2000/01/rdf-schema#label"
@ -102,6 +102,8 @@ class ExplainEntity:
            return StepResult.from_triples(uri, triples)
        elif TG_SYNTHESIS in types:
            return Synthesis.from_triples(uri, triples)
+        elif TG_OBSERVATION_TYPE in types and TG_REFLECTION_TYPE not in types:
+            return Observation.from_triples(uri, triples)
        elif TG_REFLECTION_TYPE in types:
            return Reflection.from_triples(uri, triples)
        elif TG_ANALYSIS in types:
@ -279,18 +281,16 @@ class Reflection(ExplainEntity):

@dataclass
 class Analysis(ExplainEntity):
-    """Analysis entity - one think/act/observe cycle (Agent only)."""
+    """Analysis+ToolUse entity - decision + tool call (Agent only)."""
    action: str = ""
    arguments: str = ""  # JSON string
    thought: str = ""
-    observation: str = ""

    @classmethod
    def from_triples(cls, uri: str, triples: List[Tuple[str, str, Any]]) -> "Analysis":
        action = ""
        arguments = ""
        thought = ""
-        observation = ""

        for s, p, o in triples:
            if p == TG_ACTION:
@ -299,8 +299,6 @@ class Analysis(ExplainEntity):
                arguments = o
            elif p == TG_THOUGHT:
                thought = o
-            elif p == TG_OBSERVATION:
-                observation = o

        return cls(
            uri=uri,
@ -308,7 +306,26 @@ class Analysis(ExplainEntity):
            action=action,
            arguments=arguments,
            thought=thought,
-            observation=observation
+        )
+
+
+@dataclass
+class Observation(ExplainEntity):
+    """Observation entity - standalone tool result (Agent only)."""
+    document: str = ""
+
+    @classmethod
+    def from_triples(cls, uri: str, triples: List[Tuple[str, str, Any]]) -> "Observation":
+        document = ""
+
+        for s, p, o in triples:
+            if p == TG_DOCUMENT:
+                document = o
+
+        return cls(
+            uri=uri,
+            entity_type="observation",
+            document=document,
        )


@ -757,9 +774,9 @@ class ExplainabilityClient:
            return trace
        trace["question"] = question

-        # Find grounding: ?grounding prov:wasGeneratedBy question_uri
+        # Find grounding: ?grounding prov:wasDerivedFrom question_uri
        grounding_triples = self.flow.triples_query(
-            p=PROV_WAS_GENERATED_BY,
+            p=PROV_WAS_DERIVED_FROM,
            o=question_uri,
            g=graph,
            user=user,
@ -894,9 +911,9 @@ class ExplainabilityClient:
            return trace
        trace["question"] = question

-        # Find grounding: ?grounding prov:wasGeneratedBy question_uri
+        # Find grounding: ?grounding prov:wasDerivedFrom question_uri
        grounding_triples = self.flow.triples_query(
-            p=PROV_WAS_GENERATED_BY,
+            p=PROV_WAS_DERIVED_FROM,
            o=question_uri,
            g=graph,
            user=user,
@ -1010,41 +1027,26 @@ class ExplainabilityClient:
        # Follow the provenance chain from the question
        self._follow_provenance_chain(
            session_uri, trace, graph, user, collection,
-            is_first=True, max_depth=50,
+            max_depth=50,
        )

        return trace

    def _follow_provenance_chain(
        self, current_uri, trace, graph, user, collection,
-        is_first=False, max_depth=50,
+        max_depth=50,
    ):
        """Recursively follow the provenance chain, handling branches."""
        if max_depth <= 0:
            return

        # Find entities derived from current_uri
-        if is_first:
-            derived_triples = self.flow.triples_query(
-                p=PROV_WAS_GENERATED_BY,
-                o=current_uri,
-                g=graph, user=user, collection=collection,
-                limit=20
-            )
-            if not derived_triples:
-                derived_triples = self.flow.triples_query(
-                    p=PROV_WAS_DERIVED_FROM,
-                    o=current_uri,
-                    g=graph, user=user, collection=collection,
-                    limit=20
-                )
-        else:
-            derived_triples = self.flow.triples_query(
-                p=PROV_WAS_DERIVED_FROM,
-                o=current_uri,
-                g=graph, user=user, collection=collection,
-                limit=20
-            )
+        derived_triples = self.flow.triples_query(
+            p=PROV_WAS_DERIVED_FROM,
+            o=current_uri,
+            g=graph, user=user, collection=collection,
+            limit=20
+        )

        if not derived_triples:
            return
@ -1062,8 +1064,8 @@ class ExplainabilityClient:
            if entity is None:
                continue

-            if isinstance(entity, (Analysis, Decomposition, Finding,
-                                   Plan, StepResult)):
+            if isinstance(entity, (Analysis, Observation, Decomposition,
+                                   Finding, Plan, StepResult)):
                trace["steps"].append(entity)

                # Continue following from this entity
@ -1072,6 +1074,27 @@ class ExplainabilityClient:
                    max_depth=max_depth - 1,
                )

+            elif isinstance(entity, Question):
+                # Sub-trace: a RAG session linked to this agent step.
+                # Fetch the full sub-trace and embed it.
+                if entity.question_type == "graph-rag":
+                    sub_trace = self.fetch_graphrag_trace(
+                        derived_uri, graph, user, collection,
+                    )
+                elif entity.question_type == "document-rag":
+                    sub_trace = self.fetch_docrag_trace(
+                        derived_uri, graph, user, collection,
+                    )
+                else:
+                    sub_trace = None
+
+                if sub_trace:
+                    trace["steps"].append({
+                        "type": "sub-trace",
+                        "question": entity,
+                        "trace": sub_trace,
+                    })
+
            elif isinstance(entity, (Conclusion, Synthesis)):
                trace["steps"].append(entity)

@ -1114,10 +1137,25 @@ class ExplainabilityClient:
                if isinstance(entity, Question):
                    questions.append(entity)

-        # Sort by timestamp (newest first)
-        questions.sort(key=lambda q: q.timestamp or "", reverse=True)
+        # Filter out sub-traces: sessions that have a wasDerivedFrom link
+        # (they are child sessions linked to a parent agent iteration)
+        top_level = []
+        for q in questions:
+            parent_triples = self.flow.triples_query(
+                s=q.uri,
+                p=PROV_WAS_DERIVED_FROM,
+                g=graph,
+                user=user,
+                collection=collection,
+                limit=1
+            )
+            if not parent_triples:
+                top_level.append(q)

-        return questions
+        # Sort by timestamp (newest first)
+        top_level.sort(key=lambda q: q.timestamp or "", reverse=True)
+
+        return top_level

    def detect_session_type(
        self,
@ -1159,18 +1197,9 @@ class ExplainabilityClient:
            limit=5
        )

-        generated_triples = self.flow.triples_query(
-            p=PROV_WAS_GENERATED_BY,
-            o=session_uri,
-            g=graph,
-            user=user,
-            collection=collection,
-            limit=5
-        )
-
        all_child_uris = [
            extract_term_value(t.get("s", {}))
-            for t in (derived_triples + generated_triples)
+            for t in derived_triples
        ]

        for child_uri in all_child_uris:
--- a/trustgraph-base/trustgraph/api/socket_client.py
+++ b/trustgraph-base/trustgraph/api/socket_client.py
@ -384,12 +384,14 @@ class SocketClient:
        if chunk_type == "thought":
            return AgentThought(
                content=resp.get("content", ""),
-                end_of_message=resp.get("end_of_message", False)
+                end_of_message=resp.get("end_of_message", False),
+                message_id=resp.get("message_id", ""),
            )
        elif chunk_type == "observation":
            return AgentObservation(
                content=resp.get("content", ""),
-                end_of_message=resp.get("end_of_message", False)
+                end_of_message=resp.get("end_of_message", False),
+                message_id=resp.get("message_id", ""),
            )
        elif chunk_type == "answer" or chunk_type == "final-answer":
            return AgentAnswer(
--- a/trustgraph-base/trustgraph/api/types.py
+++ b/trustgraph-base/trustgraph/api/types.py
@ -150,8 +150,10 @@ class AgentThought(StreamingChunk):
        content: Agent's thought text
        end_of_message: True if this completes the current thought
        chunk_type: Always "thought"
+        message_id: Provenance URI of the entity being built
    """
    chunk_type: str = "thought"
+    message_id: str = ""

@dataclasses.dataclass
 class AgentObservation(StreamingChunk):
@ -165,8 +167,10 @@ class AgentObservation(StreamingChunk):
        content: Observation text describing tool results
        end_of_message: True if this completes the current observation
        chunk_type: Always "observation"
+        message_id: Provenance URI of the entity being built
    """
    chunk_type: str = "observation"
+    message_id: str = ""

@dataclasses.dataclass
 class AgentAnswer(StreamingChunk):
--- a/trustgraph-base/trustgraph/base/graph_rag_client.py
+++ b/trustgraph-base/trustgraph/base/graph_rag_client.py
@ -5,6 +5,7 @@ from .. schema import GraphRagQuery, GraphRagResponse
 class GraphRagClient(RequestResponse):
    async def rag(self, query, user="trustgraph", collection="default",
                  chunk_callback=None, explain_callback=None,
+                  parent_uri="",
                  timeout=600):
        """
        Execute a graph RAG query with optional streaming callbacks.
@ -50,6 +51,7 @@ class GraphRagClient(RequestResponse):
                query = query,
                user = user,
                collection = collection,
+                parent_uri = parent_uri,
            ),
            timeout=timeout,
            recipient=recipient,
--- a/trustgraph-base/trustgraph/provenance/__init__.py
+++ b/trustgraph-base/trustgraph/provenance/__init__.py
@ -96,6 +96,7 @@ from . namespaces import (
    TG_ANALYSIS, TG_CONCLUSION,
    # Unifying types
    TG_ANSWER_TYPE, TG_REFLECTION_TYPE, TG_THOUGHT_TYPE, TG_OBSERVATION_TYPE,
+    TG_TOOL_USE,
    # Question subtypes (to distinguish retrieval mechanism)
    TG_GRAPH_RAG_QUESTION, TG_DOC_RAG_QUESTION, TG_AGENT_QUESTION,
    # Agent provenance predicates
@ -132,6 +133,7 @@ from . triples import (
 from . agent import (
    agent_session_triples,
    agent_iteration_triples,
+    agent_observation_triples,
    agent_final_triples,
    # Orchestrator provenance triple builders
    agent_decomposition_triples,
@ -210,6 +212,7 @@ __all__ = [
    "TG_ANALYSIS", "TG_CONCLUSION",
    # Unifying types
    "TG_ANSWER_TYPE", "TG_REFLECTION_TYPE", "TG_THOUGHT_TYPE", "TG_OBSERVATION_TYPE",
+    "TG_TOOL_USE",
    # Question subtypes
    "TG_GRAPH_RAG_QUESTION", "TG_DOC_RAG_QUESTION", "TG_AGENT_QUESTION",
    # Agent provenance predicates
@ -238,6 +241,7 @@ __all__ = [
    # Agent provenance triple builders
    "agent_session_triples",
    "agent_iteration_triples",
+    "agent_observation_triples",
    "agent_final_triples",
    # Orchestrator provenance triple builders
    "agent_decomposition_triples",
--- a/trustgraph-base/trustgraph/provenance/agent.py
+++ b/trustgraph-base/trustgraph/provenance/agent.py
@ -20,11 +20,12 @@ from .. schema import Triple, Term, IRI, LITERAL

 from . namespaces import (
    RDF_TYPE, RDFS_LABEL,
-    PROV_ACTIVITY, PROV_ENTITY, PROV_WAS_DERIVED_FROM,
-    PROV_WAS_GENERATED_BY, PROV_STARTED_AT_TIME,
-    TG_QUERY, TG_THOUGHT, TG_ACTION, TG_ARGUMENTS, TG_OBSERVATION,
+    PROV_ENTITY, PROV_WAS_DERIVED_FROM,
+    PROV_STARTED_AT_TIME,
+    TG_QUERY, TG_THOUGHT, TG_ACTION, TG_ARGUMENTS,
    TG_QUESTION, TG_ANALYSIS, TG_CONCLUSION, TG_DOCUMENT,
    TG_ANSWER_TYPE, TG_REFLECTION_TYPE, TG_THOUGHT_TYPE, TG_OBSERVATION_TYPE,
+    TG_TOOL_USE,
    TG_AGENT_QUESTION,
    TG_DECOMPOSITION, TG_FINDING, TG_PLAN_TYPE, TG_STEP_RESULT,
    TG_SYNTHESIS, TG_SUBAGENT_GOAL, TG_PLAN_STEP,
@ -70,7 +71,7 @@ def agent_session_triples(
        timestamp = datetime.utcnow().isoformat() + "Z"

    return [
-        _triple(session_uri, RDF_TYPE, _iri(PROV_ACTIVITY)),
+        _triple(session_uri, RDF_TYPE, _iri(PROV_ENTITY)),
        _triple(session_uri, RDF_TYPE, _iri(TG_QUESTION)),
        _triple(session_uri, RDF_TYPE, _iri(TG_AGENT_QUESTION)),
        _triple(session_uri, RDFS_LABEL, _literal("Agent Question")),
@ -87,19 +88,15 @@ def agent_iteration_triples(
    arguments: Dict[str, Any] = None,
    thought_uri: Optional[str] = None,
    thought_document_id: Optional[str] = None,
-    observation_uri: Optional[str] = None,
-    observation_document_id: Optional[str] = None,
 ) -> List[Triple]:
    """
-    Build triples for one agent iteration (Analysis - think/act/observe cycle).
+    Build triples for one agent iteration (Analysis+ToolUse).

    Creates:
-    - Entity declaration with tg:Analysis type
-    - wasGeneratedBy link to question (if first iteration)
-    - wasDerivedFrom link to previous iteration (if not first)
+    - Entity declaration with tg:Analysis and tg:ToolUse types
+    - wasDerivedFrom link to question (if first iteration) or previous
    - Action and arguments metadata
    - Thought sub-entity (tg:Reflection, tg:Thought) with librarian document
-    - Observation sub-entity (tg:Reflection, tg:Observation) with librarian document

    Args:
        iteration_uri: URI of this iteration (from agent_iteration_uri)
@ -109,8 +106,6 @@ def agent_iteration_triples(
        arguments: Arguments passed to the tool (will be JSON-encoded)
        thought_uri: URI for the thought sub-entity
        thought_document_id: Document URI for thought in librarian
-        observation_uri: URI for the observation sub-entity
-        observation_document_id: Document URI for observation in librarian

    Returns:
        List of Triple objects
@ -121,6 +116,7 @@ def agent_iteration_triples(
    triples = [
        _triple(iteration_uri, RDF_TYPE, _iri(PROV_ENTITY)),
        _triple(iteration_uri, RDF_TYPE, _iri(TG_ANALYSIS)),
+        _triple(iteration_uri, RDF_TYPE, _iri(TG_TOOL_USE)),
        _triple(iteration_uri, RDFS_LABEL, _literal(f"Analysis: {action}")),
        _triple(iteration_uri, TG_ACTION, _literal(action)),
        _triple(iteration_uri, TG_ARGUMENTS, _literal(json.dumps(arguments))),
@ -128,7 +124,7 @@ def agent_iteration_triples(

    if question_uri:
        triples.append(
-            _triple(iteration_uri, PROV_WAS_GENERATED_BY, _iri(question_uri))
+            _triple(iteration_uri, PROV_WAS_DERIVED_FROM, _iri(question_uri))
        )
    elif previous_uri:
        triples.append(
@ -142,26 +138,48 @@ def agent_iteration_triples(
            _triple(thought_uri, RDF_TYPE, _iri(TG_REFLECTION_TYPE)),
            _triple(thought_uri, RDF_TYPE, _iri(TG_THOUGHT_TYPE)),
            _triple(thought_uri, RDFS_LABEL, _literal("Thought")),
-            _triple(thought_uri, PROV_WAS_GENERATED_BY, _iri(iteration_uri)),
+            _triple(thought_uri, PROV_WAS_DERIVED_FROM, _iri(iteration_uri)),
        ])
        if thought_document_id:
            triples.append(
                _triple(thought_uri, TG_DOCUMENT, _iri(thought_document_id))
            )

-    # Observation sub-entity
-    if observation_uri:
-        triples.extend([
-            _triple(iteration_uri, TG_OBSERVATION, _iri(observation_uri)),
-            _triple(observation_uri, RDF_TYPE, _iri(TG_REFLECTION_TYPE)),
-            _triple(observation_uri, RDF_TYPE, _iri(TG_OBSERVATION_TYPE)),
-            _triple(observation_uri, RDFS_LABEL, _literal("Observation")),
-            _triple(observation_uri, PROV_WAS_GENERATED_BY, _iri(iteration_uri)),
-        ])
-        if observation_document_id:
-            triples.append(
-                _triple(observation_uri, TG_DOCUMENT, _iri(observation_document_id))
-            )
+    return triples
+
+
+def agent_observation_triples(
+    observation_uri: str,
+    iteration_uri: str,
+    document_id: Optional[str] = None,
+) -> List[Triple]:
+    """
+    Build triples for an agent observation (standalone entity).
+
+    Creates:
+    - Entity declaration with prov:Entity and tg:Observation types
+    - wasDerivedFrom link to the iteration (Analysis+ToolUse)
+    - Document reference to librarian (if provided)
+
+    Args:
+        observation_uri: URI of the observation entity
+        iteration_uri: URI of the iteration this observation derives from
+        document_id: Librarian document ID for the observation content
+
+    Returns:
+        List of Triple objects
+    """
+    triples = [
+        _triple(observation_uri, RDF_TYPE, _iri(PROV_ENTITY)),
+        _triple(observation_uri, RDF_TYPE, _iri(TG_OBSERVATION_TYPE)),
+        _triple(observation_uri, RDFS_LABEL, _literal("Observation")),
+        _triple(observation_uri, PROV_WAS_DERIVED_FROM, _iri(iteration_uri)),
+    ]
+
+    if document_id:
+        triples.append(
+            _triple(observation_uri, TG_DOCUMENT, _iri(document_id))
+        )

    return triples

@ -199,7 +217,7 @@ def agent_final_triples(

    if question_uri:
        triples.append(
-            _triple(final_uri, PROV_WAS_GENERATED_BY, _iri(question_uri))
+            _triple(final_uri, PROV_WAS_DERIVED_FROM, _iri(question_uri))
        )
    elif previous_uri:
        triples.append(
@ -223,7 +241,7 @@ def agent_decomposition_triples(
        _triple(uri, RDF_TYPE, _iri(TG_DECOMPOSITION)),
        _triple(uri, RDFS_LABEL,
                _literal(f"Decomposed into {len(goals)} research threads")),
-        _triple(uri, PROV_WAS_GENERATED_BY, _iri(session_uri)),
+        _triple(uri, PROV_WAS_DERIVED_FROM, _iri(session_uri)),
    ]
    for goal in goals:
        triples.append(_triple(uri, TG_SUBAGENT_GOAL, _literal(goal)))
@ -261,7 +279,7 @@ def agent_plan_triples(
        _triple(uri, RDF_TYPE, _iri(TG_PLAN_TYPE)),
        _triple(uri, RDFS_LABEL,
                _literal(f"Plan with {len(steps)} steps")),
-        _triple(uri, PROV_WAS_GENERATED_BY, _iri(session_uri)),
+        _triple(uri, PROV_WAS_DERIVED_FROM, _iri(session_uri)),
    ]
    for step in steps:
        triples.append(_triple(uri, TG_PLAN_STEP, _literal(step)))
--- a/trustgraph-base/trustgraph/provenance/namespaces.py
+++ b/trustgraph-base/trustgraph/provenance/namespaces.py
@ -105,6 +105,7 @@ TG_ANSWER_TYPE = TG + "Answer"          # Final answer (Synthesis, Conclusion, F
 TG_REFLECTION_TYPE = TG + "Reflection"  # Intermediate commentary (Thought, Observation)
 TG_THOUGHT_TYPE = TG + "Thought"        # Agent reasoning
 TG_OBSERVATION_TYPE = TG + "Observation" # Agent tool result
+TG_TOOL_USE = TG + "ToolUse"            # Analysis+ToolUse mixin

 # Question subtypes (to distinguish retrieval mechanism)
 TG_GRAPH_RAG_QUESTION = TG + "GraphRagQuestion"
--- a/trustgraph-base/trustgraph/provenance/triples.py
+++ b/trustgraph-base/trustgraph/provenance/triples.py
@ -353,18 +353,21 @@ def question_triples(
    question_uri: str,
    query: str,
    timestamp: Optional[str] = None,
+    parent_uri: Optional[str] = None,
 ) -> List[Triple]:
    """
-    Build triples for a question activity.
+    Build triples for a question entity.

    Creates:
-    - Activity declaration for the question
+    - Entity declaration for the question
    - Query text and timestamp
+    - Optional wasDerivedFrom link to parent (for sub-traces)

    Args:
        question_uri: URI of the question (from question_uri)
        query: The user's query text
        timestamp: ISO timestamp (defaults to now)
+        parent_uri: Optional parent URI to link as wasDerivedFrom (for sub-traces)

    Returns:
        List of Triple objects
@ -372,8 +375,8 @@ def question_triples(
    if timestamp is None:
        timestamp = datetime.utcnow().isoformat() + "Z"

-    return [
-        _triple(question_uri, RDF_TYPE, _iri(PROV_ACTIVITY)),
+    triples = [
+        _triple(question_uri, RDF_TYPE, _iri(PROV_ENTITY)),
        _triple(question_uri, RDF_TYPE, _iri(TG_QUESTION)),
        _triple(question_uri, RDF_TYPE, _iri(TG_GRAPH_RAG_QUESTION)),
        _triple(question_uri, RDFS_LABEL, _literal("GraphRAG Question")),
@ -381,6 +384,13 @@ def question_triples(
        _triple(question_uri, TG_QUERY, _literal(query)),
    ]

+    if parent_uri:
+        triples.append(
+            _triple(question_uri, PROV_WAS_DERIVED_FROM, _iri(parent_uri))
+        )
+
+    return triples
+

 def grounding_triples(
    grounding_uri: str,
@ -407,7 +417,7 @@ def grounding_triples(
        _triple(grounding_uri, RDF_TYPE, _iri(PROV_ENTITY)),
        _triple(grounding_uri, RDF_TYPE, _iri(TG_GROUNDING)),
        _triple(grounding_uri, RDFS_LABEL, _literal("Grounding")),
-        _triple(grounding_uri, PROV_WAS_GENERATED_BY, _iri(question_uri)),
+        _triple(grounding_uri, PROV_WAS_DERIVED_FROM, _iri(question_uri)),
    ]

    for concept in concepts:
@ -575,18 +585,21 @@ def docrag_question_triples(
    question_uri: str,
    query: str,
    timestamp: Optional[str] = None,
+    parent_uri: Optional[str] = None,
 ) -> List[Triple]:
    """
-    Build triples for a document RAG question activity.
+    Build triples for a document RAG question entity.

    Creates:
-    - Activity declaration with tg:Question type
+    - Entity declaration with tg:Question type
    - Query text and timestamp
+    - Optional wasDerivedFrom link to parent (for sub-traces)

    Args:
        question_uri: URI of the question (from docrag_question_uri)
        query: The user's query text
        timestamp: ISO timestamp (defaults to now)
+        parent_uri: Optional parent URI to link as wasDerivedFrom (for sub-traces)

    Returns:
        List of Triple objects
@ -594,8 +607,8 @@ def docrag_question_triples(
    if timestamp is None:
        timestamp = datetime.utcnow().isoformat() + "Z"

-    return [
-        _triple(question_uri, RDF_TYPE, _iri(PROV_ACTIVITY)),
+    triples = [
+        _triple(question_uri, RDF_TYPE, _iri(PROV_ENTITY)),
        _triple(question_uri, RDF_TYPE, _iri(TG_QUESTION)),
        _triple(question_uri, RDF_TYPE, _iri(TG_DOC_RAG_QUESTION)),
        _triple(question_uri, RDFS_LABEL, _literal("DocumentRAG Question")),
@ -603,6 +616,13 @@ def docrag_question_triples(
        _triple(question_uri, TG_QUERY, _literal(query)),
    ]

+    if parent_uri:
+        triples.append(
+            _triple(question_uri, PROV_WAS_DERIVED_FROM, _iri(parent_uri))
+        )
+
+    return triples
+

 def docrag_exploration_triples(
    exploration_uri: str,
--- a/trustgraph-base/trustgraph/schema/services/retrieval.py
+++ b/trustgraph-base/trustgraph/schema/services/retrieval.py
@ -18,6 +18,7 @@ class GraphRagQuery:
    edge_score_limit: int = 0
    edge_limit: int = 0
    streaming: bool = False
+    parent_uri: str = ""

@dataclass
 class GraphRagResponse: