Merge branch 'release/v2.3'

Cyber MacGeddon 2026-04-16 09:13:52 +01:00
commit 1f30a3bcea
155 changed files with 6526 additions and 1885 deletions

View file

@ -15,6 +15,7 @@ dependencies = [
"requests",
"python-logging-loki",
"pika",
"pyyaml",
]
classifiers = [
"Programming Language :: Python :: 3",
@ -24,6 +25,9 @@ classifiers = [
[project.urls]
Homepage = "https://github.com/trustgraph-ai/trustgraph"
[project.scripts]
processor-group = "trustgraph.base.processor_group:run"
[tool.setuptools.packages.find]
include = ["trustgraph*"]
@ -31,4 +35,4 @@ include = ["trustgraph*"]
"trustgraph.i18n.packs" = ["*.json"]
[tool.setuptools.dynamic]
version = {attr = "trustgraph.base_version.__version__"}
version = {attr = "trustgraph.base_version.__version__"}

View file

@ -107,6 +107,7 @@ from .types import (
AgentObservation,
AgentAnswer,
RAGChunk,
TextCompletionResult,
ProvenanceEvent,
)
@ -185,6 +186,7 @@ __all__ = [
"AgentObservation",
"AgentAnswer",
"RAGChunk",
"TextCompletionResult",
"ProvenanceEvent",
# Exceptions

View file

@ -14,6 +14,8 @@ import aiohttp
import json
from typing import Optional, Dict, Any, List
from . types import TextCompletionResult
from . exceptions import ProtocolException, ApplicationException
@ -434,12 +436,11 @@ class AsyncFlowInstance:
return await self.request("agent", request_data)
async def text_completion(self, system: str, prompt: str, **kwargs: Any) -> str:
async def text_completion(self, system: str, prompt: str, **kwargs: Any) -> TextCompletionResult:
"""
Generate text completion (non-streaming).
Generates a text response from an LLM given a system prompt and user prompt.
Returns the complete response text.
Note: This method does not support streaming. For streaming text generation,
use AsyncSocketFlowInstance.text_completion() instead.
@ -450,19 +451,19 @@ class AsyncFlowInstance:
**kwargs: Additional service-specific parameters
Returns:
str: Complete generated text response
TextCompletionResult: Result with text, in_token, out_token, model
Example:
```python
async_flow = await api.async_flow()
flow = async_flow.id("default")
# Generate text
response = await flow.text_completion(
result = await flow.text_completion(
system="You are a helpful assistant.",
prompt="Explain quantum computing in simple terms."
)
print(response)
print(result.text)
print(f"Tokens: {result.in_token} in, {result.out_token} out")
```
"""
request_data = {
@ -473,7 +474,12 @@ class AsyncFlowInstance:
request_data.update(kwargs)
result = await self.request("text-completion", request_data)
return result.get("response", "")
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
async def graph_rag(self, query: str, user: str, collection: str,
max_subgraph_size: int = 1000, max_subgraph_count: int = 5,

View file

@ -4,7 +4,7 @@ import asyncio
import websockets
from typing import Optional, Dict, Any, AsyncIterator, Union
from . types import AgentThought, AgentObservation, AgentAnswer, RAGChunk
from . types import AgentThought, AgentObservation, AgentAnswer, RAGChunk, TextCompletionResult
from . exceptions import ProtocolException, ApplicationException
@ -178,30 +178,32 @@ class AsyncSocketClient:
def _parse_chunk(self, resp: Dict[str, Any]):
"""Parse response chunk into appropriate type. Returns None for non-content messages."""
chunk_type = resp.get("chunk_type")
message_type = resp.get("message_type")
# Handle new GraphRAG message format with message_type
if message_type == "provenance":
return None
if chunk_type == "thought":
if message_type == "thought":
return AgentThought(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False)
)
elif chunk_type == "observation":
elif message_type == "observation":
return AgentObservation(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False)
)
elif chunk_type == "answer" or chunk_type == "final-answer":
elif message_type == "answer" or message_type == "final-answer":
return AgentAnswer(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False),
end_of_dialog=resp.get("end_of_dialog", False)
end_of_dialog=resp.get("end_of_dialog", False),
in_token=resp.get("in_token"),
out_token=resp.get("out_token"),
model=resp.get("model"),
)
elif chunk_type == "action":
elif message_type == "action":
return AgentThought(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False)
@ -211,7 +213,10 @@ class AsyncSocketClient:
return RAGChunk(
content=content,
end_of_stream=resp.get("end_of_stream", False),
error=None
error=None,
in_token=resp.get("in_token"),
out_token=resp.get("out_token"),
model=resp.get("model"),
)
async def aclose(self):
@ -269,7 +274,11 @@ class AsyncSocketFlowInstance:
return await self.client._send_request("agent", self.flow_id, request)
async def text_completion(self, system: str, prompt: str, streaming: bool = False, **kwargs):
"""Text completion with optional streaming"""
"""Text completion with optional streaming.
Non-streaming: returns a TextCompletionResult with text and token counts.
Streaming: returns an async iterator of RAGChunk (with token counts on the final chunk).
"""
request = {
"system": system,
"prompt": prompt,
@ -281,13 +290,18 @@ class AsyncSocketFlowInstance:
return self._text_completion_streaming(request)
else:
result = await self.client._send_request("text-completion", self.flow_id, request)
return result.get("response", "")
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
async def _text_completion_streaming(self, request):
"""Helper for streaming text completion"""
"""Helper for streaming text completion. Yields RAGChunk objects."""
async for chunk in self.client._send_request_streaming("text-completion", self.flow_id, request):
if hasattr(chunk, 'content'):
yield chunk.content
if isinstance(chunk, RAGChunk):
yield chunk
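
For callers, the streaming path now yields RAGChunk objects rather than bare strings, with token counts populated only on the final chunk. A minimal consumption sketch, assuming `flow` is an AsyncSocketFlowInstance obtained from the client:

```python
# Sketch: consuming the streaming variant; `flow` is assumed to be an
# AsyncSocketFlowInstance. Each yielded item is a RAGChunk.
async def stream_completion(flow):
    chunks = await flow.text_completion(
        system="You are a helpful assistant.",
        prompt="Explain quantum computing in simple terms.",
        streaming=True,
    )
    parts = []
    async for chunk in chunks:
        parts.append(chunk.content)
        if chunk.end_of_stream:
            # Token counts and model arrive only on the final chunk.
            print(f"Tokens: {chunk.in_token} in, {chunk.out_token} out ({chunk.model})")
    return "".join(parts)
```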
async def graph_rag(self, query: str, user: str, collection: str,
max_subgraph_size: int = 1000, max_subgraph_count: int = 5,

View file

@ -11,7 +11,7 @@ import base64
from .. knowledge import hash, Uri, Literal, QuotedTriple
from .. schema import IRI, LITERAL, TRIPLE
from . types import Triple
from . types import Triple, TextCompletionResult
from . exceptions import ProtocolException
@ -360,16 +360,17 @@ class FlowInstance:
prompt: User prompt/question
Returns:
str: Generated response text
TextCompletionResult: Result with text, in_token, out_token, model
Example:
```python
flow = api.flow().id("default")
response = flow.text_completion(
result = flow.text_completion(
system="You are a helpful assistant",
prompt="What is quantum computing?"
)
print(response)
print(result.text)
print(f"Tokens: {result.in_token} in, {result.out_token} out")
```
"""
@ -379,10 +380,17 @@ class FlowInstance:
"prompt": prompt
}
return self.request(
result = self.request(
"service/text-completion",
input
)["response"]
)
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
def agent(self, question, user="trustgraph", state=None, group=None, history=None):
"""
@ -498,10 +506,17 @@ class FlowInstance:
"edge-limit": edge_limit,
}
return self.request(
result = self.request(
"service/graph-rag",
input
)["response"]
)
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
def document_rag(
self, query, user="trustgraph", collection="default",
@ -543,10 +558,17 @@ class FlowInstance:
"doc-limit": doc_limit,
}
return self.request(
result = self.request(
"service/document-rag",
input
)["response"]
)
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
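
Callers of the synchronous FlowInstance see the same shape change for graph_rag and document_rag: the return value is a TextCompletionResult rather than the response string, so existing code needs `.text`. A hedged sketch, with the query text purely illustrative:

```python
# Sketch: document_rag now returns a TextCompletionResult; `api` is
# assumed to be an already-constructed API client.
flow = api.flow().id("default")
result = flow.document_rag(query="What is quantum computing?")

print(result.text)                    # previously the whole return value
if result.in_token is not None:
    print(f"Tokens: {result.in_token} in, {result.out_token} out ({result.model})")
```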
def embeddings(self, texts):
"""

View file

@ -14,7 +14,7 @@ import websockets
from typing import Optional, Dict, Any, Iterator, Union, List
from threading import Lock
from . types import AgentThought, AgentObservation, AgentAnswer, RAGChunk, StreamingChunk, ProvenanceEvent
from . types import AgentThought, AgentObservation, AgentAnswer, RAGChunk, StreamingChunk, ProvenanceEvent, TextCompletionResult
from . exceptions import ProtocolException, raise_from_error_dict
@ -360,41 +360,36 @@ class SocketClient:
def _parse_chunk(self, resp: Dict[str, Any], include_provenance: bool = False) -> Optional[StreamingChunk]:
"""Parse response chunk into appropriate type. Returns None for non-content messages."""
chunk_type = resp.get("chunk_type")
message_type = resp.get("message_type")
# Handle GraphRAG/DocRAG message format with message_type
if message_type == "explain":
if include_provenance:
return self._build_provenance_event(resp)
return None
# Handle Agent message format with chunk_type="explain"
if chunk_type == "explain":
if include_provenance:
return self._build_provenance_event(resp)
return None
if chunk_type == "thought":
if message_type == "thought":
return AgentThought(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False),
message_id=resp.get("message_id", ""),
)
elif chunk_type == "observation":
elif message_type == "observation":
return AgentObservation(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False),
message_id=resp.get("message_id", ""),
)
elif chunk_type == "answer" or chunk_type == "final-answer":
elif message_type == "answer" or message_type == "final-answer":
return AgentAnswer(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False),
end_of_dialog=resp.get("end_of_dialog", False),
message_id=resp.get("message_id", ""),
in_token=resp.get("in_token"),
out_token=resp.get("out_token"),
model=resp.get("model"),
)
elif chunk_type == "action":
elif message_type == "action":
return AgentThought(
content=resp.get("content", ""),
end_of_message=resp.get("end_of_message", False)
@ -404,7 +399,10 @@ class SocketClient:
return RAGChunk(
content=content,
end_of_stream=resp.get("end_of_stream", False),
error=None
error=None,
in_token=resp.get("in_token"),
out_token=resp.get("out_token"),
model=resp.get("model"),
)
def _build_provenance_event(self, resp: Dict[str, Any]) -> ProvenanceEvent:
@ -543,8 +541,12 @@ class SocketFlowInstance:
streaming=True, include_provenance=True
)
def text_completion(self, system: str, prompt: str, streaming: bool = False, **kwargs) -> Union[str, Iterator[str]]:
"""Execute text completion with optional streaming."""
def text_completion(self, system: str, prompt: str, streaming: bool = False, **kwargs) -> Union[TextCompletionResult, Iterator[RAGChunk]]:
"""Execute text completion with optional streaming.
Non-streaming: returns a TextCompletionResult with text and token counts.
Streaming: returns an iterator of RAGChunk (with token counts on the final chunk).
"""
request = {
"system": system,
"prompt": prompt,
@ -557,12 +559,17 @@ class SocketFlowInstance:
if streaming:
return self._text_completion_generator(result)
else:
return result.get("response", "")
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
def _text_completion_generator(self, result: Iterator[StreamingChunk]) -> Iterator[str]:
def _text_completion_generator(self, result: Iterator[StreamingChunk]) -> Iterator[RAGChunk]:
for chunk in result:
if hasattr(chunk, 'content'):
yield chunk.content
if isinstance(chunk, RAGChunk):
yield chunk
def graph_rag(
self,
@ -577,8 +584,12 @@ class SocketFlowInstance:
edge_limit: int = 25,
streaming: bool = False,
**kwargs: Any
) -> Union[str, Iterator[str]]:
"""Execute graph-based RAG query with optional streaming."""
) -> Union[TextCompletionResult, Iterator[RAGChunk]]:
"""Execute graph-based RAG query with optional streaming.
Non-streaming: returns a TextCompletionResult with text and token counts.
Streaming: returns an iterator of RAGChunk (with token counts on the final chunk).
"""
request = {
"query": query,
"user": user,
@ -598,7 +609,12 @@ class SocketFlowInstance:
if streaming:
return self._rag_generator(result)
else:
return result.get("response", "")
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
def graph_rag_explain(
self,
@ -642,8 +658,12 @@ class SocketFlowInstance:
doc_limit: int = 10,
streaming: bool = False,
**kwargs: Any
) -> Union[str, Iterator[str]]:
"""Execute document-based RAG query with optional streaming."""
) -> Union[TextCompletionResult, Iterator[RAGChunk]]:
"""Execute document-based RAG query with optional streaming.
Non-streaming: returns a TextCompletionResult with text and token counts.
Streaming: returns an iterator of RAGChunk (with token counts on the final chunk).
"""
request = {
"query": query,
"user": user,
@ -658,7 +678,12 @@ class SocketFlowInstance:
if streaming:
return self._rag_generator(result)
else:
return result.get("response", "")
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
def document_rag_explain(
self,
@ -684,10 +709,10 @@ class SocketFlowInstance:
streaming=True, include_provenance=True
)
def _rag_generator(self, result: Iterator[StreamingChunk]) -> Iterator[str]:
def _rag_generator(self, result: Iterator[StreamingChunk]) -> Iterator[RAGChunk]:
for chunk in result:
if hasattr(chunk, 'content'):
yield chunk.content
if isinstance(chunk, RAGChunk):
yield chunk
def prompt(
self,
@ -695,8 +720,12 @@ class SocketFlowInstance:
variables: Dict[str, str],
streaming: bool = False,
**kwargs: Any
) -> Union[str, Iterator[str]]:
"""Execute a prompt template with optional streaming."""
) -> Union[TextCompletionResult, Iterator[RAGChunk]]:
"""Execute a prompt template with optional streaming.
Non-streaming: returns a TextCompletionResult with text and token counts.
Streaming: returns an iterator of RAGChunk (with token counts on the final chunk).
"""
request = {
"id": id,
"variables": variables,
@ -709,7 +738,12 @@ class SocketFlowInstance:
if streaming:
return self._rag_generator(result)
else:
return result.get("response", "")
return TextCompletionResult(
text=result.get("text", result.get("response", "")),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
def graph_embeddings_query(
self,

View file

@ -149,10 +149,10 @@ class AgentThought(StreamingChunk):
Attributes:
content: Agent's thought text
end_of_message: True if this completes the current thought
chunk_type: Always "thought"
message_type: Always "thought"
message_id: Provenance URI of the entity being built
"""
chunk_type: str = "thought"
message_type: str = "thought"
message_id: str = ""
@dataclasses.dataclass
@ -166,10 +166,10 @@ class AgentObservation(StreamingChunk):
Attributes:
content: Observation text describing tool results
end_of_message: True if this completes the current observation
chunk_type: Always "observation"
message_type: Always "observation"
message_id: Provenance URI of the entity being built
"""
chunk_type: str = "observation"
message_type: str = "observation"
message_id: str = ""
@dataclasses.dataclass
@ -184,11 +184,14 @@ class AgentAnswer(StreamingChunk):
content: Answer text
end_of_message: True if this completes the current answer segment
end_of_dialog: True if this completes the entire agent interaction
chunk_type: Always "final-answer"
message_type: Always "final-answer"
"""
chunk_type: str = "final-answer"
message_type: str = "final-answer"
end_of_dialog: bool = False
message_id: str = ""
in_token: Optional[int] = None
out_token: Optional[int] = None
model: Optional[str] = None
@dataclasses.dataclass
class RAGChunk(StreamingChunk):
@ -202,11 +205,37 @@ class RAGChunk(StreamingChunk):
content: Generated text content
end_of_stream: True if this is the final chunk of the stream
error: Optional error information if an error occurred
chunk_type: Always "rag"
in_token: Input token count (populated on the final chunk, None otherwise)
out_token: Output token count (populated on the final chunk, None otherwise)
model: Model identifier (populated on the final chunk, None otherwise)
message_type: Always "rag"
"""
chunk_type: str = "rag"
message_type: str = "rag"
end_of_stream: bool = False
error: Optional[Dict[str, str]] = None
in_token: Optional[int] = None
out_token: Optional[int] = None
model: Optional[str] = None
@dataclasses.dataclass
class TextCompletionResult:
"""
Result from a text completion request.
Returned by text_completion() in both streaming and non-streaming modes.
In streaming mode, text is None (chunks are delivered via the iterator).
In non-streaming mode, text contains the complete response.
Attributes:
text: Complete response text (None in streaming mode)
in_token: Input token count (None if not available)
out_token: Output token count (None if not available)
model: Model identifier (None if not available)
"""
text: Optional[str]
in_token: Optional[int] = None
out_token: Optional[int] = None
model: Optional[str] = None
@dataclasses.dataclass
class ProvenanceEvent:

View file

@ -18,8 +18,10 @@ from . librarian_client import LibrarianClient
from . chunking_service import ChunkingService
from . embeddings_service import EmbeddingsService
from . embeddings_client import EmbeddingsClientSpec
from . text_completion_client import TextCompletionClientSpec
from . prompt_client import PromptClientSpec
from . text_completion_client import (
TextCompletionClientSpec, TextCompletionClient, TextCompletionResult,
)
from . prompt_client import PromptClientSpec, PromptClient, PromptResult
from . triples_store_service import TriplesStoreService
from . graph_embeddings_store_service import GraphEmbeddingsStoreService
from . document_embeddings_store_service import DocumentEmbeddingsStoreService

View file

@ -30,19 +30,19 @@ class AgentClient(RequestResponse):
raise RuntimeError(resp.error.message)
# Handle thought chunks
if resp.chunk_type == 'thought':
if resp.message_type == 'thought':
if think:
await think(resp.content, resp.end_of_message)
return False # Continue receiving
# Handle observation chunks
if resp.chunk_type == 'observation':
if resp.message_type == 'observation':
if observe:
await observe(resp.content, resp.end_of_message)
return False # Continue receiving
# Handle answer chunks
if resp.chunk_type == 'answer':
if resp.message_type == 'answer':
if resp.content:
accumulated_answer.append(resp.content)
if answer_callback:

View file

@ -58,6 +58,18 @@ class BackendProducer(Protocol):
class BackendConsumer(Protocol):
"""Protocol for backend-specific consumer."""
def ensure_connected(self) -> None:
"""
Eagerly establish the underlying connection and bind the queue.
Backends that lazily connect on first receive() must implement this
so that callers can guarantee the consumer is fully bound and
therefore able to receive responses before any related request is
published. Backends that connect at construction time may make this
a no-op.
"""
...
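
A backend that connects lazily would typically make this method idempotent: connect and bind if the consumer is not already live, otherwise do nothing. A minimal self-contained sketch mirroring the RabbitMQ consumer later in this diff; the `_is_alive()`/`_connect()` helpers stand in for backend-specific plumbing:

```python
# Sketch of ensure_connected() for a lazily-connecting backend.
class LazyBackendConsumer:
    """Sketch only: other BackendConsumer methods omitted."""

    def __init__(self):
        self._connected = False

    def _is_alive(self) -> bool:
        return self._connected

    def _connect(self) -> None:
        # A real backend would open the connection and declare/bind the
        # queue here.
        self._connected = True

    def ensure_connected(self) -> None:
        # Idempotent readiness barrier: after this returns, the queue is
        # declared and bound, so a reply published immediately afterwards
        # has somewhere to land.
        if not self._is_alive():
            self._connect()
```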
def receive(self, timeout_millis: int = 2000) -> Message:
"""
Receive a message from the topic.

View file

@ -88,14 +88,14 @@ class ChunkingService(FlowProcessor):
chunk_overlap = default_chunk_overlap
try:
cs = flow.parameters.get("chunk-size")
cs = flow("chunk-size")
if cs is not None:
chunk_size = int(cs)
except Exception as e:
logger.warning(f"Could not parse chunk-size parameter: {e}")
try:
co = flow.parameters.get("chunk-overlap")
co = flow("chunk-overlap")
if co is not None:
chunk_overlap = int(co)
except Exception as e:

View file

@ -8,12 +8,51 @@ ensuring consistent log formats, levels, and command-line arguments.
Supports dual output to console and Loki for centralized log aggregation.
"""
import contextvars
import logging
import logging.handlers
from queue import Queue
import os
# The current processor id for this task context. Read by
# _ProcessorIdFilter to stamp every LogRecord with its owning
# processor, and read by logging_loki's emitter via record.tags
# to label log lines in Loki. ContextVar so asyncio subtasks
# inherit their parent supervisor's processor id automatically.
current_processor_id = contextvars.ContextVar(
"current_processor_id", default="unknown"
)
def set_processor_id(pid):
"""Set the processor id for the current task context.
All subsequent log records emitted from this task and any
asyncio tasks spawned from it will be tagged with this id
in the console format and in Loki labels.
"""
current_processor_id.set(pid)
class _ProcessorIdFilter(logging.Filter):
"""Stamps every LogRecord with processor_id from the contextvar.
Attaches two fields to each record:
record.processor_id used by the console format string
record.tags merged into Loki labels by logging_loki's
emitter (it reads record.tags and combines
with the handler's static tags)
"""
def filter(self, record):
pid = current_processor_id.get()
record.processor_id = pid
existing = getattr(record, "tags", None) or {}
record.tags = {**existing, "processor": pid}
return True
def add_logging_args(parser):
"""
Add standard logging arguments to an argument parser.
@ -87,12 +126,15 @@ def setup_logging(args):
loki_url = args.get('loki_url', 'http://loki:3100/loki/api/v1/push')
loki_username = args.get('loki_username')
loki_password = args.get('loki_password')
processor_id = args.get('id') # Processor identity (e.g., "config-svc", "text-completion")
try:
from logging_loki import LokiHandler
# Create Loki handler with optional authentication and processor label
# Create Loki handler with optional authentication. The
# processor label is NOT baked in here — it's stamped onto
# each record by _ProcessorIdFilter reading the task-local
# contextvar, and logging_loki's emitter reads record.tags
# to build per-record Loki labels.
loki_handler_kwargs = {
'url': loki_url,
'version': "1",
@ -101,10 +143,6 @@ def setup_logging(args):
if loki_username and loki_password:
loki_handler_kwargs['auth'] = (loki_username, loki_password)
# Add processor label if available (for consistency with Prometheus metrics)
if processor_id:
loki_handler_kwargs['tags'] = {'processor': processor_id}
loki_handler = LokiHandler(**loki_handler_kwargs)
# Wrap in QueueHandler for non-blocking operation
@ -133,23 +171,44 @@ def setup_logging(args):
print(f"WARNING: Failed to setup Loki logging: {e}")
print("Continuing with console-only logging")
# Get processor ID for log formatting (use 'unknown' if not available)
processor_id = args.get('id', 'unknown')
# Configure logging with all handlers
# Use processor ID as the primary identifier in logs
# Configure logging with all handlers. The processor id comes
# from _ProcessorIdFilter (via contextvar) and is injected into
# each record as record.processor_id. The format string reads
# that attribute on every emit.
logging.basicConfig(
level=getattr(logging, log_level.upper()),
format=f'%(asctime)s - {processor_id} - %(levelname)s - %(message)s',
format='%(asctime)s - %(processor_id)s - %(levelname)s - %(message)s',
handlers=handlers,
force=True # Force reconfiguration if already configured
)
# Prevent recursive logging from Loki's HTTP client
if loki_enabled and queue_listener:
# Disable urllib3 logging to prevent infinite loop
logging.getLogger('urllib3').setLevel(logging.WARNING)
logging.getLogger('urllib3.connectionpool').setLevel(logging.WARNING)
# Attach the processor-id filter to every handler so all records
# passing through any sink get stamped (console, queue→loki,
# future handlers). Filters on handlers run regardless of which
# logger originated the record, so logs from pika, cassandra,
# processor code, etc. all pass through it.
processor_filter = _ProcessorIdFilter()
for h in handlers:
h.addFilter(processor_filter)
# Seed the contextvar from --id if one was supplied. In group
# mode --id isn't present; the processor_group supervisor sets
# it per task. In standalone mode AsyncProcessor.launch provides
# it via argparse default.
if args.get('id'):
set_processor_id(args['id'])
# Silence noisy third-party library loggers. These emit INFO-level
# chatter (connection churn, channel open/close, driver warnings) that
# drowns the useful signal and can't be attributed to a specific
# processor anyway. WARNING and above still propagate.
for noisy in (
'pika',
'cassandra',
'urllib3',
'urllib3.connectionpool',
):
logging.getLogger(noisy).setLevel(logging.WARNING)
logger = logging.getLogger(__name__)
logger.info(f"Logging configured with level: {log_level}")

View file

@ -0,0 +1,204 @@
# Multi-processor group runner. Runs multiple AsyncProcessor descendants
# as concurrent tasks inside a single process, sharing one event loop,
# one Prometheus HTTP server, and one pub/sub backend pool.
#
# Intended for dev and resource-constrained deployments. Scale deployments
# should continue to use per-processor endpoints.
#
# Group config is a YAML or JSON file with shape:
#
# processors:
# - class: trustgraph.extract.kg.definitions.extract.Processor
# params:
# id: kg-extract-definitions
# triples_batch_size: 1000
# - class: trustgraph.chunking.recursive.Processor
# params:
# id: chunker-recursive
#
# Each entry's params are passed directly to the class constructor alongside
# the shared taskgroup. Defaults live inside each processor class.
import argparse
import asyncio
import importlib
import json
import logging
import time
from prometheus_client import start_http_server
from . logging import add_logging_args, setup_logging, set_processor_id
logger = logging.getLogger(__name__)
def _load_config(path):
with open(path) as f:
text = f.read()
if path.endswith((".yaml", ".yml")):
import yaml
return yaml.safe_load(text)
return json.loads(text)
def _resolve_class(dotted):
module_path, _, class_name = dotted.rpartition(".")
if not module_path:
raise ValueError(
f"Processor class must be a dotted path, got {dotted!r}"
)
module = importlib.import_module(module_path)
return getattr(module, class_name)
RESTART_DELAY_SECONDS = 4
async def _supervise(entry):
"""Run one processor with its own nested TaskGroup, restarting on any
failure. Each processor is isolated from its siblings: a crash here
does not propagate to the outer group."""
pid = entry["params"]["id"]
class_path = entry["class"]
# Stamp the contextvar for this supervisor task. Every log
# record emitted from this task — and from any inner TaskGroup
# child created by the processor — inherits this id via
# contextvar propagation. Siblings in the outer group set
# their own id in their own task context and do not interfere.
set_processor_id(pid)
while True:
try:
async with asyncio.TaskGroup() as inner_tg:
cls = _resolve_class(class_path)
params = dict(entry.get("params", {}))
params["taskgroup"] = inner_tg
logger.info(f"Starting {class_path} as {pid}")
p = cls(**params)
await p.start()
inner_tg.create_task(p.run())
# Clean exit — processor's run() returned without raising.
# Treat as a transient shutdown and restart, matching the
# behaviour of per-container `restart: on-failure`.
logger.warning(
f"Processor {pid} exited cleanly, will restart"
)
except asyncio.CancelledError:
logger.info(f"Processor {pid} cancelled")
raise
except BaseExceptionGroup as eg:
for e in eg.exceptions:
logger.error(
f"Processor {pid} failure: {type(e).__name__}: {e}",
exc_info=e,
)
except Exception as e:
logger.error(
f"Processor {pid} failure: {type(e).__name__}: {e}",
exc_info=True,
)
logger.info(
f"Restarting {pid} in {RESTART_DELAY_SECONDS}s..."
)
await asyncio.sleep(RESTART_DELAY_SECONDS)
async def run_group(config):
entries = config.get("processors", [])
if not entries:
raise RuntimeError("Group config has no processors")
seen_ids = set()
for entry in entries:
pid = entry.get("params", {}).get("id")
if pid is None:
raise RuntimeError(
f"Entry {entry.get('class')!r} missing params.id — "
f"required for metrics labelling"
)
if pid in seen_ids:
raise RuntimeError(f"Duplicate processor id {pid!r} in group")
seen_ids.add(pid)
async with asyncio.TaskGroup() as outer_tg:
for entry in entries:
outer_tg.create_task(_supervise(entry))
def run():
parser = argparse.ArgumentParser(
prog="processor-group",
description="Run multiple processors as tasks in one process",
)
parser.add_argument(
"-c", "--config",
required=True,
help="Path to group config file (JSON or YAML)",
)
parser.add_argument(
"--metrics",
action=argparse.BooleanOptionalAction,
default=True,
help="Metrics enabled (default: true)",
)
parser.add_argument(
"-P", "--metrics-port",
type=int,
default=8000,
help="Prometheus metrics port (default: 8000)",
)
add_logging_args(parser)
args = vars(parser.parse_args())
setup_logging(args)
config = _load_config(args["config"])
if args["metrics"]:
start_http_server(args["metrics_port"])
while True:
logger.info("Starting group...")
try:
asyncio.run(run_group(config))
except KeyboardInterrupt:
logger.info("Keyboard interrupt.")
return
except ExceptionGroup as e:
logger.error("Exception group:")
for se in e.exceptions:
logger.error(f" Type: {type(se)}")
logger.error(f" Exception: {se}", exc_info=se)
except Exception as e:
logger.error(f"Type: {type(e)}")
logger.error(f"Exception: {e}", exc_info=True)
logger.warning("Will retry...")
time.sleep(4)
logger.info("Retrying...")

View file

@ -1,10 +1,22 @@
import json
import asyncio
from dataclasses import dataclass
from typing import Optional, Any
from . request_response_spec import RequestResponse, RequestResponseSpec
from .. schema import PromptRequest, PromptResponse
@dataclass
class PromptResult:
response_type: str # "text", "json", or "jsonl"
text: Optional[str] = None # populated for "text"
object: Any = None # populated for "json"
objects: Optional[list] = None # populated for "jsonl"
in_token: Optional[int] = None
out_token: Optional[int] = None
model: Optional[str] = None
class PromptClient(RequestResponse):
async def prompt(self, id, variables, timeout=600, streaming=False, chunk_callback=None):
@ -26,17 +38,40 @@ class PromptClient(RequestResponse):
if resp.error:
raise RuntimeError(resp.error.message)
if resp.text: return resp.text
if resp.text:
return PromptResult(
response_type="text",
text=resp.text,
in_token=resp.in_token,
out_token=resp.out_token,
model=resp.model,
)
return json.loads(resp.object)
parsed = json.loads(resp.object)
if isinstance(parsed, list):
return PromptResult(
response_type="jsonl",
objects=parsed,
in_token=resp.in_token,
out_token=resp.out_token,
model=resp.model,
)
return PromptResult(
response_type="json",
object=parsed,
in_token=resp.in_token,
out_token=resp.out_token,
model=resp.model,
)
else:
last_text = ""
last_object = None
last_resp = None
async def forward_chunks(resp):
nonlocal last_text, last_object
nonlocal last_resp
if resp.error:
raise RuntimeError(resp.error.message)
@ -44,14 +79,13 @@ class PromptClient(RequestResponse):
end_stream = getattr(resp, 'end_of_stream', False)
if resp.text is not None:
last_text = resp.text
if chunk_callback:
if asyncio.iscoroutinefunction(chunk_callback):
await chunk_callback(resp.text, end_stream)
else:
chunk_callback(resp.text, end_stream)
elif resp.object:
last_object = resp.object
last_resp = resp
return end_stream
@ -70,10 +104,36 @@ class PromptClient(RequestResponse):
timeout=timeout
)
if last_text:
return last_text
if last_resp is None:
return PromptResult(response_type="text")
return json.loads(last_object) if last_object else None
if last_resp.object:
parsed = json.loads(last_resp.object)
if isinstance(parsed, list):
return PromptResult(
response_type="jsonl",
objects=parsed,
in_token=last_resp.in_token,
out_token=last_resp.out_token,
model=last_resp.model,
)
return PromptResult(
response_type="json",
object=parsed,
in_token=last_resp.in_token,
out_token=last_resp.out_token,
model=last_resp.model,
)
return PromptResult(
response_type="text",
text=last_resp.text,
in_token=last_resp.in_token,
out_token=last_resp.out_token,
model=last_resp.model,
)
async def extract_definitions(self, text, timeout=600):
return await self.prompt(
@ -152,4 +212,3 @@ class PromptClientSpec(RequestResponseSpec):
response_schema = PromptResponse,
impl = PromptClient,
)
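
Callers that previously received a str or a parsed JSON object from PromptClient.prompt() now branch on PromptResult.response_type. A sketch; the template id and variables are illustrative:

```python
# Sketch: consuming a PromptResult. `client` is assumed to be a
# PromptClient; the "question" template id is illustrative.
async def run_prompt(client):
    result = await client.prompt(
        id="question",
        variables={"question": "What is TrustGraph?"},
    )

    if result.response_type == "text":
        print(result.text)
    elif result.response_type == "json":
        print(result.object)            # parsed JSON object
    else:                               # "jsonl"
        for obj in result.objects:
            print(obj)

    if result.in_token is not None:
        print(f"Tokens: {result.in_token} in, {result.out_token} out ({result.model})")
```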

View file

@ -72,6 +72,16 @@ class PulsarBackendConsumer:
self._consumer = pulsar_consumer
self._schema_cls = schema_cls
def ensure_connected(self) -> None:
"""No-op for Pulsar.
PulsarBackend.create_consumer() calls client.subscribe() which is
synchronous and returns a fully-subscribed consumer, so the
consumer is already ready by the time this object is constructed.
Defined for parity with the BackendConsumer protocol used by
Subscriber.start()'s readiness barrier."""
pass
def receive(self, timeout_millis: int = 2000) -> Message:
"""Receive a message. Raises TimeoutError if no message available."""
try:

View file

@ -214,16 +214,43 @@ class RabbitMQBackendConsumer:
and self._channel.is_open
)
def ensure_connected(self) -> None:
"""Eagerly declare and bind the queue.
Without this, the queue is only declared lazily on the first
receive() call. For request/response with ephemeral per-subscriber
response queues that is a race: a request published before the
response queue is bound will have its reply silently dropped by
the broker. Subscriber.start() calls this so callers get a hard
readiness barrier."""
if not self._is_alive():
self._connect()
def receive(self, timeout_millis: int = 2000) -> Message:
"""Receive a message. Raises TimeoutError if none available."""
"""Receive a message. Raises TimeoutError if none available.
Loop ordering matters: check _incoming at the TOP of each
iteration, not as the loop condition. process_data_events
may dispatch a message via the _on_message callback during
the pump; we must re-check _incoming on the next iteration
before giving up on the deadline. The previous control
flow (`while deadline: check; pump`) could lose a wakeup if
the pump consumed the remainder of the window: the
`while` check would fail before `_incoming` was re-read,
leaving a just-dispatched message stranded until the next
receive() call one full poll cycle later.
"""
if not self._is_alive():
self._connect()
timeout_seconds = timeout_millis / 1000.0
deadline = time.monotonic() + timeout_seconds
while time.monotonic() < deadline:
# Check if a message was already delivered
while True:
# Check if a message has been dispatched to our queue.
# This catches both (a) messages dispatched before this
# receive() was called and (b) messages dispatched
# during the previous iteration's process_data_events.
try:
method, properties, body = self._incoming.get_nowait()
return RabbitMQMessage(
@ -232,14 +259,16 @@ class RabbitMQBackendConsumer:
except queue.Empty:
pass
# Drive pika's I/O — delivers messages and processes heartbeats
remaining = deadline - time.monotonic()
if remaining > 0:
self._connection.process_data_events(
time_limit=min(0.1, remaining),
)
if remaining <= 0:
raise TimeoutError("No message received within timeout")
raise TimeoutError("No message received within timeout")
# Drive pika's I/O. Any messages delivered during this
# call land in _incoming via _on_message; the next
# iteration of this loop catches them at the top.
self._connection.process_data_events(
time_limit=min(0.1, remaining),
)
def acknowledge(self, message: Message) -> None:
if isinstance(message, RabbitMQMessage) and message._method:

View file

@ -41,14 +41,55 @@ class Subscriber:
self.consumer = None
self.executor = None
# Readiness barrier — completed by run() once the underlying
# backend consumer is fully connected and bound. start() awaits
# this so callers know any subsequently published request will
# have a queue ready to receive its response. Without this,
# ephemeral per-subscriber response queues (RabbitMQ auto-delete
# exclusive queues) would race the request and lose the reply.
# A Future is used (rather than an Event) so that a first-attempt
# connection failure can be propagated to start() as an exception.
self._ready = None # created in start() so we have a running loop
def __del__(self):
self.running = False
async def start(self):
self._ready = asyncio.get_event_loop().create_future()
self.task = asyncio.create_task(self.run())
# Block until run() signals readiness OR exits. The future
# carries the outcome of the first connect attempt: a value on
# success, an exception on first-attempt failure. If run() exits
# without ever signalling (e.g. cancelled, or a code path bug),
# we surface that as a clear RuntimeError rather than hanging
# forever waiting on the future.
ready_wait = asyncio.ensure_future(
asyncio.shield(self._ready)
)
try:
await asyncio.wait(
{self.task, ready_wait},
return_when=asyncio.FIRST_COMPLETED,
)
finally:
ready_wait.cancel()
if self._ready.done():
# Re-raise first-attempt connect failure if any.
self._ready.result()
return
# run() exited before _ready was settled. Propagate its exception
# if it had one, otherwise raise a generic readiness error.
if self.task.done() and self.task.exception() is not None:
raise self.task.exception()
raise RuntimeError(
"Subscriber.run() exited before signalling readiness"
)
async def stop(self):
"""Initiate graceful shutdown with draining"""
self.running = False
@ -66,6 +107,7 @@ class Subscriber:
async def run(self):
"""Enhanced run method with integrated draining logic"""
first_attempt = True
while self.running or self.draining:
if self.metrics:
@ -87,10 +129,27 @@ class Subscriber:
),
)
# Eagerly bind the queue. For backends that connect
# lazily on first receive (RabbitMQ), this is what
# closes the request/response setup race — without
# it the response queue is not bound until later and
# any reply published in the meantime is dropped.
await loop.run_in_executor(
self.executor,
lambda: self.consumer.ensure_connected(),
)
if self.metrics:
self.metrics.state("running")
logger.info("Subscriber running...")
# Signal start() that the consumer is ready. This must
# happen AFTER ensure_connected() above so callers can
# safely publish requests immediately after start() returns.
if first_attempt and not self._ready.done():
self._ready.set_result(None)
first_attempt = False
drain_end_time = None
while self.running or self.draining:
@ -162,6 +221,16 @@ class Subscriber:
except Exception as e:
logger.error(f"Subscriber exception: {e}", exc_info=True)
# First-attempt connection failure: propagate to start()
# so the caller can decide what to do (retry, give up).
# Subsequent failures use the existing retry-with-backoff
# path so a long-lived subscriber survives broker blips.
if first_attempt and not self._ready.done():
self._ready.set_exception(e)
first_attempt = False
# Falls through into finally for cleanup, then the
# outer return below ends run() so start() unblocks.
finally:
# Negative acknowledge any pending messages
for msg in self.pending_acks.values():
@ -193,6 +262,11 @@ class Subscriber:
if not self.running and not self.draining:
return
# If start() has already returned with an exception there is
# nothing more to do — exit run() rather than busy-retry.
if self._ready.done() and self._ready.exception() is not None:
return
# Sleep before retry
await asyncio.sleep(1)
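
The practical upshot for callers: awaiting start() is now a readiness barrier, so a request published immediately afterwards cannot race the binding of its response queue, and a broker that is down on the first attempt surfaces as an exception instead of a hang. A sketch of the intended ordering; the subscriber construction and the publish call are assumed to come from the surrounding pub/sub wiring:

```python
# Sketch of the start()-then-publish ordering this change guarantees.
# `subscriber` and `publish_request` are assumptions standing in for the
# surrounding request/response plumbing.
async def request_with_ready_subscriber(subscriber, publish_request):
    # Blocks until the backend consumer is connected and the response
    # queue is bound, or re-raises the first-attempt connection failure.
    await subscriber.start()

    # Safe to publish: the response queue exists before the request goes
    # out, so the broker cannot drop the reply.
    await publish_request()
```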

View file

@ -1,47 +1,71 @@
from dataclasses import dataclass
from typing import Optional
from . request_response_spec import RequestResponse, RequestResponseSpec
from .. schema import TextCompletionRequest, TextCompletionResponse
@dataclass
class TextCompletionResult:
text: Optional[str]
in_token: Optional[int] = None
out_token: Optional[int] = None
model: Optional[str] = None
class TextCompletionClient(RequestResponse):
async def text_completion(self, system, prompt, streaming=False, timeout=600):
# If not streaming, use original behavior
if not streaming:
resp = await self.request(
TextCompletionRequest(
system = system, prompt = prompt, streaming = False
),
timeout=timeout
)
if resp.error:
raise RuntimeError(resp.error.message)
async def text_completion(self, system, prompt, timeout=600):
return resp.response
# For streaming: collect all chunks and return complete response
full_response = ""
async def collect_chunks(resp):
nonlocal full_response
if resp.error:
raise RuntimeError(resp.error.message)
if resp.response:
full_response += resp.response
# Return True when end_of_stream is reached
return getattr(resp, 'end_of_stream', False)
await self.request(
resp = await self.request(
TextCompletionRequest(
system = system, prompt = prompt, streaming = True
system = system, prompt = prompt, streaming = False
),
recipient=collect_chunks,
timeout=timeout
)
return full_response
if resp.error:
raise RuntimeError(resp.error.message)
return TextCompletionResult(
text = resp.response,
in_token = resp.in_token,
out_token = resp.out_token,
model = resp.model,
)
async def text_completion_stream(
self, system, prompt, handler, timeout=600,
):
"""
Streaming text completion. `handler` is an async callable invoked
once per chunk with the chunk's TextCompletionResponse. Returns a
TextCompletionResult with text=None and token counts / model taken
from the end_of_stream message.
"""
async def on_chunk(resp):
if resp.error:
raise RuntimeError(resp.error.message)
await handler(resp)
return getattr(resp, "end_of_stream", False)
final = await self.request(
TextCompletionRequest(
system = system, prompt = prompt, streaming = True
),
recipient=on_chunk,
timeout=timeout,
)
return TextCompletionResult(
text = None,
in_token = final.in_token,
out_token = final.out_token,
model = final.model,
)
class TextCompletionClientSpec(RequestResponseSpec):
def __init__(
@ -54,4 +78,3 @@ class TextCompletionClientSpec(RequestResponseSpec):
response_schema = TextCompletionResponse,
impl = TextCompletionClient,
)
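
The client API is now split: text_completion() does the one-shot call, text_completion_stream() drives a handler per chunk and returns the token totals from the end-of-stream message. A usage sketch, assuming `client` is a TextCompletionClient wired up via TextCompletionClientSpec:

```python
# Sketch: one-shot vs streaming use of the split client API.
async def demo(client):
    # Non-streaming: a single TextCompletionResult with the full text.
    result = await client.text_completion(
        system="You are a helpful assistant.",
        prompt="Explain quantum computing in simple terms.",
    )
    print(result.text, result.in_token, result.out_token, result.model)

    # Streaming: the handler sees every TextCompletionResponse chunk; the
    # returned result has text=None plus end-of-stream token counts.
    async def on_chunk(resp):
        if resp.response:
            print(resp.response, end="", flush=True)

    final = await client.text_completion_stream(
        system="You are a helpful assistant.",
        prompt="Explain quantum computing in simple terms.",
        handler=on_chunk,
    )
    print(f"\nTokens: {final.in_token} in, {final.out_token} out")
```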

View file

@ -58,23 +58,23 @@ class AgentClient(BaseClient):
def inspect(x):
# Handle errors
if x.chunk_type == 'error' or x.error:
if x.message_type == 'error' or x.error:
if error_callback:
error_callback(x.content or (x.error.message if x.error else ""))
# Continue to check end_of_dialog
# Handle thought chunks
elif x.chunk_type == 'thought':
elif x.message_type == 'thought':
if think:
think(x.content, x.end_of_message)
# Handle observation chunks
elif x.chunk_type == 'observation':
elif x.message_type == 'observation':
if observe:
observe(x.content, x.end_of_message)
# Handle answer chunks
elif x.chunk_type == 'answer':
elif x.message_type == 'answer':
if x.content:
accumulated_answer.append(x.content)
if answer_callback:

View file

@ -60,8 +60,8 @@ class AgentResponseTranslator(MessageTranslator):
def encode(self, obj: AgentResponse) -> Dict[str, Any]:
result = {}
if obj.chunk_type:
result["chunk_type"] = obj.chunk_type
if obj.message_type:
result["message_type"] = obj.message_type
if obj.content:
result["content"] = obj.content
result["end_of_message"] = getattr(obj, "end_of_message", False)
@ -90,6 +90,13 @@ class AgentResponseTranslator(MessageTranslator):
if hasattr(obj, 'error') and obj.error and obj.error.message:
result["error"] = {"message": obj.error.message, "code": obj.error.code}
if obj.in_token is not None:
result["in_token"] = obj.in_token
if obj.out_token is not None:
result["out_token"] = obj.out_token
if obj.model is not None:
result["model"] = obj.model
return result
def encode_with_completion(self, obj: AgentResponse) -> Tuple[Dict[str, Any], bool]:

View file

@ -151,7 +151,7 @@ class DocumentEmbeddingsTranslator(SendTranslator):
chunks = [
ChunkEmbeddings(
chunk_id=chunk["chunk_id"],
vectors=chunk["vectors"]
vector=chunk["vector"]
)
for chunk in data.get("chunks", [])
]

View file

@ -39,7 +39,7 @@ class KnowledgeRequestTranslator(MessageTranslator):
entities=[
EntityEmbeddings(
entity=self.value_translator.decode(ent["entity"]),
vectors=ent["vectors"],
vector=ent["vector"],
)
for ent in data["graph-embeddings"]["entities"]
]

View file

@ -53,6 +53,13 @@ class PromptResponseTranslator(MessageTranslator):
# Always include end_of_stream flag for streaming support
result["end_of_stream"] = getattr(obj, "end_of_stream", False)
if obj.in_token is not None:
result["in_token"] = obj.in_token
if obj.out_token is not None:
result["out_token"] = obj.out_token
if obj.model is not None:
result["model"] = obj.model
return result
def encode_with_completion(self, obj: PromptResponse) -> Tuple[Dict[str, Any], bool]:

View file

@ -74,6 +74,13 @@ class DocumentRagResponseTranslator(MessageTranslator):
if hasattr(obj, 'error') and obj.error and obj.error.message:
result["error"] = {"message": obj.error.message, "type": obj.error.type}
if obj.in_token is not None:
result["in_token"] = obj.in_token
if obj.out_token is not None:
result["out_token"] = obj.out_token
if obj.model is not None:
result["model"] = obj.model
return result
def encode_with_completion(self, obj: DocumentRagResponse) -> Tuple[Dict[str, Any], bool]:
@ -163,6 +170,13 @@ class GraphRagResponseTranslator(MessageTranslator):
if hasattr(obj, 'error') and obj.error and obj.error.message:
result["error"] = {"message": obj.error.message, "type": obj.error.type}
if obj.in_token is not None:
result["in_token"] = obj.in_token
if obj.out_token is not None:
result["out_token"] = obj.out_token
if obj.model is not None:
result["model"] = obj.model
return result
def encode_with_completion(self, obj: GraphRagResponse) -> Tuple[Dict[str, Any], bool]:

View file

@ -29,11 +29,11 @@ class TextCompletionResponseTranslator(MessageTranslator):
def encode(self, obj: TextCompletionResponse) -> Dict[str, Any]:
result = {"response": obj.response}
if obj.in_token:
if obj.in_token is not None:
result["in_token"] = obj.in_token
if obj.out_token:
if obj.out_token is not None:
result["out_token"] = obj.out_token
if obj.model:
if obj.model is not None:
result["model"] = obj.model
# Always include end_of_stream flag for streaming support
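
The switch from truthiness to `is not None` matters because a token count of 0 is real data, whereas only None means "not available". A minimal illustration:

```python
# Minimal illustration of the old vs new check.
def encode_old(in_token):
    result = {}
    if in_token:                 # silently drops a legitimate 0
        result["in_token"] = in_token
    return result

def encode_new(in_token):
    result = {}
    if in_token is not None:     # keeps 0, omits only None
        result["in_token"] = in_token
    return result

assert encode_old(0) == {}                      # 0 lost
assert encode_new(0) == {"in_token": 0}         # 0 preserved
assert encode_new(None) == {}                   # missing value still omitted
```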

View file

@ -59,6 +59,7 @@ from . uris import (
agent_plan_uri,
agent_step_result_uri,
agent_synthesis_uri,
agent_pattern_decision_uri,
# Document RAG provenance URIs
docrag_question_uri,
docrag_grounding_uri,
@ -102,6 +103,11 @@ from . namespaces import (
# Agent provenance predicates
TG_THOUGHT, TG_ACTION, TG_ARGUMENTS, TG_OBSERVATION,
TG_SUBAGENT_GOAL, TG_PLAN_STEP,
TG_TOOL_CANDIDATE, TG_TERMINATION_REASON,
TG_STEP_NUMBER, TG_PATTERN_DECISION, TG_PATTERN, TG_TASK_TYPE,
TG_LLM_DURATION_MS, TG_TOOL_DURATION_MS, TG_TOOL_ERROR,
TG_IN_TOKEN, TG_OUT_TOKEN,
TG_ERROR_TYPE,
# Orchestrator entity types
TG_DECOMPOSITION, TG_FINDING, TG_PLAN_TYPE, TG_STEP_RESULT,
# Document reference predicate
@ -141,6 +147,7 @@ from . agent import (
agent_plan_triples,
agent_step_result_triples,
agent_synthesis_triples,
agent_pattern_decision_triples,
)
# Vocabulary bootstrap
@ -182,6 +189,7 @@ __all__ = [
"agent_plan_uri",
"agent_step_result_uri",
"agent_synthesis_uri",
"agent_pattern_decision_uri",
# Document RAG provenance URIs
"docrag_question_uri",
"docrag_grounding_uri",
@ -218,6 +226,11 @@ __all__ = [
# Agent provenance predicates
"TG_THOUGHT", "TG_ACTION", "TG_ARGUMENTS", "TG_OBSERVATION",
"TG_SUBAGENT_GOAL", "TG_PLAN_STEP",
"TG_TOOL_CANDIDATE", "TG_TERMINATION_REASON",
"TG_STEP_NUMBER", "TG_PATTERN_DECISION", "TG_PATTERN", "TG_TASK_TYPE",
"TG_LLM_DURATION_MS", "TG_TOOL_DURATION_MS", "TG_TOOL_ERROR",
"TG_IN_TOKEN", "TG_OUT_TOKEN",
"TG_ERROR_TYPE",
# Orchestrator entity types
"TG_DECOMPOSITION", "TG_FINDING", "TG_PLAN_TYPE", "TG_STEP_RESULT",
# Document reference predicate
@ -249,6 +262,7 @@ __all__ = [
"agent_plan_triples",
"agent_step_result_triples",
"agent_synthesis_triples",
"agent_pattern_decision_triples",
# Utility
"set_graph",
# Vocabulary

View file

@ -29,6 +29,11 @@ from . namespaces import (
TG_AGENT_QUESTION,
TG_DECOMPOSITION, TG_FINDING, TG_PLAN_TYPE, TG_STEP_RESULT,
TG_SYNTHESIS, TG_SUBAGENT_GOAL, TG_PLAN_STEP,
TG_TOOL_CANDIDATE, TG_TERMINATION_REASON,
TG_STEP_NUMBER, TG_PATTERN_DECISION, TG_PATTERN, TG_TASK_TYPE,
TG_LLM_DURATION_MS, TG_TOOL_DURATION_MS, TG_TOOL_ERROR,
TG_ERROR_TYPE,
TG_IN_TOKEN, TG_OUT_TOKEN, TG_LLM_MODEL,
)
@ -47,6 +52,17 @@ def _triple(s: str, p: str, o_term: Term) -> Triple:
return Triple(s=_iri(s), p=_iri(p), o=o_term)
def _append_token_triples(triples, uri, in_token=None, out_token=None,
model=None):
"""Append in_token/out_token/model triples when values are present."""
if in_token is not None:
triples.append(_triple(uri, TG_IN_TOKEN, _literal(str(in_token))))
if out_token is not None:
triples.append(_triple(uri, TG_OUT_TOKEN, _literal(str(out_token))))
if model is not None:
triples.append(_triple(uri, TG_LLM_MODEL, _literal(model)))
def agent_session_triples(
session_uri: str,
query: str,
@ -90,6 +106,43 @@ def agent_session_triples(
return triples
def agent_pattern_decision_triples(
uri: str,
session_uri: str,
pattern: str,
task_type: str = "",
) -> List[Triple]:
"""
Build triples for a meta-router pattern decision.
Creates:
- Entity declaration with tg:PatternDecision type
- wasDerivedFrom link to session
- Pattern and task type predicates
Args:
uri: URI of this decision (from agent_pattern_decision_uri)
session_uri: URI of the parent session
pattern: Selected execution pattern (e.g. "react", "plan-then-execute")
task_type: Identified task type (e.g. "general", "research")
Returns:
List of Triple objects
"""
triples = [
_triple(uri, RDF_TYPE, _iri(PROV_ENTITY)),
_triple(uri, RDF_TYPE, _iri(TG_PATTERN_DECISION)),
_triple(uri, RDFS_LABEL, _literal(f"Pattern: {pattern}")),
_triple(uri, TG_PATTERN, _literal(pattern)),
_triple(uri, PROV_WAS_DERIVED_FROM, _iri(session_uri)),
]
if task_type:
triples.append(_triple(uri, TG_TASK_TYPE, _literal(task_type)))
return triples
def agent_iteration_triples(
iteration_uri: str,
question_uri: Optional[str] = None,
@ -98,6 +151,12 @@ def agent_iteration_triples(
arguments: Dict[str, Any] = None,
thought_uri: Optional[str] = None,
thought_document_id: Optional[str] = None,
tool_candidates: Optional[List[str]] = None,
step_number: Optional[int] = None,
llm_duration_ms: Optional[int] = None,
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""
Build triples for one agent iteration (Analysis+ToolUse).
@ -106,6 +165,7 @@ def agent_iteration_triples(
- Entity declaration with tg:Analysis and tg:ToolUse types
- wasDerivedFrom link to question (if first iteration) or previous
- Action and arguments metadata
- Tool candidates (names of tools visible to the LLM)
- Thought sub-entity (tg:Reflection, tg:Thought) with librarian document
Args:
@ -116,6 +176,7 @@ def agent_iteration_triples(
arguments: Arguments passed to the tool (will be JSON-encoded)
thought_uri: URI for the thought sub-entity
thought_document_id: Document URI for thought in librarian
tool_candidates: List of tool names available to the LLM
Returns:
List of Triple objects
@ -132,6 +193,23 @@ def agent_iteration_triples(
_triple(iteration_uri, TG_ARGUMENTS, _literal(json.dumps(arguments))),
]
if tool_candidates:
for name in tool_candidates:
triples.append(
_triple(iteration_uri, TG_TOOL_CANDIDATE, _literal(name))
)
if step_number is not None:
triples.append(
_triple(iteration_uri, TG_STEP_NUMBER, _literal(str(step_number)))
)
if llm_duration_ms is not None:
triples.append(
_triple(iteration_uri, TG_LLM_DURATION_MS,
_literal(str(llm_duration_ms)))
)
if question_uri:
triples.append(
_triple(iteration_uri, PROV_WAS_DERIVED_FROM, _iri(question_uri))
@ -155,6 +233,8 @@ def agent_iteration_triples(
_triple(thought_uri, TG_DOCUMENT, _iri(thought_document_id))
)
_append_token_triples(triples, iteration_uri, in_token, out_token, model)
return triples
@ -162,6 +242,8 @@ def agent_observation_triples(
observation_uri: str,
iteration_uri: str,
document_id: Optional[str] = None,
tool_duration_ms: Optional[int] = None,
tool_error: Optional[str] = None,
) -> List[Triple]:
"""
Build triples for an agent observation (standalone entity).
@ -170,11 +252,15 @@ def agent_observation_triples(
- Entity declaration with prov:Entity and tg:Observation types
- wasDerivedFrom link to the iteration (Analysis+ToolUse)
- Document reference to librarian (if provided)
- Tool execution duration (if provided)
- Tool error message (if the tool failed)
Args:
observation_uri: URI of the observation entity
iteration_uri: URI of the iteration this observation derives from
document_id: Librarian document ID for the observation content
tool_duration_ms: Tool execution time in milliseconds
tool_error: Error message if the tool failed
Returns:
List of Triple objects
@ -191,6 +277,20 @@ def agent_observation_triples(
_triple(observation_uri, TG_DOCUMENT, _iri(document_id))
)
if tool_duration_ms is not None:
triples.append(
_triple(observation_uri, TG_TOOL_DURATION_MS,
_literal(str(tool_duration_ms)))
)
if tool_error:
triples.append(
_triple(observation_uri, TG_TOOL_ERROR, _literal(tool_error))
)
triples.append(
_triple(observation_uri, RDF_TYPE, _iri(TG_ERROR_TYPE))
)
return triples
@ -199,6 +299,10 @@ def agent_final_triples(
question_uri: Optional[str] = None,
previous_uri: Optional[str] = None,
document_id: Optional[str] = None,
termination_reason: Optional[str] = None,
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""
Build triples for an agent final answer (Conclusion).
@ -208,12 +312,15 @@ def agent_final_triples(
- wasGeneratedBy link to question (if no iterations)
- wasDerivedFrom link to last iteration (if iterations exist)
- Document reference to librarian
- Termination reason (why the agent loop stopped)
Args:
final_uri: URI of the final answer (from agent_final_uri)
question_uri: URI of the question activity (if no iterations)
previous_uri: URI of the last iteration (if iterations exist)
document_id: Librarian document ID for the answer content
termination_reason: Why the loop stopped, e.g. "final-answer",
"max-iterations", "error"
Returns:
List of Triple objects
@ -237,6 +344,14 @@ def agent_final_triples(
if document_id:
triples.append(_triple(final_uri, TG_DOCUMENT, _iri(document_id)))
if termination_reason:
triples.append(
_triple(final_uri, TG_TERMINATION_REASON,
_literal(termination_reason))
)
_append_token_triples(triples, final_uri, in_token, out_token, model)
return triples
@ -244,6 +359,9 @@ def agent_decomposition_triples(
uri: str,
session_uri: str,
goals: List[str],
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""Build triples for a supervisor decomposition step."""
triples = [
@ -255,6 +373,7 @@ def agent_decomposition_triples(
]
for goal in goals:
triples.append(_triple(uri, TG_SUBAGENT_GOAL, _literal(goal)))
_append_token_triples(triples, uri, in_token, out_token, model)
return triples
@ -282,6 +401,9 @@ def agent_plan_triples(
uri: str,
session_uri: str,
steps: List[str],
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""Build triples for a plan-then-execute plan."""
triples = [
@ -293,6 +415,7 @@ def agent_plan_triples(
]
for step in steps:
triples.append(_triple(uri, TG_PLAN_STEP, _literal(step)))
_append_token_triples(triples, uri, in_token, out_token, model)
return triples
@ -301,6 +424,9 @@ def agent_step_result_triples(
plan_uri: str,
goal: str,
document_id: Optional[str] = None,
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""Build triples for a plan step result."""
triples = [
@ -313,6 +439,7 @@ def agent_step_result_triples(
]
if document_id:
triples.append(_triple(uri, TG_DOCUMENT, _iri(document_id)))
_append_token_triples(triples, uri, in_token, out_token, model)
return triples
@ -320,6 +447,10 @@ def agent_synthesis_triples(
uri: str,
previous_uris,
document_id: Optional[str] = None,
termination_reason: Optional[str] = None,
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""Build triples for a synthesis answer.
@ -327,6 +458,8 @@ def agent_synthesis_triples(
uri: URI of the synthesis entity
previous_uris: Single URI string or list of URIs to derive from
document_id: Librarian document ID for the answer content
termination_reason: Why the agent loop stopped
in_token/out_token/model: Token usage for the synthesis LLM call
"""
triples = [
_triple(uri, RDF_TYPE, _iri(PROV_ENTITY)),
@ -342,4 +475,12 @@ def agent_synthesis_triples(
if document_id:
triples.append(_triple(uri, TG_DOCUMENT, _iri(document_id)))
if termination_reason:
triples.append(
_triple(uri, TG_TERMINATION_REASON, _literal(termination_reason))
)
_append_token_triples(triples, uri, in_token, out_token, model)
return triples
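
The token arguments on these builders all funnel through _append_token_triples, so token usage, pattern decisions and termination reasons end up as ordinary triples on the relevant entity. A sketch of building the new pattern-decision and final-answer triples; the import path assumes these helpers are re-exported from trustgraph.knowledge as the __init__ changes above suggest, and the session URI, final-answer URI, token counts and model name are all illustrative:

```python
# Sketch: recording token usage in agent provenance. URIs and counts
# are made up; real code would use the corresponding URI helpers.
from trustgraph.knowledge import (
    agent_pattern_decision_uri,
    agent_pattern_decision_triples,
    agent_final_triples,
)

session_id = "abc123"
session_uri = f"urn:trustgraph:agent:{session_id}"        # assumed shape

triples = agent_pattern_decision_triples(
    agent_pattern_decision_uri(session_id),
    session_uri,
    pattern="react",
    task_type="general",
)

triples += agent_final_triples(
    f"{session_uri}/final",                                # illustrative
    previous_uri=f"{session_uri}/iteration/3",
    termination_reason="final-answer",
    in_token=1200,
    out_token=350,
    model="example-model",
)
```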

View file

@ -119,6 +119,18 @@ TG_ARGUMENTS = TG + "arguments"
TG_OBSERVATION = TG + "observation" # Links iteration to observation sub-entity
TG_SUBAGENT_GOAL = TG + "subagentGoal" # Goal string on Decomposition/Finding
TG_PLAN_STEP = TG + "planStep" # Step goal string on Plan/StepResult
TG_TOOL_CANDIDATE = TG + "toolCandidate" # Tool name on Analysis events
TG_TERMINATION_REASON = TG + "terminationReason" # Why the agent loop stopped
TG_STEP_NUMBER = TG + "stepNumber" # Explicit step counter on iteration events
TG_PATTERN_DECISION = TG + "PatternDecision" # Meta-router routing decision entity type
TG_PATTERN = TG + "pattern" # Selected execution pattern
TG_TASK_TYPE = TG + "taskType" # Identified task type
TG_LLM_DURATION_MS = TG + "llmDurationMs" # LLM call duration in milliseconds
TG_TOOL_DURATION_MS = TG + "toolDurationMs" # Tool execution duration in milliseconds
TG_TOOL_ERROR = TG + "toolError" # Error message from a failed tool execution
TG_ERROR_TYPE = TG + "Error" # Mixin type for failure events
TG_IN_TOKEN = TG + "inToken" # Input token count for an LLM call
TG_OUT_TOKEN = TG + "outToken" # Output token count for an LLM call
# Named graph URIs for RDF datasets
# These separate different types of data while keeping them in the same collection

View file

@ -34,6 +34,8 @@ from . namespaces import (
TG_ANSWER_TYPE,
# Question subtypes
TG_GRAPH_RAG_QUESTION, TG_DOC_RAG_QUESTION,
# Token usage
TG_IN_TOKEN, TG_OUT_TOKEN,
)
from . uris import activity_uri, agent_uri, subgraph_uri, edge_selection_uri
@ -74,6 +76,17 @@ def _triple(s: str, p: str, o_term: Term) -> Triple:
return Triple(s=_iri(s), p=_iri(p), o=o_term)
def _append_token_triples(triples, uri, in_token=None, out_token=None,
model=None):
"""Append in_token/out_token/model triples when values are present."""
if in_token is not None:
triples.append(_triple(uri, TG_IN_TOKEN, _literal(str(in_token))))
if out_token is not None:
triples.append(_triple(uri, TG_OUT_TOKEN, _literal(str(out_token))))
if model is not None:
triples.append(_triple(uri, TG_LLM_MODEL, _literal(model)))
def document_triples(
doc_uri: str,
title: Optional[str] = None,
@ -396,6 +409,9 @@ def grounding_triples(
grounding_uri: str,
question_uri: str,
concepts: List[str],
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""
Build triples for a grounding entity (concept decomposition of query).
@ -423,6 +439,8 @@ def grounding_triples(
for concept in concepts:
triples.append(_triple(grounding_uri, TG_CONCEPT, _literal(concept)))
_append_token_triples(triples, grounding_uri, in_token, out_token, model)
return triples
@ -485,6 +503,9 @@ def focus_triples(
exploration_uri: str,
selected_edges_with_reasoning: List[dict],
session_id: str = "",
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""
Build triples for a focus entity (selected edges with reasoning).
@ -543,6 +564,8 @@ def focus_triples(
_triple(edge_sel_uri, TG_REASONING, _literal(reasoning))
)
_append_token_triples(triples, focus_uri, in_token, out_token, model)
return triples
@ -550,6 +573,9 @@ def synthesis_triples(
synthesis_uri: str,
focus_uri: str,
document_id: Optional[str] = None,
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""
Build triples for a synthesis entity (final answer).
@ -578,6 +604,8 @@ def synthesis_triples(
if document_id:
triples.append(_triple(synthesis_uri, TG_DOCUMENT, _iri(document_id)))
_append_token_triples(triples, synthesis_uri, in_token, out_token, model)
return triples
@ -674,6 +702,9 @@ def docrag_synthesis_triples(
synthesis_uri: str,
exploration_uri: str,
document_id: Optional[str] = None,
in_token: Optional[int] = None,
out_token: Optional[int] = None,
model: Optional[str] = None,
) -> List[Triple]:
"""
Build triples for a document RAG synthesis entity (final answer).
@ -702,4 +733,6 @@ def docrag_synthesis_triples(
if document_id:
triples.append(_triple(synthesis_uri, TG_DOCUMENT, _iri(document_id)))
_append_token_triples(triples, synthesis_uri, in_token, out_token, model)
return triples

View file

@ -259,6 +259,11 @@ def agent_synthesis_uri(session_id: str) -> str:
return f"urn:trustgraph:agent:{session_id}/synthesis"
def agent_pattern_decision_uri(session_id: str) -> str:
"""Generate URI for a meta-router pattern decision."""
return f"urn:trustgraph:agent:{session_id}/pattern-decision"
# Document RAG provenance URIs
# These URIs use the urn:trustgraph:docrag: namespace to distinguish
# document RAG provenance from graph RAG provenance

View file

@ -51,8 +51,8 @@ class AgentRequest:
@dataclass
class AgentResponse:
# Streaming-first design
chunk_type: str = "" # "thought", "action", "observation", "answer", "explain", "error"
content: str = "" # The actual content (interpretation depends on chunk_type)
message_type: str = "" # "thought", "action", "observation", "answer", "explain", "error"
content: str = "" # The actual content (interpretation depends on message_type)
end_of_message: bool = False # Current message type (thought/action/etc.) is complete
end_of_dialog: bool = False # Entire agent dialog is complete
@ -66,5 +66,10 @@ class AgentResponse:
error: Error | None = None
# Token usage (populated on end_of_dialog message)
in_token: int | None = None
out_token: int | None = None
model: str | None = None
############################################################################

View file

@ -17,9 +17,9 @@ class TextCompletionRequest:
class TextCompletionResponse:
error: Error | None = None
response: str = ""
in_token: int = 0
out_token: int = 0
model: str = ""
in_token: int | None = None
out_token: int | None = None
model: str | None = None
end_of_stream: bool = False # Indicates final message in stream
############################################################################

View file

@ -41,4 +41,9 @@ class PromptResponse:
# Indicates final message in stream
end_of_stream: bool = False
# Token usage from the underlying text completion
in_token: int | None = None
out_token: int | None = None
model: str | None = None
############################################################################

View file

@ -29,6 +29,9 @@ class GraphRagResponse:
explain_triples: list[Triple] = field(default_factory=list) # Provenance triples for this step
message_type: str = "" # "chunk" or "explain"
end_of_session: bool = False # Entire session complete
in_token: int | None = None
out_token: int | None = None
model: str | None = None
############################################################################
@ -52,3 +55,6 @@ class DocumentRagResponse:
explain_triples: list[Triple] = field(default_factory=list) # Provenance triples for this step
message_type: str = "" # "chunk" or "explain"
end_of_session: bool = False # Entire session complete
in_token: int | None = None
out_token: int | None = None
model: str | None = None