From 05c29de5bc7d010861c70300878609bbb00f7524 Mon Sep 17 00:00:00 2001
From: Cyber MacGeddon <cybermaggedon@gmail.com>
Date: Sun, 12 Apr 2026 16:42:39 +0100
Subject: [PATCH] token errors

---
 trustgraph-base/trustgraph/api/async_flow.py  |  6 ++--
 .../trustgraph/api/async_socket_client.py     | 12 +++----
 trustgraph-base/trustgraph/api/flow.py        |  6 ++--
 .../trustgraph/api/socket_client.py           | 33 ++++++++++-------
 trustgraph-base/trustgraph/api/types.py       | 18 +++++-----
 .../trustgraph/base/text_completion_client.py | 18 +++++-----
 .../messaging/translators/prompt.py           |  6 ++--
 .../messaging/translators/text_completion.py  |  6 ++--
 .../trustgraph/schema/services/llm.py         |  6 ++--
 .../trustgraph/schema/services/prompt.py      |  6 ++--
 .../trustgraph/cli/invoke_prompt.py           | 36 +++++++++++++++----
 11 files changed, 92 insertions(+), 61 deletions(-)

diff --git a/trustgraph-base/trustgraph/api/async_flow.py b/trustgraph-base/trustgraph/api/async_flow.py
index 9dd110c6..68899341 100644
--- a/trustgraph-base/trustgraph/api/async_flow.py
+++ b/trustgraph-base/trustgraph/api/async_flow.py
@@ -476,9 +476,9 @@ class AsyncFlowInstance:
         result = await self.request("text-completion", request_data)
         return TextCompletionResult(
             text=result.get("response", ""),
-            in_token=result.get("in_token", 0) or 0,
-            out_token=result.get("out_token", 0) or 0,
-            model=result.get("model", "") or "",
+            in_token=result.get("in_token"),
+            out_token=result.get("out_token"),
+            model=result.get("model"),
         )
 
     async def graph_rag(self, query: str, user: str, collection: str,
diff --git a/trustgraph-base/trustgraph/api/async_socket_client.py b/trustgraph-base/trustgraph/api/async_socket_client.py
index 9c9c30d5..e8957cb6 100644
--- a/trustgraph-base/trustgraph/api/async_socket_client.py
+++ b/trustgraph-base/trustgraph/api/async_socket_client.py
@@ -212,9 +212,9 @@ class AsyncSocketClient:
                 content=content,
                 end_of_stream=resp.get("end_of_stream", False),
                 error=None,
-                in_token=resp.get("in_token", 0) or 0,
-                out_token=resp.get("out_token", 0) or 0,
-                model=resp.get("model", "") or "",
+                in_token=resp.get("in_token"),
+                out_token=resp.get("out_token"),
+                model=resp.get("model"),
             )
 
     async def aclose(self):
@@ -290,9 +290,9 @@ class AsyncSocketFlowInstance:
             result = await self.client._send_request("text-completion", self.flow_id, request)
             return TextCompletionResult(
                 text=result.get("response", ""),
-                in_token=result.get("in_token", 0) or 0,
-                out_token=result.get("out_token", 0) or 0,
-                model=result.get("model", "") or "",
+                in_token=result.get("in_token"),
+                out_token=result.get("out_token"),
+                model=result.get("model"),
             )
 
     async def _text_completion_streaming(self, request):
diff --git a/trustgraph-base/trustgraph/api/flow.py b/trustgraph-base/trustgraph/api/flow.py
index b39f3db9..d505c143 100644
--- a/trustgraph-base/trustgraph/api/flow.py
+++ b/trustgraph-base/trustgraph/api/flow.py
@@ -387,9 +387,9 @@ class FlowInstance:
 
         return TextCompletionResult(
             text=result.get("response", ""),
-            in_token=result.get("in_token", 0) or 0,
-            out_token=result.get("out_token", 0) or 0,
-            model=result.get("model", "") or "",
+            in_token=result.get("in_token"),
+            out_token=result.get("out_token"),
+            model=result.get("model"),
         )
 
     def agent(self, question, user="trustgraph", state=None, group=None, history=None):
diff --git a/trustgraph-base/trustgraph/api/socket_client.py b/trustgraph-base/trustgraph/api/socket_client.py
index 68db0ef7..234f003d 100644
--- a/trustgraph-base/trustgraph/api/socket_client.py
+++ b/trustgraph-base/trustgraph/api/socket_client.py
@@ -405,9 +405,9 @@ class SocketClient:
                 content=content,
                 end_of_stream=resp.get("end_of_stream", False),
                 error=None,
-                in_token=resp.get("in_token", 0) or 0,
-                out_token=resp.get("out_token", 0) or 0,
-                model=resp.get("model", "") or "",
+                in_token=resp.get("in_token"),
+                out_token=resp.get("out_token"),
+                model=resp.get("model"),
             )
 
     def _build_provenance_event(self, resp: Dict[str, Any]) -> ProvenanceEvent:
@@ -566,9 +566,9 @@ class SocketFlowInstance:
         else:
             return TextCompletionResult(
                 text=result.get("response", ""),
-                in_token=result.get("in_token", 0) or 0,
-                out_token=result.get("out_token", 0) or 0,
-                model=result.get("model", "") or "",
+                in_token=result.get("in_token"),
+                out_token=result.get("out_token"),
+                model=result.get("model"),
             )
 
     def _text_completion_generator(self, result: Iterator[StreamingChunk]) -> Iterator[RAGChunk]:
@@ -696,10 +696,10 @@ class SocketFlowInstance:
             streaming=True, include_provenance=True
         )
 
-    def _rag_generator(self, result: Iterator[StreamingChunk]) -> Iterator[str]:
+    def _rag_generator(self, result: Iterator[StreamingChunk]) -> Iterator[RAGChunk]:
         for chunk in result:
-            if hasattr(chunk, 'content'):
-                yield chunk.content
+            if isinstance(chunk, RAGChunk):
+                yield chunk
 
     def prompt(
         self,
@@ -707,8 +707,12 @@ class SocketFlowInstance:
         variables: Dict[str, str],
         streaming: bool = False,
         **kwargs: Any
-    ) -> Union[str, Iterator[str]]:
-        """Execute a prompt template with optional streaming."""
+    ) -> Union[TextCompletionResult, Iterator[RAGChunk]]:
+        """Execute a prompt template with optional streaming.
+
+        Non-streaming: returns a TextCompletionResult with text and token counts.
+        Streaming: returns an iterator of RAGChunk (with token counts on the final chunk).
+        """
         request = {
             "id": id,
             "variables": variables,
@@ -721,7 +725,12 @@ class SocketFlowInstance:
         if streaming:
             return self._rag_generator(result)
         else:
-            return result.get("response", "")
+            return TextCompletionResult(
+                text=result.get("text", result.get("response", "")),
+                in_token=result.get("in_token"),
+                out_token=result.get("out_token"),
+                model=result.get("model"),
+            )
 
     def graph_embeddings_query(
         self,
diff --git a/trustgraph-base/trustgraph/api/types.py b/trustgraph-base/trustgraph/api/types.py
index da05e76d..a6f7f11e 100644
--- a/trustgraph-base/trustgraph/api/types.py
+++ b/trustgraph-base/trustgraph/api/types.py
@@ -210,9 +210,9 @@ class RAGChunk(StreamingChunk):
     chunk_type: str = "rag"
     end_of_stream: bool = False
     error: Optional[Dict[str, str]] = None
-    in_token: int = 0
-    out_token: int = 0
-    model: str = ""
+    in_token: Optional[int] = None
+    out_token: Optional[int] = None
+    model: Optional[str] = None
 
 @dataclasses.dataclass
 class TextCompletionResult:
@@ -225,14 +225,14 @@ class TextCompletionResult:
 
     Attributes:
         text: Complete response text (None in streaming mode)
-        in_token: Input token count
-        out_token: Output token count
-        model: Model identifier
+        in_token: Input token count (None if not available)
+        out_token: Output token count (None if not available)
+        model: Model identifier (None if not available)
     """
     text: Optional[str]
-    in_token: int = 0
-    out_token: int = 0
-    model: str = ""
+    in_token: Optional[int] = None
+    out_token: Optional[int] = None
+    model: Optional[str] = None
 
 @dataclasses.dataclass
 class ProvenanceEvent:
diff --git a/trustgraph-base/trustgraph/base/text_completion_client.py b/trustgraph-base/trustgraph/base/text_completion_client.py
index 0a1358dc..876d71df 100644
--- a/trustgraph-base/trustgraph/base/text_completion_client.py
+++ b/trustgraph-base/trustgraph/base/text_completion_client.py
@@ -8,9 +8,9 @@ from .. schema import TextCompletionRequest, TextCompletionResponse
 @dataclass
 class TextCompletionResult:
     text: Optional[str]
-    in_token: int = 0
-    out_token: int = 0
-    model: str = ""
+    in_token: Optional[int] = None
+    out_token: Optional[int] = None
+    model: Optional[str] = None
 
 class TextCompletionClient(RequestResponse):
 
@@ -28,9 +28,9 @@ class TextCompletionClient(RequestResponse):
 
         return TextCompletionResult(
             text = resp.response,
-            in_token = getattr(resp, "in_token", 0) or 0,
-            out_token = getattr(resp, "out_token", 0) or 0,
-            model = getattr(resp, "model", "") or "",
+            in_token = resp.in_token,
+            out_token = resp.out_token,
+            model = resp.model,
         )
 
     async def text_completion_stream(
@@ -62,9 +62,9 @@ class TextCompletionClient(RequestResponse):
 
         return TextCompletionResult(
             text = None,
-            in_token = getattr(final, "in_token", 0) or 0,
-            out_token = getattr(final, "out_token", 0) or 0,
-            model = getattr(final, "model", "") or "",
+            in_token = final.in_token,
+            out_token = final.out_token,
+            model = final.model,
         )
 
 class TextCompletionClientSpec(RequestResponseSpec):
diff --git a/trustgraph-base/trustgraph/messaging/translators/prompt.py b/trustgraph-base/trustgraph/messaging/translators/prompt.py
index 1b9e33bd..7f76bf4a 100644
--- a/trustgraph-base/trustgraph/messaging/translators/prompt.py
+++ b/trustgraph-base/trustgraph/messaging/translators/prompt.py
@@ -53,11 +53,11 @@ class PromptResponseTranslator(MessageTranslator):
         # Always include end_of_stream flag for streaming support
         result["end_of_stream"] = getattr(obj, "end_of_stream", False)
 
-        if obj.in_token:
+        if obj.in_token is not None:
             result["in_token"] = obj.in_token
-        if obj.out_token:
+        if obj.out_token is not None:
             result["out_token"] = obj.out_token
-        if obj.model:
+        if obj.model is not None:
             result["model"] = obj.model
 
         return result
diff --git a/trustgraph-base/trustgraph/messaging/translators/text_completion.py b/trustgraph-base/trustgraph/messaging/translators/text_completion.py
index 596ff744..62cc4afb 100644
--- a/trustgraph-base/trustgraph/messaging/translators/text_completion.py
+++ b/trustgraph-base/trustgraph/messaging/translators/text_completion.py
@@ -29,11 +29,11 @@ class TextCompletionResponseTranslator(MessageTranslator):
     def encode(self, obj: TextCompletionResponse) -> Dict[str, Any]:
         result = {"response": obj.response}
 
-        if obj.in_token:
+        if obj.in_token is not None:
             result["in_token"] = obj.in_token
-        if obj.out_token:
+        if obj.out_token is not None:
             result["out_token"] = obj.out_token
-        if obj.model:
+        if obj.model is not None:
             result["model"] = obj.model
 
         # Always include end_of_stream flag for streaming support
diff --git a/trustgraph-base/trustgraph/schema/services/llm.py b/trustgraph-base/trustgraph/schema/services/llm.py
index 0fd6ab90..89c0cd54 100644
--- a/trustgraph-base/trustgraph/schema/services/llm.py
+++ b/trustgraph-base/trustgraph/schema/services/llm.py
@@ -17,9 +17,9 @@ class TextCompletionRequest:
 class TextCompletionResponse:
     error: Error | None = None
     response: str = ""
-    in_token: int = 0
-    out_token: int = 0
-    model: str = ""
+    in_token: int | None = None
+    out_token: int | None = None
+    model: str | None = None
     end_of_stream: bool = False  # Indicates final message in stream
 
 ############################################################################
diff --git a/trustgraph-base/trustgraph/schema/services/prompt.py b/trustgraph-base/trustgraph/schema/services/prompt.py
index 1d56702b..1696790b 100644
--- a/trustgraph-base/trustgraph/schema/services/prompt.py
+++ b/trustgraph-base/trustgraph/schema/services/prompt.py
@@ -42,8 +42,8 @@ class PromptResponse:
     end_of_stream: bool = False
 
     # Token usage from the underlying text completion
-    in_token: int = 0
-    out_token: int = 0
-    model: str = ""
+    in_token: int | None = None
+    out_token: int | None = None
+    model: str | None = None
 
 ############################################################################
\ No newline at end of file
diff --git a/trustgraph-cli/trustgraph/cli/invoke_prompt.py b/trustgraph-cli/trustgraph/cli/invoke_prompt.py
index 09cc9043..86f7a024 100644
--- a/trustgraph-cli/trustgraph/cli/invoke_prompt.py
+++ b/trustgraph-cli/trustgraph/cli/invoke_prompt.py
@@ -15,7 +15,8 @@ from trustgraph.api import Api
 default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
 default_token = os.getenv("TRUSTGRAPH_TOKEN", None)
 
-def query(url, flow_id, template_id, variables, streaming=True, token=None):
+def query(url, flow_id, template_id, variables, streaming=True, token=None,
+          show_usage=False):
 
     # Create API client
     api = Api(url=url, token=token)
@@ -31,16 +32,30 @@ def query(url, flow_id, template_id, variables, streaming=True, token=None):
         )
 
         if streaming:
-            # Stream output (prompt yields strings directly)
+            last_chunk = None
             for chunk in response:
-                if chunk:
-                    print(chunk, end="", flush=True)
-            # Add final newline after streaming
+                if chunk.content:
+                    print(chunk.content, end="", flush=True)
+                last_chunk = chunk
             print()
 
+            if show_usage and last_chunk:
+                print(
+                    f"Input tokens: {last_chunk.in_token}  "
+                    f"Output tokens: {last_chunk.out_token}  "
+                    f"Model: {last_chunk.model}",
+                    file=__import__('sys').stderr,
+                )
         else:
-            # Non-streaming: print complete response
-            print(response)
+            print(response.text)
+
+            if show_usage:
+                print(
+                    f"Input tokens: {response.in_token}  "
+                    f"Output tokens: {response.out_token}  "
+                    f"Model: {response.model}",
+                    file=__import__('sys').stderr,
+                )
 
     finally:
         # Clean up socket connection
@@ -92,6 +107,12 @@ specified multiple times''',
         help='Disable streaming (default: streaming enabled for text responses)'
     )
 
+    parser.add_argument(
+        '--show-usage',
+        action='store_true',
+        help='Show token usage and model on stderr'
+    )
+
     args = parser.parse_args()
 
     variables = {}
@@ -113,6 +134,7 @@ specified multiple times''',
             variables=variables,
             streaming=not args.no_streaming,
             token=args.token,
+            show_usage=args.show_usage,
         )
 
     except Exception as e: