From 05c29de5bc7d010861c70300878609bbb00f7524 Mon Sep 17 00:00:00 2001 From: Cyber MacGeddon Date: Sun, 12 Apr 2026 16:42:39 +0100 Subject: [PATCH] token errors --- trustgraph-base/trustgraph/api/async_flow.py | 6 ++-- .../trustgraph/api/async_socket_client.py | 12 +++---- trustgraph-base/trustgraph/api/flow.py | 6 ++-- .../trustgraph/api/socket_client.py | 33 ++++++++++------- trustgraph-base/trustgraph/api/types.py | 18 +++++----- .../trustgraph/base/text_completion_client.py | 18 +++++----- .../messaging/translators/prompt.py | 6 ++-- .../messaging/translators/text_completion.py | 6 ++-- .../trustgraph/schema/services/llm.py | 6 ++-- .../trustgraph/schema/services/prompt.py | 6 ++-- .../trustgraph/cli/invoke_prompt.py | 36 +++++++++++++++---- 11 files changed, 92 insertions(+), 61 deletions(-) diff --git a/trustgraph-base/trustgraph/api/async_flow.py b/trustgraph-base/trustgraph/api/async_flow.py index 9dd110c6..68899341 100644 --- a/trustgraph-base/trustgraph/api/async_flow.py +++ b/trustgraph-base/trustgraph/api/async_flow.py @@ -476,9 +476,9 @@ class AsyncFlowInstance: result = await self.request("text-completion", request_data) return TextCompletionResult( text=result.get("response", ""), - in_token=result.get("in_token", 0) or 0, - out_token=result.get("out_token", 0) or 0, - model=result.get("model", "") or "", + in_token=result.get("in_token"), + out_token=result.get("out_token"), + model=result.get("model"), ) async def graph_rag(self, query: str, user: str, collection: str, diff --git a/trustgraph-base/trustgraph/api/async_socket_client.py b/trustgraph-base/trustgraph/api/async_socket_client.py index 9c9c30d5..e8957cb6 100644 --- a/trustgraph-base/trustgraph/api/async_socket_client.py +++ b/trustgraph-base/trustgraph/api/async_socket_client.py @@ -212,9 +212,9 @@ class AsyncSocketClient: content=content, end_of_stream=resp.get("end_of_stream", False), error=None, - in_token=resp.get("in_token", 0) or 0, - out_token=resp.get("out_token", 0) or 0, - model=resp.get("model", "") or "", + in_token=resp.get("in_token"), + out_token=resp.get("out_token"), + model=resp.get("model"), ) async def aclose(self): @@ -290,9 +290,9 @@ class AsyncSocketFlowInstance: result = await self.client._send_request("text-completion", self.flow_id, request) return TextCompletionResult( text=result.get("response", ""), - in_token=result.get("in_token", 0) or 0, - out_token=result.get("out_token", 0) or 0, - model=result.get("model", "") or "", + in_token=result.get("in_token"), + out_token=result.get("out_token"), + model=result.get("model"), ) async def _text_completion_streaming(self, request): diff --git a/trustgraph-base/trustgraph/api/flow.py b/trustgraph-base/trustgraph/api/flow.py index b39f3db9..d505c143 100644 --- a/trustgraph-base/trustgraph/api/flow.py +++ b/trustgraph-base/trustgraph/api/flow.py @@ -387,9 +387,9 @@ class FlowInstance: return TextCompletionResult( text=result.get("response", ""), - in_token=result.get("in_token", 0) or 0, - out_token=result.get("out_token", 0) or 0, - model=result.get("model", "") or "", + in_token=result.get("in_token"), + out_token=result.get("out_token"), + model=result.get("model"), ) def agent(self, question, user="trustgraph", state=None, group=None, history=None): diff --git a/trustgraph-base/trustgraph/api/socket_client.py b/trustgraph-base/trustgraph/api/socket_client.py index 68db0ef7..234f003d 100644 --- a/trustgraph-base/trustgraph/api/socket_client.py +++ b/trustgraph-base/trustgraph/api/socket_client.py @@ -405,9 +405,9 @@ class SocketClient: content=content, end_of_stream=resp.get("end_of_stream", False), error=None, - in_token=resp.get("in_token", 0) or 0, - out_token=resp.get("out_token", 0) or 0, - model=resp.get("model", "") or "", + in_token=resp.get("in_token"), + out_token=resp.get("out_token"), + model=resp.get("model"), ) def _build_provenance_event(self, resp: Dict[str, Any]) -> ProvenanceEvent: @@ -566,9 +566,9 @@ class SocketFlowInstance: else: return TextCompletionResult( text=result.get("response", ""), - in_token=result.get("in_token", 0) or 0, - out_token=result.get("out_token", 0) or 0, - model=result.get("model", "") or "", + in_token=result.get("in_token"), + out_token=result.get("out_token"), + model=result.get("model"), ) def _text_completion_generator(self, result: Iterator[StreamingChunk]) -> Iterator[RAGChunk]: @@ -696,10 +696,10 @@ class SocketFlowInstance: streaming=True, include_provenance=True ) - def _rag_generator(self, result: Iterator[StreamingChunk]) -> Iterator[str]: + def _rag_generator(self, result: Iterator[StreamingChunk]) -> Iterator[RAGChunk]: for chunk in result: - if hasattr(chunk, 'content'): - yield chunk.content + if isinstance(chunk, RAGChunk): + yield chunk def prompt( self, @@ -707,8 +707,12 @@ class SocketFlowInstance: variables: Dict[str, str], streaming: bool = False, **kwargs: Any - ) -> Union[str, Iterator[str]]: - """Execute a prompt template with optional streaming.""" + ) -> Union[TextCompletionResult, Iterator[RAGChunk]]: + """Execute a prompt template with optional streaming. + + Non-streaming: returns a TextCompletionResult with text and token counts. + Streaming: returns an iterator of RAGChunk (with token counts on the final chunk). + """ request = { "id": id, "variables": variables, @@ -721,7 +725,12 @@ class SocketFlowInstance: if streaming: return self._rag_generator(result) else: - return result.get("response", "") + return TextCompletionResult( + text=result.get("text", result.get("response", "")), + in_token=result.get("in_token"), + out_token=result.get("out_token"), + model=result.get("model"), + ) def graph_embeddings_query( self, diff --git a/trustgraph-base/trustgraph/api/types.py b/trustgraph-base/trustgraph/api/types.py index da05e76d..a6f7f11e 100644 --- a/trustgraph-base/trustgraph/api/types.py +++ b/trustgraph-base/trustgraph/api/types.py @@ -210,9 +210,9 @@ class RAGChunk(StreamingChunk): chunk_type: str = "rag" end_of_stream: bool = False error: Optional[Dict[str, str]] = None - in_token: int = 0 - out_token: int = 0 - model: str = "" + in_token: Optional[int] = None + out_token: Optional[int] = None + model: Optional[str] = None @dataclasses.dataclass class TextCompletionResult: @@ -225,14 +225,14 @@ class TextCompletionResult: Attributes: text: Complete response text (None in streaming mode) - in_token: Input token count - out_token: Output token count - model: Model identifier + in_token: Input token count (None if not available) + out_token: Output token count (None if not available) + model: Model identifier (None if not available) """ text: Optional[str] - in_token: int = 0 - out_token: int = 0 - model: str = "" + in_token: Optional[int] = None + out_token: Optional[int] = None + model: Optional[str] = None @dataclasses.dataclass class ProvenanceEvent: diff --git a/trustgraph-base/trustgraph/base/text_completion_client.py b/trustgraph-base/trustgraph/base/text_completion_client.py index 0a1358dc..876d71df 100644 --- a/trustgraph-base/trustgraph/base/text_completion_client.py +++ b/trustgraph-base/trustgraph/base/text_completion_client.py @@ -8,9 +8,9 @@ from .. schema import TextCompletionRequest, TextCompletionResponse @dataclass class TextCompletionResult: text: Optional[str] - in_token: int = 0 - out_token: int = 0 - model: str = "" + in_token: Optional[int] = None + out_token: Optional[int] = None + model: Optional[str] = None class TextCompletionClient(RequestResponse): @@ -28,9 +28,9 @@ class TextCompletionClient(RequestResponse): return TextCompletionResult( text = resp.response, - in_token = getattr(resp, "in_token", 0) or 0, - out_token = getattr(resp, "out_token", 0) or 0, - model = getattr(resp, "model", "") or "", + in_token = resp.in_token, + out_token = resp.out_token, + model = resp.model, ) async def text_completion_stream( @@ -62,9 +62,9 @@ class TextCompletionClient(RequestResponse): return TextCompletionResult( text = None, - in_token = getattr(final, "in_token", 0) or 0, - out_token = getattr(final, "out_token", 0) or 0, - model = getattr(final, "model", "") or "", + in_token = final.in_token, + out_token = final.out_token, + model = final.model, ) class TextCompletionClientSpec(RequestResponseSpec): diff --git a/trustgraph-base/trustgraph/messaging/translators/prompt.py b/trustgraph-base/trustgraph/messaging/translators/prompt.py index 1b9e33bd..7f76bf4a 100644 --- a/trustgraph-base/trustgraph/messaging/translators/prompt.py +++ b/trustgraph-base/trustgraph/messaging/translators/prompt.py @@ -53,11 +53,11 @@ class PromptResponseTranslator(MessageTranslator): # Always include end_of_stream flag for streaming support result["end_of_stream"] = getattr(obj, "end_of_stream", False) - if obj.in_token: + if obj.in_token is not None: result["in_token"] = obj.in_token - if obj.out_token: + if obj.out_token is not None: result["out_token"] = obj.out_token - if obj.model: + if obj.model is not None: result["model"] = obj.model return result diff --git a/trustgraph-base/trustgraph/messaging/translators/text_completion.py b/trustgraph-base/trustgraph/messaging/translators/text_completion.py index 596ff744..62cc4afb 100644 --- a/trustgraph-base/trustgraph/messaging/translators/text_completion.py +++ b/trustgraph-base/trustgraph/messaging/translators/text_completion.py @@ -29,11 +29,11 @@ class TextCompletionResponseTranslator(MessageTranslator): def encode(self, obj: TextCompletionResponse) -> Dict[str, Any]: result = {"response": obj.response} - if obj.in_token: + if obj.in_token is not None: result["in_token"] = obj.in_token - if obj.out_token: + if obj.out_token is not None: result["out_token"] = obj.out_token - if obj.model: + if obj.model is not None: result["model"] = obj.model # Always include end_of_stream flag for streaming support diff --git a/trustgraph-base/trustgraph/schema/services/llm.py b/trustgraph-base/trustgraph/schema/services/llm.py index 0fd6ab90..89c0cd54 100644 --- a/trustgraph-base/trustgraph/schema/services/llm.py +++ b/trustgraph-base/trustgraph/schema/services/llm.py @@ -17,9 +17,9 @@ class TextCompletionRequest: class TextCompletionResponse: error: Error | None = None response: str = "" - in_token: int = 0 - out_token: int = 0 - model: str = "" + in_token: int | None = None + out_token: int | None = None + model: str | None = None end_of_stream: bool = False # Indicates final message in stream ############################################################################ diff --git a/trustgraph-base/trustgraph/schema/services/prompt.py b/trustgraph-base/trustgraph/schema/services/prompt.py index 1d56702b..1696790b 100644 --- a/trustgraph-base/trustgraph/schema/services/prompt.py +++ b/trustgraph-base/trustgraph/schema/services/prompt.py @@ -42,8 +42,8 @@ class PromptResponse: end_of_stream: bool = False # Token usage from the underlying text completion - in_token: int = 0 - out_token: int = 0 - model: str = "" + in_token: int | None = None + out_token: int | None = None + model: str | None = None ############################################################################ \ No newline at end of file diff --git a/trustgraph-cli/trustgraph/cli/invoke_prompt.py b/trustgraph-cli/trustgraph/cli/invoke_prompt.py index 09cc9043..86f7a024 100644 --- a/trustgraph-cli/trustgraph/cli/invoke_prompt.py +++ b/trustgraph-cli/trustgraph/cli/invoke_prompt.py @@ -15,7 +15,8 @@ from trustgraph.api import Api default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/') default_token = os.getenv("TRUSTGRAPH_TOKEN", None) -def query(url, flow_id, template_id, variables, streaming=True, token=None): +def query(url, flow_id, template_id, variables, streaming=True, token=None, + show_usage=False): # Create API client api = Api(url=url, token=token) @@ -31,16 +32,30 @@ def query(url, flow_id, template_id, variables, streaming=True, token=None): ) if streaming: - # Stream output (prompt yields strings directly) + last_chunk = None for chunk in response: - if chunk: - print(chunk, end="", flush=True) - # Add final newline after streaming + if chunk.content: + print(chunk.content, end="", flush=True) + last_chunk = chunk print() + if show_usage and last_chunk: + print( + f"Input tokens: {last_chunk.in_token} " + f"Output tokens: {last_chunk.out_token} " + f"Model: {last_chunk.model}", + file=__import__('sys').stderr, + ) else: - # Non-streaming: print complete response - print(response) + print(response.text) + + if show_usage: + print( + f"Input tokens: {response.in_token} " + f"Output tokens: {response.out_token} " + f"Model: {response.model}", + file=__import__('sys').stderr, + ) finally: # Clean up socket connection @@ -92,6 +107,12 @@ specified multiple times''', help='Disable streaming (default: streaming enabled for text responses)' ) + parser.add_argument( + '--show-usage', + action='store_true', + help='Show token usage and model on stderr' + ) + args = parser.parse_args() variables = {} @@ -113,6 +134,7 @@ specified multiple times''', variables=variables, streaming=not args.no_streaming, token=args.token, + show_usage=args.show_usage, ) except Exception as e: