token errors

This commit is contained in:
Cyber MacGeddon 2026-04-12 16:42:39 +01:00
parent fc9c0cf0b4
commit 05c29de5bc
11 changed files with 92 additions and 61 deletions

View file

@ -476,9 +476,9 @@ class AsyncFlowInstance:
result = await self.request("text-completion", request_data)
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token", 0) or 0,
out_token=result.get("out_token", 0) or 0,
model=result.get("model", "") or "",
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
async def graph_rag(self, query: str, user: str, collection: str,

View file

@ -212,9 +212,9 @@ class AsyncSocketClient:
content=content,
end_of_stream=resp.get("end_of_stream", False),
error=None,
in_token=resp.get("in_token", 0) or 0,
out_token=resp.get("out_token", 0) or 0,
model=resp.get("model", "") or "",
in_token=resp.get("in_token"),
out_token=resp.get("out_token"),
model=resp.get("model"),
)
async def aclose(self):
@ -290,9 +290,9 @@ class AsyncSocketFlowInstance:
result = await self.client._send_request("text-completion", self.flow_id, request)
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token", 0) or 0,
out_token=result.get("out_token", 0) or 0,
model=result.get("model", "") or "",
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
async def _text_completion_streaming(self, request):

View file

@ -387,9 +387,9 @@ class FlowInstance:
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token", 0) or 0,
out_token=result.get("out_token", 0) or 0,
model=result.get("model", "") or "",
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
def agent(self, question, user="trustgraph", state=None, group=None, history=None):

View file

@ -405,9 +405,9 @@ class SocketClient:
content=content,
end_of_stream=resp.get("end_of_stream", False),
error=None,
in_token=resp.get("in_token", 0) or 0,
out_token=resp.get("out_token", 0) or 0,
model=resp.get("model", "") or "",
in_token=resp.get("in_token"),
out_token=resp.get("out_token"),
model=resp.get("model"),
)
def _build_provenance_event(self, resp: Dict[str, Any]) -> ProvenanceEvent:
@ -566,9 +566,9 @@ class SocketFlowInstance:
else:
return TextCompletionResult(
text=result.get("response", ""),
in_token=result.get("in_token", 0) or 0,
out_token=result.get("out_token", 0) or 0,
model=result.get("model", "") or "",
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
def _text_completion_generator(self, result: Iterator[StreamingChunk]) -> Iterator[RAGChunk]:
@ -696,10 +696,10 @@ class SocketFlowInstance:
streaming=True, include_provenance=True
)
def _rag_generator(self, result: Iterator[StreamingChunk]) -> Iterator[str]:
def _rag_generator(self, result: Iterator[StreamingChunk]) -> Iterator[RAGChunk]:
for chunk in result:
if hasattr(chunk, 'content'):
yield chunk.content
if isinstance(chunk, RAGChunk):
yield chunk
def prompt(
self,
@ -707,8 +707,12 @@ class SocketFlowInstance:
variables: Dict[str, str],
streaming: bool = False,
**kwargs: Any
) -> Union[str, Iterator[str]]:
"""Execute a prompt template with optional streaming."""
) -> Union[TextCompletionResult, Iterator[RAGChunk]]:
"""Execute a prompt template with optional streaming.
Non-streaming: returns a TextCompletionResult with text and token counts.
Streaming: returns an iterator of RAGChunk (with token counts on the final chunk).
"""
request = {
"id": id,
"variables": variables,
@ -721,7 +725,12 @@ class SocketFlowInstance:
if streaming:
return self._rag_generator(result)
else:
return result.get("response", "")
return TextCompletionResult(
text=result.get("text", result.get("response", "")),
in_token=result.get("in_token"),
out_token=result.get("out_token"),
model=result.get("model"),
)
def graph_embeddings_query(
self,

View file

@ -210,9 +210,9 @@ class RAGChunk(StreamingChunk):
chunk_type: str = "rag"
end_of_stream: bool = False
error: Optional[Dict[str, str]] = None
in_token: int = 0
out_token: int = 0
model: str = ""
in_token: Optional[int] = None
out_token: Optional[int] = None
model: Optional[str] = None
@dataclasses.dataclass
class TextCompletionResult:
@ -225,14 +225,14 @@ class TextCompletionResult:
Attributes:
text: Complete response text (None in streaming mode)
in_token: Input token count
out_token: Output token count
model: Model identifier
in_token: Input token count (None if not available)
out_token: Output token count (None if not available)
model: Model identifier (None if not available)
"""
text: Optional[str]
in_token: int = 0
out_token: int = 0
model: str = ""
in_token: Optional[int] = None
out_token: Optional[int] = None
model: Optional[str] = None
@dataclasses.dataclass
class ProvenanceEvent:

View file

@ -8,9 +8,9 @@ from .. schema import TextCompletionRequest, TextCompletionResponse
@dataclass
class TextCompletionResult:
text: Optional[str]
in_token: int = 0
out_token: int = 0
model: str = ""
in_token: Optional[int] = None
out_token: Optional[int] = None
model: Optional[str] = None
class TextCompletionClient(RequestResponse):
@ -28,9 +28,9 @@ class TextCompletionClient(RequestResponse):
return TextCompletionResult(
text = resp.response,
in_token = getattr(resp, "in_token", 0) or 0,
out_token = getattr(resp, "out_token", 0) or 0,
model = getattr(resp, "model", "") or "",
in_token = resp.in_token,
out_token = resp.out_token,
model = resp.model,
)
async def text_completion_stream(
@ -62,9 +62,9 @@ class TextCompletionClient(RequestResponse):
return TextCompletionResult(
text = None,
in_token = getattr(final, "in_token", 0) or 0,
out_token = getattr(final, "out_token", 0) or 0,
model = getattr(final, "model", "") or "",
in_token = final.in_token,
out_token = final.out_token,
model = final.model,
)
class TextCompletionClientSpec(RequestResponseSpec):

View file

@ -53,11 +53,11 @@ class PromptResponseTranslator(MessageTranslator):
# Always include end_of_stream flag for streaming support
result["end_of_stream"] = getattr(obj, "end_of_stream", False)
if obj.in_token:
if obj.in_token is not None:
result["in_token"] = obj.in_token
if obj.out_token:
if obj.out_token is not None:
result["out_token"] = obj.out_token
if obj.model:
if obj.model is not None:
result["model"] = obj.model
return result

View file

@ -29,11 +29,11 @@ class TextCompletionResponseTranslator(MessageTranslator):
def encode(self, obj: TextCompletionResponse) -> Dict[str, Any]:
result = {"response": obj.response}
if obj.in_token:
if obj.in_token is not None:
result["in_token"] = obj.in_token
if obj.out_token:
if obj.out_token is not None:
result["out_token"] = obj.out_token
if obj.model:
if obj.model is not None:
result["model"] = obj.model
# Always include end_of_stream flag for streaming support

View file

@ -17,9 +17,9 @@ class TextCompletionRequest:
class TextCompletionResponse:
error: Error | None = None
response: str = ""
in_token: int = 0
out_token: int = 0
model: str = ""
in_token: int | None = None
out_token: int | None = None
model: str | None = None
end_of_stream: bool = False # Indicates final message in stream
############################################################################

View file

@ -42,8 +42,8 @@ class PromptResponse:
end_of_stream: bool = False
# Token usage from the underlying text completion
in_token: int = 0
out_token: int = 0
model: str = ""
in_token: int | None = None
out_token: int | None = None
model: str | None = None
############################################################################

View file

@ -15,7 +15,8 @@ from trustgraph.api import Api
default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
default_token = os.getenv("TRUSTGRAPH_TOKEN", None)
def query(url, flow_id, template_id, variables, streaming=True, token=None):
def query(url, flow_id, template_id, variables, streaming=True, token=None,
show_usage=False):
# Create API client
api = Api(url=url, token=token)
@ -31,16 +32,30 @@ def query(url, flow_id, template_id, variables, streaming=True, token=None):
)
if streaming:
# Stream output (prompt yields strings directly)
last_chunk = None
for chunk in response:
if chunk:
print(chunk, end="", flush=True)
# Add final newline after streaming
if chunk.content:
print(chunk.content, end="", flush=True)
last_chunk = chunk
print()
if show_usage and last_chunk:
print(
f"Input tokens: {last_chunk.in_token} "
f"Output tokens: {last_chunk.out_token} "
f"Model: {last_chunk.model}",
file=__import__('sys').stderr,
)
else:
# Non-streaming: print complete response
print(response)
print(response.text)
if show_usage:
print(
f"Input tokens: {response.in_token} "
f"Output tokens: {response.out_token} "
f"Model: {response.model}",
file=__import__('sys').stderr,
)
finally:
# Clean up socket connection
@ -92,6 +107,12 @@ specified multiple times''',
help='Disable streaming (default: streaming enabled for text responses)'
)
parser.add_argument(
'--show-usage',
action='store_true',
help='Show token usage and model on stderr'
)
args = parser.parse_args()
variables = {}
@ -113,6 +134,7 @@ specified multiple times''',
variables=variables,
streaming=not args.no_streaming,
token=args.token,
show_usage=args.show_usage,
)
except Exception as e: