mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-06-12 08:15:14 +02:00
token errors
This commit is contained in:
parent
fc9c0cf0b4
commit
05c29de5bc
11 changed files with 92 additions and 61 deletions
|
|
@ -476,9 +476,9 @@ class AsyncFlowInstance:
|
|||
result = await self.request("text-completion", request_data)
|
||||
return TextCompletionResult(
|
||||
text=result.get("response", ""),
|
||||
in_token=result.get("in_token", 0) or 0,
|
||||
out_token=result.get("out_token", 0) or 0,
|
||||
model=result.get("model", "") or "",
|
||||
in_token=result.get("in_token"),
|
||||
out_token=result.get("out_token"),
|
||||
model=result.get("model"),
|
||||
)
|
||||
|
||||
async def graph_rag(self, query: str, user: str, collection: str,
|
||||
|
|
|
|||
|
|
@ -212,9 +212,9 @@ class AsyncSocketClient:
|
|||
content=content,
|
||||
end_of_stream=resp.get("end_of_stream", False),
|
||||
error=None,
|
||||
in_token=resp.get("in_token", 0) or 0,
|
||||
out_token=resp.get("out_token", 0) or 0,
|
||||
model=resp.get("model", "") or "",
|
||||
in_token=resp.get("in_token"),
|
||||
out_token=resp.get("out_token"),
|
||||
model=resp.get("model"),
|
||||
)
|
||||
|
||||
async def aclose(self):
|
||||
|
|
@ -290,9 +290,9 @@ class AsyncSocketFlowInstance:
|
|||
result = await self.client._send_request("text-completion", self.flow_id, request)
|
||||
return TextCompletionResult(
|
||||
text=result.get("response", ""),
|
||||
in_token=result.get("in_token", 0) or 0,
|
||||
out_token=result.get("out_token", 0) or 0,
|
||||
model=result.get("model", "") or "",
|
||||
in_token=result.get("in_token"),
|
||||
out_token=result.get("out_token"),
|
||||
model=result.get("model"),
|
||||
)
|
||||
|
||||
async def _text_completion_streaming(self, request):
|
||||
|
|
|
|||
|
|
@ -387,9 +387,9 @@ class FlowInstance:
|
|||
|
||||
return TextCompletionResult(
|
||||
text=result.get("response", ""),
|
||||
in_token=result.get("in_token", 0) or 0,
|
||||
out_token=result.get("out_token", 0) or 0,
|
||||
model=result.get("model", "") or "",
|
||||
in_token=result.get("in_token"),
|
||||
out_token=result.get("out_token"),
|
||||
model=result.get("model"),
|
||||
)
|
||||
|
||||
def agent(self, question, user="trustgraph", state=None, group=None, history=None):
|
||||
|
|
|
|||
|
|
@ -405,9 +405,9 @@ class SocketClient:
|
|||
content=content,
|
||||
end_of_stream=resp.get("end_of_stream", False),
|
||||
error=None,
|
||||
in_token=resp.get("in_token", 0) or 0,
|
||||
out_token=resp.get("out_token", 0) or 0,
|
||||
model=resp.get("model", "") or "",
|
||||
in_token=resp.get("in_token"),
|
||||
out_token=resp.get("out_token"),
|
||||
model=resp.get("model"),
|
||||
)
|
||||
|
||||
def _build_provenance_event(self, resp: Dict[str, Any]) -> ProvenanceEvent:
|
||||
|
|
@ -566,9 +566,9 @@ class SocketFlowInstance:
|
|||
else:
|
||||
return TextCompletionResult(
|
||||
text=result.get("response", ""),
|
||||
in_token=result.get("in_token", 0) or 0,
|
||||
out_token=result.get("out_token", 0) or 0,
|
||||
model=result.get("model", "") or "",
|
||||
in_token=result.get("in_token"),
|
||||
out_token=result.get("out_token"),
|
||||
model=result.get("model"),
|
||||
)
|
||||
|
||||
def _text_completion_generator(self, result: Iterator[StreamingChunk]) -> Iterator[RAGChunk]:
|
||||
|
|
@ -696,10 +696,10 @@ class SocketFlowInstance:
|
|||
streaming=True, include_provenance=True
|
||||
)
|
||||
|
||||
def _rag_generator(self, result: Iterator[StreamingChunk]) -> Iterator[str]:
|
||||
def _rag_generator(self, result: Iterator[StreamingChunk]) -> Iterator[RAGChunk]:
|
||||
for chunk in result:
|
||||
if hasattr(chunk, 'content'):
|
||||
yield chunk.content
|
||||
if isinstance(chunk, RAGChunk):
|
||||
yield chunk
|
||||
|
||||
def prompt(
|
||||
self,
|
||||
|
|
@ -707,8 +707,12 @@ class SocketFlowInstance:
|
|||
variables: Dict[str, str],
|
||||
streaming: bool = False,
|
||||
**kwargs: Any
|
||||
) -> Union[str, Iterator[str]]:
|
||||
"""Execute a prompt template with optional streaming."""
|
||||
) -> Union[TextCompletionResult, Iterator[RAGChunk]]:
|
||||
"""Execute a prompt template with optional streaming.
|
||||
|
||||
Non-streaming: returns a TextCompletionResult with text and token counts.
|
||||
Streaming: returns an iterator of RAGChunk (with token counts on the final chunk).
|
||||
"""
|
||||
request = {
|
||||
"id": id,
|
||||
"variables": variables,
|
||||
|
|
@ -721,7 +725,12 @@ class SocketFlowInstance:
|
|||
if streaming:
|
||||
return self._rag_generator(result)
|
||||
else:
|
||||
return result.get("response", "")
|
||||
return TextCompletionResult(
|
||||
text=result.get("text", result.get("response", "")),
|
||||
in_token=result.get("in_token"),
|
||||
out_token=result.get("out_token"),
|
||||
model=result.get("model"),
|
||||
)
|
||||
|
||||
def graph_embeddings_query(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -210,9 +210,9 @@ class RAGChunk(StreamingChunk):
|
|||
chunk_type: str = "rag"
|
||||
end_of_stream: bool = False
|
||||
error: Optional[Dict[str, str]] = None
|
||||
in_token: int = 0
|
||||
out_token: int = 0
|
||||
model: str = ""
|
||||
in_token: Optional[int] = None
|
||||
out_token: Optional[int] = None
|
||||
model: Optional[str] = None
|
||||
|
||||
@dataclasses.dataclass
|
||||
class TextCompletionResult:
|
||||
|
|
@ -225,14 +225,14 @@ class TextCompletionResult:
|
|||
|
||||
Attributes:
|
||||
text: Complete response text (None in streaming mode)
|
||||
in_token: Input token count
|
||||
out_token: Output token count
|
||||
model: Model identifier
|
||||
in_token: Input token count (None if not available)
|
||||
out_token: Output token count (None if not available)
|
||||
model: Model identifier (None if not available)
|
||||
"""
|
||||
text: Optional[str]
|
||||
in_token: int = 0
|
||||
out_token: int = 0
|
||||
model: str = ""
|
||||
in_token: Optional[int] = None
|
||||
out_token: Optional[int] = None
|
||||
model: Optional[str] = None
|
||||
|
||||
@dataclasses.dataclass
|
||||
class ProvenanceEvent:
|
||||
|
|
|
|||
|
|
@ -8,9 +8,9 @@ from .. schema import TextCompletionRequest, TextCompletionResponse
|
|||
@dataclass
|
||||
class TextCompletionResult:
|
||||
text: Optional[str]
|
||||
in_token: int = 0
|
||||
out_token: int = 0
|
||||
model: str = ""
|
||||
in_token: Optional[int] = None
|
||||
out_token: Optional[int] = None
|
||||
model: Optional[str] = None
|
||||
|
||||
class TextCompletionClient(RequestResponse):
|
||||
|
||||
|
|
@ -28,9 +28,9 @@ class TextCompletionClient(RequestResponse):
|
|||
|
||||
return TextCompletionResult(
|
||||
text = resp.response,
|
||||
in_token = getattr(resp, "in_token", 0) or 0,
|
||||
out_token = getattr(resp, "out_token", 0) or 0,
|
||||
model = getattr(resp, "model", "") or "",
|
||||
in_token = resp.in_token,
|
||||
out_token = resp.out_token,
|
||||
model = resp.model,
|
||||
)
|
||||
|
||||
async def text_completion_stream(
|
||||
|
|
@ -62,9 +62,9 @@ class TextCompletionClient(RequestResponse):
|
|||
|
||||
return TextCompletionResult(
|
||||
text = None,
|
||||
in_token = getattr(final, "in_token", 0) or 0,
|
||||
out_token = getattr(final, "out_token", 0) or 0,
|
||||
model = getattr(final, "model", "") or "",
|
||||
in_token = final.in_token,
|
||||
out_token = final.out_token,
|
||||
model = final.model,
|
||||
)
|
||||
|
||||
class TextCompletionClientSpec(RequestResponseSpec):
|
||||
|
|
|
|||
|
|
@ -53,11 +53,11 @@ class PromptResponseTranslator(MessageTranslator):
|
|||
# Always include end_of_stream flag for streaming support
|
||||
result["end_of_stream"] = getattr(obj, "end_of_stream", False)
|
||||
|
||||
if obj.in_token:
|
||||
if obj.in_token is not None:
|
||||
result["in_token"] = obj.in_token
|
||||
if obj.out_token:
|
||||
if obj.out_token is not None:
|
||||
result["out_token"] = obj.out_token
|
||||
if obj.model:
|
||||
if obj.model is not None:
|
||||
result["model"] = obj.model
|
||||
|
||||
return result
|
||||
|
|
|
|||
|
|
@ -29,11 +29,11 @@ class TextCompletionResponseTranslator(MessageTranslator):
|
|||
def encode(self, obj: TextCompletionResponse) -> Dict[str, Any]:
|
||||
result = {"response": obj.response}
|
||||
|
||||
if obj.in_token:
|
||||
if obj.in_token is not None:
|
||||
result["in_token"] = obj.in_token
|
||||
if obj.out_token:
|
||||
if obj.out_token is not None:
|
||||
result["out_token"] = obj.out_token
|
||||
if obj.model:
|
||||
if obj.model is not None:
|
||||
result["model"] = obj.model
|
||||
|
||||
# Always include end_of_stream flag for streaming support
|
||||
|
|
|
|||
|
|
@ -17,9 +17,9 @@ class TextCompletionRequest:
|
|||
class TextCompletionResponse:
|
||||
error: Error | None = None
|
||||
response: str = ""
|
||||
in_token: int = 0
|
||||
out_token: int = 0
|
||||
model: str = ""
|
||||
in_token: int | None = None
|
||||
out_token: int | None = None
|
||||
model: str | None = None
|
||||
end_of_stream: bool = False # Indicates final message in stream
|
||||
|
||||
############################################################################
|
||||
|
|
|
|||
|
|
@ -42,8 +42,8 @@ class PromptResponse:
|
|||
end_of_stream: bool = False
|
||||
|
||||
# Token usage from the underlying text completion
|
||||
in_token: int = 0
|
||||
out_token: int = 0
|
||||
model: str = ""
|
||||
in_token: int | None = None
|
||||
out_token: int | None = None
|
||||
model: str | None = None
|
||||
|
||||
############################################################################
|
||||
|
|
@ -15,7 +15,8 @@ from trustgraph.api import Api
|
|||
default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
|
||||
default_token = os.getenv("TRUSTGRAPH_TOKEN", None)
|
||||
|
||||
def query(url, flow_id, template_id, variables, streaming=True, token=None):
|
||||
def query(url, flow_id, template_id, variables, streaming=True, token=None,
|
||||
show_usage=False):
|
||||
|
||||
# Create API client
|
||||
api = Api(url=url, token=token)
|
||||
|
|
@ -31,16 +32,30 @@ def query(url, flow_id, template_id, variables, streaming=True, token=None):
|
|||
)
|
||||
|
||||
if streaming:
|
||||
# Stream output (prompt yields strings directly)
|
||||
last_chunk = None
|
||||
for chunk in response:
|
||||
if chunk:
|
||||
print(chunk, end="", flush=True)
|
||||
# Add final newline after streaming
|
||||
if chunk.content:
|
||||
print(chunk.content, end="", flush=True)
|
||||
last_chunk = chunk
|
||||
print()
|
||||
|
||||
if show_usage and last_chunk:
|
||||
print(
|
||||
f"Input tokens: {last_chunk.in_token} "
|
||||
f"Output tokens: {last_chunk.out_token} "
|
||||
f"Model: {last_chunk.model}",
|
||||
file=__import__('sys').stderr,
|
||||
)
|
||||
else:
|
||||
# Non-streaming: print complete response
|
||||
print(response)
|
||||
print(response.text)
|
||||
|
||||
if show_usage:
|
||||
print(
|
||||
f"Input tokens: {response.in_token} "
|
||||
f"Output tokens: {response.out_token} "
|
||||
f"Model: {response.model}",
|
||||
file=__import__('sys').stderr,
|
||||
)
|
||||
|
||||
finally:
|
||||
# Clean up socket connection
|
||||
|
|
@ -92,6 +107,12 @@ specified multiple times''',
|
|||
help='Disable streaming (default: streaming enabled for text responses)'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--show-usage',
|
||||
action='store_true',
|
||||
help='Show token usage and model on stderr'
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
variables = {}
|
||||
|
|
@ -113,6 +134,7 @@ specified multiple times''',
|
|||
variables=variables,
|
||||
streaming=not args.no_streaming,
|
||||
token=args.token,
|
||||
show_usage=args.show_usage,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue