diff --git a/trustgraph-flow/trustgraph/prompt/template/service.py b/trustgraph-flow/trustgraph/prompt/template/service.py index c9c0c87b..c599ce77 100755 --- a/trustgraph-flow/trustgraph/prompt/template/service.py +++ b/trustgraph-flow/trustgraph/prompt/template/service.py @@ -128,12 +128,14 @@ class Processor(FlowProcessor): # Always send a message if there's content OR if it's the final message if resp.response or is_final: - # Forward each chunk immediately r = PromptResponse( text=resp.response if resp.response else "", object=None, error=None, end_of_stream=is_final, + in_token=resp.in_token, + out_token=resp.out_token, + model=resp.model, ) await flow("response").send(r, properties={"id": id}) @@ -155,6 +157,8 @@ class Processor(FlowProcessor): return # Non-streaming path (original behavior) + usage = {} + async def llm(system, prompt): logger.debug(f"System prompt: {system}") @@ -164,6 +168,9 @@ class Processor(FlowProcessor): result = await flow("text-completion-request").text_completion( system = system, prompt = prompt, ) + usage["in_token"] = result.in_token + usage["out_token"] = result.out_token + usage["model"] = result.model return result.text except Exception as e: logger.error(f"LLM Exception: {e}", exc_info=True) @@ -186,6 +193,9 @@ class Processor(FlowProcessor): object=None, error=None, end_of_stream=True, + in_token=usage.get("in_token", 0), + out_token=usage.get("out_token", 0), + model=usage.get("model", ""), ) await flow("response").send(r, properties={"id": id}) @@ -202,6 +212,9 @@ class Processor(FlowProcessor): object=json.dumps(resp), error=None, end_of_stream=True, + in_token=usage.get("in_token", 0), + out_token=usage.get("out_token", 0), + model=usage.get("model", ""), ) await flow("response").send(r, properties={"id": id})