diff --git a/trustgraph-flow/trustgraph/model/text_completion/azure/llm.py b/trustgraph-flow/trustgraph/model/text_completion/azure/llm.py
index 614c1362..4e3db7f9 100755
--- a/trustgraph-flow/trustgraph/model/text_completion/azure/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/azure/llm.py
@@ -75,6 +75,7 @@ class Processor(LlmService):
 
         if stream:
             data["stream"] = True
+            data["stream_options"] = {"include_usage": True}
 
         body = json.dumps(data)
 
@@ -191,6 +192,9 @@ class Processor(LlmService):
         if response.status_code != 200:
             raise RuntimeError("LLM failure")
 
+        total_input_tokens = 0
+        total_output_tokens = 0
+
         # Parse SSE stream
         for line in response.iter_lines():
             if line:
@@ -215,15 +219,21 @@ class Processor(LlmService):
                                 model=model_name,
                                 is_final=False
                             )
+
+                        # Capture usage from final chunk
+                        if 'usage' in chunk_data and chunk_data['usage']:
+                            total_input_tokens = chunk_data['usage'].get('prompt_tokens', 0)
+                            total_output_tokens = chunk_data['usage'].get('completion_tokens', 0)
+
                     except json.JSONDecodeError:
                         logger.warning(f"Failed to parse chunk: {data}")
                         continue
 
-        # Send final chunk
+        # Send final chunk with token counts
         yield LlmChunk(
             text="",
-            in_token=None,
-            out_token=None,
+            in_token=total_input_tokens,
+            out_token=total_output_tokens,
             model=model_name,
             is_final=True
         )
diff --git a/trustgraph-flow/trustgraph/model/text_completion/azure_openai/llm.py b/trustgraph-flow/trustgraph/model/text_completion/azure_openai/llm.py
index 950c006a..4ab0b302 100755
--- a/trustgraph-flow/trustgraph/model/text_completion/azure_openai/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/azure_openai/llm.py
@@ -161,9 +161,13 @@ class Processor(LlmService):
             temperature=effective_temperature,
             max_tokens=self.max_output,
             top_p=1,
-            stream=True  # Enable streaming
+            stream=True,
+            stream_options={"include_usage": True}
         )
 
+        total_input_tokens = 0
+        total_output_tokens = 0
+
         # Stream chunks
         for chunk in response:
             if chunk.choices and chunk.choices[0].delta.content:
@@ -175,11 +179,16 @@ class Processor(LlmService):
                     is_final=False
                 )
 
-        # Send final chunk
+            # Capture usage from final chunk
+            if chunk.usage:
+                total_input_tokens = chunk.usage.prompt_tokens
+                total_output_tokens = chunk.usage.completion_tokens
+
+        # Send final chunk with token counts
         yield LlmChunk(
             text="",
-            in_token=None,
-            out_token=None,
+            in_token=total_input_tokens,
+            out_token=total_output_tokens,
             model=model_name,
             is_final=True
         )
diff --git a/trustgraph-flow/trustgraph/model/text_completion/llamafile/llm.py b/trustgraph-flow/trustgraph/model/text_completion/llamafile/llm.py
index 801ed067..276727b5 100755
--- a/trustgraph-flow/trustgraph/model/text_completion/llamafile/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/llamafile/llm.py
@@ -126,9 +126,13 @@ class Processor(LlmService):
             frequency_penalty=0,
             presence_penalty=0,
             response_format={"type": "text"},
-            stream=True
+            stream=True,
+            stream_options={"include_usage": True}
         )
 
+        total_input_tokens = 0
+        total_output_tokens = 0
+
         for chunk in response:
             if chunk.choices and chunk.choices[0].delta.content:
                 yield LlmChunk(
@@ -139,10 +143,15 @@ class Processor(LlmService):
                     is_final=False
                 )
 
+            # Capture usage from final chunk
+            if chunk.usage:
+                total_input_tokens = chunk.usage.prompt_tokens
+                total_output_tokens = chunk.usage.completion_tokens
+
         yield LlmChunk(
             text="",
-            in_token=None,
-            out_token=None,
+            in_token=total_input_tokens,
+            out_token=total_output_tokens,
             model=model_name,
             is_final=True
         )
diff --git a/trustgraph-flow/trustgraph/model/text_completion/lmstudio/llm.py b/trustgraph-flow/trustgraph/model/text_completion/lmstudio/llm.py
index 078a890e..b057f58d 100755
--- a/trustgraph-flow/trustgraph/model/text_completion/lmstudio/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/lmstudio/llm.py
@@ -130,9 +130,13 @@ class Processor(LlmService):
             frequency_penalty=0,
             presence_penalty=0,
             response_format={"type": "text"},
-            stream=True
+            stream=True,
+            stream_options={"include_usage": True}
         )
 
+        total_input_tokens = 0
+        total_output_tokens = 0
+
         for chunk in response:
             if chunk.choices and chunk.choices[0].delta.content:
                 yield LlmChunk(
@@ -143,10 +147,15 @@ class Processor(LlmService):
                     is_final=False
                 )
 
+            # Capture usage from final chunk
+            if chunk.usage:
+                total_input_tokens = chunk.usage.prompt_tokens
+                total_output_tokens = chunk.usage.completion_tokens
+
         yield LlmChunk(
             text="",
-            in_token=None,
-            out_token=None,
+            in_token=total_input_tokens,
+            out_token=total_output_tokens,
             model=model_name,
             is_final=True
         )
diff --git a/trustgraph-flow/trustgraph/model/text_completion/mistral/llm.py b/trustgraph-flow/trustgraph/model/text_completion/mistral/llm.py
index 7952b1df..fab41ecd 100755
--- a/trustgraph-flow/trustgraph/model/text_completion/mistral/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/mistral/llm.py
@@ -156,6 +156,9 @@ class Processor(LlmService):
             response_format={"type": "text"}
         )
 
+        total_input_tokens = 0
+        total_output_tokens = 0
+
         for chunk in stream:
             if chunk.data.choices and chunk.data.choices[0].delta.content:
                 yield LlmChunk(
@@ -166,11 +169,16 @@ class Processor(LlmService):
                     is_final=False
                 )
 
-        # Send final chunk
+            # Capture usage data when available (typically in final chunk)
+            if chunk.data.usage:
+                total_input_tokens = chunk.data.usage.prompt_tokens
+                total_output_tokens = chunk.data.usage.completion_tokens
+
+        # Send final chunk with token counts
         yield LlmChunk(
             text="",
-            in_token=None,
-            out_token=None,
+            in_token=total_input_tokens,
+            out_token=total_output_tokens,
             model=model_name,
             is_final=True
         )
diff --git a/trustgraph-flow/trustgraph/model/text_completion/openai/llm.py b/trustgraph-flow/trustgraph/model/text_completion/openai/llm.py
index 4da1378b..d65e27bf 100755
--- a/trustgraph-flow/trustgraph/model/text_completion/openai/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/openai/llm.py
@@ -153,9 +153,13 @@ class Processor(LlmService):
             ],
             temperature=effective_temperature,
             max_tokens=self.max_output,
-            stream=True  # Enable streaming
+            stream=True,
+            stream_options={"include_usage": True}
         )
 
+        total_input_tokens = 0
+        total_output_tokens = 0
+
         # Stream chunks
         for chunk in response:
             if chunk.choices and chunk.choices[0].delta.content:
@@ -167,12 +171,16 @@ class Processor(LlmService):
                     is_final=False
                 )
 
-        # Note: OpenAI doesn't provide token counts in streaming mode
-        # Send final chunk without token counts
+            # Capture usage from final chunk
+            if chunk.usage:
+                total_input_tokens = chunk.usage.prompt_tokens
+                total_output_tokens = chunk.usage.completion_tokens
+
+        # Send final chunk with token counts
         yield LlmChunk(
             text="",
-            in_token=None,
-            out_token=None,
+            in_token=total_input_tokens,
+            out_token=total_output_tokens,
             model=model_name,
             is_final=True
         )
diff --git a/trustgraph-flow/trustgraph/model/text_completion/tgi/llm.py b/trustgraph-flow/trustgraph/model/text_completion/tgi/llm.py
index ca6da1ba..5caeb9be 100755
--- a/trustgraph-flow/trustgraph/model/text_completion/tgi/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/tgi/llm.py
@@ -152,11 +152,15 @@ class Processor(LlmService):
             "max_tokens": self.max_output,
             "temperature": effective_temperature,
             "stream": True,
+            "stream_options": {"include_usage": True},
         }
 
         try:
             url = f"{self.base_url.rstrip('/')}/chat/completions"
 
+            total_input_tokens = 0
+            total_output_tokens = 0
+
             async with self.session.post(
                 url,
                 headers=headers,
@@ -196,15 +200,21 @@ class Processor(LlmService):
                                     model=model_name,
                                     is_final=False
                                 )
+
+                            # Capture usage from final chunk
+                            if 'usage' in chunk_data and chunk_data['usage']:
+                                total_input_tokens = chunk_data['usage'].get('prompt_tokens', 0)
+                                total_output_tokens = chunk_data['usage'].get('completion_tokens', 0)
+
                         except json.JSONDecodeError:
                             logger.warning(f"Failed to parse chunk: {data}")
                             continue
 
-            # Send final chunk
+            # Send final chunk with token counts
             yield LlmChunk(
                 text="",
-                in_token=None,
-                out_token=None,
+                in_token=total_input_tokens,
+                out_token=total_output_tokens,
                 model=model_name,
                 is_final=True
             )
diff --git a/trustgraph-flow/trustgraph/model/text_completion/vllm/llm.py b/trustgraph-flow/trustgraph/model/text_completion/vllm/llm.py
index 8d832b5c..2dd4576e 100755
--- a/trustgraph-flow/trustgraph/model/text_completion/vllm/llm.py
+++ b/trustgraph-flow/trustgraph/model/text_completion/vllm/llm.py
@@ -135,11 +135,15 @@ class Processor(LlmService):
             "max_tokens": self.max_output,
             "temperature": effective_temperature,
             "stream": True,
+            "stream_options": {"include_usage": True},
         }
 
         try:
             url = f"{self.base_url.rstrip('/')}/completions"
 
+            total_input_tokens = 0
+            total_output_tokens = 0
+
             async with self.session.post(
                 url,
                 headers=headers,
@@ -177,15 +181,21 @@ class Processor(LlmService):
                                     model=model_name,
                                     is_final=False
                                 )
+
+                            # Capture usage from final chunk
+                            if 'usage' in chunk_data and chunk_data['usage']:
+                                total_input_tokens = chunk_data['usage'].get('prompt_tokens', 0)
+                                total_output_tokens = chunk_data['usage'].get('completion_tokens', 0)
+
                         except json.JSONDecodeError:
                             logger.warning(f"Failed to parse chunk: {data}")
                             continue
 
-            # Send final chunk
+            # Send final chunk with token counts
             yield LlmChunk(
                 text="",
-                in_token=None,
-                out_token=None,
+                in_token=total_input_tokens,
+                out_token=total_output_tokens,
                 model=model_name,
                 is_final=True
             )
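All eight providers now follow the same pattern: request usage reporting with stream_options={"include_usage": True}, then read the cumulative counts off the trailing usage-only chunk. For reference, a minimal standalone sketch of that pattern against the OpenAI Python SDK (stream_options requires openai >= 1.26; the client setup and model name below are illustrative, not taken from this patch):

# Minimal sketch of the usage-capture pattern applied above.
# Assumes the official `openai` SDK and OPENAI_API_KEY in the
# environment; the model name is illustrative.
from openai import OpenAI

client = OpenAI()

def complete_with_usage(prompt: str) -> tuple[str, int, int]:
    stream = client.chat.completions.create(
        model="gpt-4o-mini",  # illustrative
        messages=[{"role": "user", "content": prompt}],
        stream=True,
        stream_options={"include_usage": True},
    )

    parts: list[str] = []
    in_tokens = out_tokens = 0

    for chunk in stream:
        # Content deltas arrive on ordinary chunks; the trailing
        # usage-only chunk has an empty `choices` list, so guard on it.
        if chunk.choices and chunk.choices[0].delta.content:
            parts.append(chunk.choices[0].delta.content)

        # With include_usage set, the final chunk reports cumulative
        # token counts for the whole request.
        if chunk.usage:
            in_tokens = chunk.usage.prompt_tokens
            out_tokens = chunk.usage.completion_tokens

    return "".join(parts), in_tokens, out_tokens

The chunk.choices guard matters: with include_usage set, the final chunk carries usage but an empty choices list, so indexing choices[0] unguarded would raise IndexError. The raw-HTTP providers (azure, tgi, vllm) hit the same case in JSON form, which is why they check 'usage' in chunk_data rather than assuming every event has choices.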