mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
parent
4fca97d555
commit
2781c7d87c
8 changed files with 100 additions and 27 deletions
|
|
@ -75,6 +75,7 @@ class Processor(LlmService):
|
|||
|
||||
if stream:
|
||||
data["stream"] = True
|
||||
data["stream_options"] = {"include_usage": True}
|
||||
|
||||
body = json.dumps(data)
|
||||
|
||||
|
|
@ -191,6 +192,9 @@ class Processor(LlmService):
|
|||
if response.status_code != 200:
|
||||
raise RuntimeError("LLM failure")
|
||||
|
||||
total_input_tokens = 0
|
||||
total_output_tokens = 0
|
||||
|
||||
# Parse SSE stream
|
||||
for line in response.iter_lines():
|
||||
if line:
|
||||
|
|
@ -215,15 +219,21 @@ class Processor(LlmService):
|
|||
model=model_name,
|
||||
is_final=False
|
||||
)
|
||||
|
||||
# Capture usage from final chunk
|
||||
if 'usage' in chunk_data and chunk_data['usage']:
|
||||
total_input_tokens = chunk_data['usage'].get('prompt_tokens', 0)
|
||||
total_output_tokens = chunk_data['usage'].get('completion_tokens', 0)
|
||||
|
||||
except json.JSONDecodeError:
|
||||
logger.warning(f"Failed to parse chunk: {data}")
|
||||
continue
|
||||
|
||||
# Send final chunk
|
||||
# Send final chunk with token counts
|
||||
yield LlmChunk(
|
||||
text="",
|
||||
in_token=None,
|
||||
out_token=None,
|
||||
in_token=total_input_tokens,
|
||||
out_token=total_output_tokens,
|
||||
model=model_name,
|
||||
is_final=True
|
||||
)
|
||||
|
|
|
|||
|
|
@ -161,9 +161,13 @@ class Processor(LlmService):
|
|||
temperature=effective_temperature,
|
||||
max_tokens=self.max_output,
|
||||
top_p=1,
|
||||
stream=True # Enable streaming
|
||||
stream=True,
|
||||
stream_options={"include_usage": True}
|
||||
)
|
||||
|
||||
total_input_tokens = 0
|
||||
total_output_tokens = 0
|
||||
|
||||
# Stream chunks
|
||||
for chunk in response:
|
||||
if chunk.choices and chunk.choices[0].delta.content:
|
||||
|
|
@ -175,11 +179,16 @@ class Processor(LlmService):
|
|||
is_final=False
|
||||
)
|
||||
|
||||
# Send final chunk
|
||||
# Capture usage from final chunk
|
||||
if chunk.usage:
|
||||
total_input_tokens = chunk.usage.prompt_tokens
|
||||
total_output_tokens = chunk.usage.completion_tokens
|
||||
|
||||
# Send final chunk with token counts
|
||||
yield LlmChunk(
|
||||
text="",
|
||||
in_token=None,
|
||||
out_token=None,
|
||||
in_token=total_input_tokens,
|
||||
out_token=total_output_tokens,
|
||||
model=model_name,
|
||||
is_final=True
|
||||
)
|
||||
|
|
|
|||
|
|
@ -126,9 +126,13 @@ class Processor(LlmService):
|
|||
frequency_penalty=0,
|
||||
presence_penalty=0,
|
||||
response_format={"type": "text"},
|
||||
stream=True
|
||||
stream=True,
|
||||
stream_options={"include_usage": True}
|
||||
)
|
||||
|
||||
total_input_tokens = 0
|
||||
total_output_tokens = 0
|
||||
|
||||
for chunk in response:
|
||||
if chunk.choices and chunk.choices[0].delta.content:
|
||||
yield LlmChunk(
|
||||
|
|
@ -139,10 +143,15 @@ class Processor(LlmService):
|
|||
is_final=False
|
||||
)
|
||||
|
||||
# Capture usage from final chunk
|
||||
if chunk.usage:
|
||||
total_input_tokens = chunk.usage.prompt_tokens
|
||||
total_output_tokens = chunk.usage.completion_tokens
|
||||
|
||||
yield LlmChunk(
|
||||
text="",
|
||||
in_token=None,
|
||||
out_token=None,
|
||||
in_token=total_input_tokens,
|
||||
out_token=total_output_tokens,
|
||||
model=model_name,
|
||||
is_final=True
|
||||
)
|
||||
|
|
|
|||
|
|
@ -130,9 +130,13 @@ class Processor(LlmService):
|
|||
frequency_penalty=0,
|
||||
presence_penalty=0,
|
||||
response_format={"type": "text"},
|
||||
stream=True
|
||||
stream=True,
|
||||
stream_options={"include_usage": True}
|
||||
)
|
||||
|
||||
total_input_tokens = 0
|
||||
total_output_tokens = 0
|
||||
|
||||
for chunk in response:
|
||||
if chunk.choices and chunk.choices[0].delta.content:
|
||||
yield LlmChunk(
|
||||
|
|
@ -143,10 +147,15 @@ class Processor(LlmService):
|
|||
is_final=False
|
||||
)
|
||||
|
||||
# Capture usage from final chunk
|
||||
if chunk.usage:
|
||||
total_input_tokens = chunk.usage.prompt_tokens
|
||||
total_output_tokens = chunk.usage.completion_tokens
|
||||
|
||||
yield LlmChunk(
|
||||
text="",
|
||||
in_token=None,
|
||||
out_token=None,
|
||||
in_token=total_input_tokens,
|
||||
out_token=total_output_tokens,
|
||||
model=model_name,
|
||||
is_final=True
|
||||
)
|
||||
|
|
|
|||
|
|
@ -156,6 +156,9 @@ class Processor(LlmService):
|
|||
response_format={"type": "text"}
|
||||
)
|
||||
|
||||
total_input_tokens = 0
|
||||
total_output_tokens = 0
|
||||
|
||||
for chunk in stream:
|
||||
if chunk.data.choices and chunk.data.choices[0].delta.content:
|
||||
yield LlmChunk(
|
||||
|
|
@ -166,11 +169,16 @@ class Processor(LlmService):
|
|||
is_final=False
|
||||
)
|
||||
|
||||
# Send final chunk
|
||||
# Capture usage data when available (typically in final chunk)
|
||||
if chunk.data.usage:
|
||||
total_input_tokens = chunk.data.usage.prompt_tokens
|
||||
total_output_tokens = chunk.data.usage.completion_tokens
|
||||
|
||||
# Send final chunk with token counts
|
||||
yield LlmChunk(
|
||||
text="",
|
||||
in_token=None,
|
||||
out_token=None,
|
||||
in_token=total_input_tokens,
|
||||
out_token=total_output_tokens,
|
||||
model=model_name,
|
||||
is_final=True
|
||||
)
|
||||
|
|
|
|||
|
|
@ -153,9 +153,13 @@ class Processor(LlmService):
|
|||
],
|
||||
temperature=effective_temperature,
|
||||
max_tokens=self.max_output,
|
||||
stream=True # Enable streaming
|
||||
stream=True,
|
||||
stream_options={"include_usage": True}
|
||||
)
|
||||
|
||||
total_input_tokens = 0
|
||||
total_output_tokens = 0
|
||||
|
||||
# Stream chunks
|
||||
for chunk in response:
|
||||
if chunk.choices and chunk.choices[0].delta.content:
|
||||
|
|
@ -167,12 +171,16 @@ class Processor(LlmService):
|
|||
is_final=False
|
||||
)
|
||||
|
||||
# Note: OpenAI doesn't provide token counts in streaming mode
|
||||
# Send final chunk without token counts
|
||||
# Capture usage from final chunk
|
||||
if chunk.usage:
|
||||
total_input_tokens = chunk.usage.prompt_tokens
|
||||
total_output_tokens = chunk.usage.completion_tokens
|
||||
|
||||
# Send final chunk with token counts
|
||||
yield LlmChunk(
|
||||
text="",
|
||||
in_token=None,
|
||||
out_token=None,
|
||||
in_token=total_input_tokens,
|
||||
out_token=total_output_tokens,
|
||||
model=model_name,
|
||||
is_final=True
|
||||
)
|
||||
|
|
|
|||
|
|
@ -152,11 +152,15 @@ class Processor(LlmService):
|
|||
"max_tokens": self.max_output,
|
||||
"temperature": effective_temperature,
|
||||
"stream": True,
|
||||
"stream_options": {"include_usage": True},
|
||||
}
|
||||
|
||||
try:
|
||||
url = f"{self.base_url.rstrip('/')}/chat/completions"
|
||||
|
||||
total_input_tokens = 0
|
||||
total_output_tokens = 0
|
||||
|
||||
async with self.session.post(
|
||||
url,
|
||||
headers=headers,
|
||||
|
|
@ -196,15 +200,21 @@ class Processor(LlmService):
|
|||
model=model_name,
|
||||
is_final=False
|
||||
)
|
||||
|
||||
# Capture usage from final chunk
|
||||
if 'usage' in chunk_data and chunk_data['usage']:
|
||||
total_input_tokens = chunk_data['usage'].get('prompt_tokens', 0)
|
||||
total_output_tokens = chunk_data['usage'].get('completion_tokens', 0)
|
||||
|
||||
except json.JSONDecodeError:
|
||||
logger.warning(f"Failed to parse chunk: {data}")
|
||||
continue
|
||||
|
||||
# Send final chunk
|
||||
# Send final chunk with token counts
|
||||
yield LlmChunk(
|
||||
text="",
|
||||
in_token=None,
|
||||
out_token=None,
|
||||
in_token=total_input_tokens,
|
||||
out_token=total_output_tokens,
|
||||
model=model_name,
|
||||
is_final=True
|
||||
)
|
||||
|
|
|
|||
|
|
@ -135,11 +135,15 @@ class Processor(LlmService):
|
|||
"max_tokens": self.max_output,
|
||||
"temperature": effective_temperature,
|
||||
"stream": True,
|
||||
"stream_options": {"include_usage": True},
|
||||
}
|
||||
|
||||
try:
|
||||
url = f"{self.base_url.rstrip('/')}/completions"
|
||||
|
||||
total_input_tokens = 0
|
||||
total_output_tokens = 0
|
||||
|
||||
async with self.session.post(
|
||||
url,
|
||||
headers=headers,
|
||||
|
|
@ -177,15 +181,21 @@ class Processor(LlmService):
|
|||
model=model_name,
|
||||
is_final=False
|
||||
)
|
||||
|
||||
# Capture usage from final chunk
|
||||
if 'usage' in chunk_data and chunk_data['usage']:
|
||||
total_input_tokens = chunk_data['usage'].get('prompt_tokens', 0)
|
||||
total_output_tokens = chunk_data['usage'].get('completion_tokens', 0)
|
||||
|
||||
except json.JSONDecodeError:
|
||||
logger.warning(f"Failed to parse chunk: {data}")
|
||||
continue
|
||||
|
||||
# Send final chunk
|
||||
# Send final chunk with token counts
|
||||
yield LlmChunk(
|
||||
text="",
|
||||
in_token=None,
|
||||
out_token=None,
|
||||
in_token=total_input_tokens,
|
||||
out_token=total_output_tokens,
|
||||
model=model_name,
|
||||
is_final=True
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue