mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-27 16:25:12 +02:00
Metering for all LLMs (#76)
* Fixed VertexAI token counts
* Slight fix for Cohere token count
* Slight tweak to AzureAI
* Fix for Prometheus for AzureAI
This commit is contained in:
parent
74a14639bd
commit
2f23309f05
3 changed files with 11 additions and 7 deletions
|
|
@@ -22,6 +22,7 @@ default_output_queue = text_completion_response_queue
|
|||
default_subscriber = module
|
||||
default_temperature = 0.0
|
||||
default_max_output = 4192
|
||||
default_model = "AzureAI"
|
||||
|
||||
class Processor(ConsumerProducer):
|
||||
|
||||
|
|
@@ -34,6 +35,7 @@ class Processor(ConsumerProducer):
|
|||
token = params.get("token")
|
||||
temperature = params.get("temperature", default_temperature)
|
||||
max_output = params.get("max_output", default_max_output)
|
||||
model = default_model
|
||||
|
||||
super(Processor, self).__init__(
|
||||
**params | {
|
||||
|
|
@@ -44,6 +46,7 @@ class Processor(ConsumerProducer):
|
|||
"output_schema": TextCompletionResponse,
|
||||
"temperature": temperature,
|
||||
"max_output": max_output,
|
||||
"model": model,
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@@ -64,6 +67,7 @@ class Processor(ConsumerProducer):
|
|||
self.token = token
|
||||
self.temperature = temperature
|
||||
self.max_output = max_output
|
||||
self.model = model
|
||||
|
||||
def build_prompt(self, system, content):
|
||||
|
||||
|
|
@@ -140,7 +144,7 @@ class Processor(ConsumerProducer):
|
|||
|
||||
print("Send response...", flush=True)
|
||||
|
||||
r = TextCompletionResponse(response=resp, error=None, in_token=inputtokens, out_token=outputtokens, model="AzureAI")
|
||||
r = TextCompletionResponse(response=resp, error=None, in_token=inputtokens, out_token=outputtokens, model=self.model)
|
||||
self.producer.send(r, properties={"id": id})
|
||||
|
||||
except TooManyRequests:
|
||||
|
|
|
|||
|
|
@@ -91,8 +91,8 @@ class Processor(ConsumerProducer):
|
|||
)
|
||||
|
||||
resp = output.text
|
||||
inputtokens = output.meta.billed_units.input_tokens
|
||||
outputtokens = output.meta.billed_units.output_tokens
|
||||
inputtokens = int(output.meta.billed_units.input_tokens)
|
||||
outputtokens = int(output.meta.billed_units.output_tokens)
|
||||
|
||||
print(resp, flush=True)
|
||||
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||
|
|
|
|||
|
|
@@ -142,14 +142,14 @@ class Processor(ConsumerProducer):
|
|||
|
||||
with __class__.text_completion_metric.time():
|
||||
|
||||
resp = self.llm.generate_content(
|
||||
response = self.llm.generate_content(
|
||||
prompt, generation_config=self.generation_config,
|
||||
safety_settings=self.safety_settings
|
||||
)
|
||||
|
||||
resp = resp.text
|
||||
inputtokens = resp.usage_metadata.prompt_token_count
|
||||
outputtokens = resp.usage_metadata.candidates_token_count
|
||||
resp = response.text
|
||||
inputtokens = int(response.usage_metadata.prompt_token_count)
|
||||
outputtokens = int(response.usage_metadata.candidates_token_count)
|
||||
print(resp, flush=True)
|
||||
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue