mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-28 16:55:12 +02:00
Metering for all LLMs (#76)
* Fixed VertexAI token counts
* Slight fix for Cohere token count
* Slight tweak to AzureAI
* Fix for Prometheus for AzureAI
This commit is contained in:
parent
74a14639bd
commit
2f23309f05
3 changed files with 11 additions and 7 deletions
|
|
@ -22,6 +22,7 @@ default_output_queue = text_completion_response_queue
|
||||||
default_subscriber = module
|
default_subscriber = module
|
||||||
default_temperature = 0.0
|
default_temperature = 0.0
|
||||||
default_max_output = 4192
|
default_max_output = 4192
|
||||||
|
default_model = "AzureAI"
|
||||||
|
|
||||||
class Processor(ConsumerProducer):
|
class Processor(ConsumerProducer):
|
||||||
|
|
||||||
|
|
@ -34,6 +35,7 @@ class Processor(ConsumerProducer):
|
||||||
token = params.get("token")
|
token = params.get("token")
|
||||||
temperature = params.get("temperature", default_temperature)
|
temperature = params.get("temperature", default_temperature)
|
||||||
max_output = params.get("max_output", default_max_output)
|
max_output = params.get("max_output", default_max_output)
|
||||||
|
model = default_model
|
||||||
|
|
||||||
super(Processor, self).__init__(
|
super(Processor, self).__init__(
|
||||||
**params | {
|
**params | {
|
||||||
|
|
@ -44,6 +46,7 @@ class Processor(ConsumerProducer):
|
||||||
"output_schema": TextCompletionResponse,
|
"output_schema": TextCompletionResponse,
|
||||||
"temperature": temperature,
|
"temperature": temperature,
|
||||||
"max_output": max_output,
|
"max_output": max_output,
|
||||||
|
"model": model,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -64,6 +67,7 @@ class Processor(ConsumerProducer):
|
||||||
self.token = token
|
self.token = token
|
||||||
self.temperature = temperature
|
self.temperature = temperature
|
||||||
self.max_output = max_output
|
self.max_output = max_output
|
||||||
|
self.model = model
|
||||||
|
|
||||||
def build_prompt(self, system, content):
|
def build_prompt(self, system, content):
|
||||||
|
|
||||||
|
|
@ -140,7 +144,7 @@ class Processor(ConsumerProducer):
|
||||||
|
|
||||||
print("Send response...", flush=True)
|
print("Send response...", flush=True)
|
||||||
|
|
||||||
r = TextCompletionResponse(response=resp, error=None, in_token=inputtokens, out_token=outputtokens, model="AzureAI")
|
r = TextCompletionResponse(response=resp, error=None, in_token=inputtokens, out_token=outputtokens, model=self.model)
|
||||||
self.producer.send(r, properties={"id": id})
|
self.producer.send(r, properties={"id": id})
|
||||||
|
|
||||||
except TooManyRequests:
|
except TooManyRequests:
|
||||||
|
|
|
||||||
|
|
@ -91,8 +91,8 @@ class Processor(ConsumerProducer):
|
||||||
)
|
)
|
||||||
|
|
||||||
resp = output.text
|
resp = output.text
|
||||||
inputtokens = output.meta.billed_units.input_tokens
|
inputtokens = int(output.meta.billed_units.input_tokens)
|
||||||
outputtokens = output.meta.billed_units.output_tokens
|
outputtokens = int(output.meta.billed_units.output_tokens)
|
||||||
|
|
||||||
print(resp, flush=True)
|
print(resp, flush=True)
|
||||||
print(f"Input Tokens: {inputtokens}", flush=True)
|
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||||
|
|
|
||||||
|
|
@ -142,14 +142,14 @@ class Processor(ConsumerProducer):
|
||||||
|
|
||||||
with __class__.text_completion_metric.time():
|
with __class__.text_completion_metric.time():
|
||||||
|
|
||||||
resp = self.llm.generate_content(
|
response = self.llm.generate_content(
|
||||||
prompt, generation_config=self.generation_config,
|
prompt, generation_config=self.generation_config,
|
||||||
safety_settings=self.safety_settings
|
safety_settings=self.safety_settings
|
||||||
)
|
)
|
||||||
|
|
||||||
resp = resp.text
|
resp = response.text
|
||||||
inputtokens = resp.usage_metadata.prompt_token_count
|
inputtokens = int(response.usage_metadata.prompt_token_count)
|
||||||
outputtokens = resp.usage_metadata.candidates_token_count
|
outputtokens = int(response.usage_metadata.candidates_token_count)
|
||||||
print(resp, flush=True)
|
print(resp, flush=True)
|
||||||
print(f"Input Tokens: {inputtokens}", flush=True)
|
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||||
print(f"Output Tokens: {outputtokens}", flush=True)
|
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue