Mirror of https://github.com/trustgraph-ai/trustgraph.git
Features/metering all llms (#70)
* Added Anthropic support and None logic
* Added Cohere API support
* Added support for Llamafile and OpenAI
* Added support for VertexAI
* Added AzureAI support
parent e5249c2bac
commit 88246742f3
8 changed files with 150 additions and 14 deletions
@@ -51,13 +51,17 @@ class Processor(Consumer):
         num_out = v.out_token

         model_input_price, model_output_price = self.get_prices(price_list, modelname)

-        cost_in = num_in * model_input_price
-        cost_out = num_out * model_output_price
-        cost_per_call = cost_in + cost_out
+        if model_input_price is None:
+            cost_per_call = "Model not found in price list"
+        else:
+            cost_in = num_in * model_input_price
+            cost_out = num_out * model_output_price
+            cost_per_call = round(cost_in + cost_out, 6)

         print(f"Input Tokens: {num_in}", flush=True)
         print(f"Output Tokens: {num_out}", flush=True)
-        print(f"Cost for call: ${cost_per_call:.6f}", flush=True)
+        print(f"Cost for call: ${cost_per_call}", flush=True)

     @staticmethod
     def add_args(parser):
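The metering hunk calls self.get_prices(price_list, modelname), which is not part of this diff. A minimal sketch of what it plausibly does, assuming price_list is a dict holding a list of model entries as added in the next hunk (the top-level key name "price_list" is a guess, since that hunk starts mid-structure):

    def get_prices(self, price_list, model_name):
        # Return per-token (input_price, output_price), or (None, None)
        # so the caller's "model not found" branch fires.
        for entry in price_list["price_list"]:
            if entry["model_name"] == model_name:
                return entry["input_price"], entry["output_price"]
        return None, None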
@@ -44,6 +44,61 @@ price_list = {
         "model_name": "ollama",
         "input_price": 0,
         "output_price": 0
-    }
+    },
+    {
+        "model_name": "claude-3-haiku-20240307",
+        "input_price": 0.00000025,
+        "output_price": 0.00000125
+    },
+    {
+        "model_name": "claude-3-5-sonnet-20240620",
+        "input_price": 0.000003,
+        "output_price": 0.000015
+    },
+    {
+        "model_name": "claude-3-opus-20240229",
+        "input_price": 0.000015,
+        "output_price": 0.000075
+    },
+    {
+        "model_name": "claude-3-sonnet-20240229",
+        "input_price": 0.000003,
+        "output_price": 0.000015
+    },
+    {
+        "model_name": "command-r-08-2024",
+        "input_price": 0.0000025,
+        "output_price": 0.000010
+    },
+    {
+        "model_name": "c4ai-aya-23-8b",
+        "input_price": 0,
+        "output_price": 0
+    },
+    {
+        "model_name": "llama.cpp",
+        "input_price": 0,
+        "output_price": 0
+    },
+    {
+        "model_name": "gpt-4o",
+        "input_price": 0.000005,
+        "output_price": 0.000015
+    },
+    {
+        "model_name": "gpt-4o-2024-08-06",
+        "input_price": 0.0000025,
+        "output_price": 0.000010
+    },
+    {
+        "model_name": "gpt-4o-2024-05-13",
+        "input_price": 0.000005,
+        "output_price": 0.000015
+    },
+    {
+        "model_name": "gpt-4o-mini",
+        "input_price": 0.00000015,
+        "output_price": 0.0000006
+    },
 ]
 }
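Prices here are per single token: claude-3-haiku-20240307 at 0.00000025 input is $0.25 per million input tokens, which matches Anthropic's published pricing. A quick check of the rounding used in the metering hunk (token counts illustrative):

    num_in, num_out = 1_500, 500
    cost_per_call = round(num_in * 0.00000025 + num_out * 0.00000125, 6)
    print(cost_per_call)  # 0.001, printed as "Cost for call: $0.001"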
@@ -108,9 +108,7 @@ class Processor(ConsumerProducer):

         result = resp.json()

-        message_content = result['choices'][0]['message']['content']
-
-        return message_content
+        return result

     def handle(self, msg):
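call_llm previously unwrapped the message text; returning the parsed JSON instead lets handle() take both the text and the usage block from one object. The Azure AI payload is OpenAI-shaped; an illustrative, trimmed example of what resp.json() yields (values made up):

    result = {
        "choices": [
            {"message": {"role": "assistant", "content": "...model output..."}}
        ],
        "usage": {"prompt_tokens": 42, "completion_tokens": 128, "total_tokens": 170},
    }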
@@ -132,9 +130,17 @@ class Processor(ConsumerProducer):
             with __class__.text_completion_metric.time():
                 response = self.call_llm(prompt)

+            resp = response['choices'][0]['message']['content']
+            inputtokens = response['usage']['prompt_tokens']
+            outputtokens = response['usage']['completion_tokens']
+
+            print(resp, flush=True)
+            print(f"Input Tokens: {inputtokens}", flush=True)
+            print(f"Output Tokens: {outputtokens}", flush=True)
+
             print("Send response...", flush=True)

-            r = TextCompletionResponse(response=response, error=None)
+            r = TextCompletionResponse(response=resp, error=None, in_token=inputtokens, out_token=outputtokens, model="AzureAI")

             self.producer.send(r, properties={"id": id})

         except TooManyRequests:
@@ -145,7 +151,11 @@ class Processor(ConsumerProducer):
                 error=Error(
                     type = "rate-limit",
                     message = str(e),
-                )
+                ),
+                response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )

             self.producer.send(r, properties={"id": id})
@@ -162,7 +172,11 @@ class Processor(ConsumerProducer):
                 error=Error(
                     type = "llm-error",
                     message = str(e),
-                )
+                ),
+                response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )

             self.producer.send(r, properties={"id": id})
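The schema change itself is not in these hunks, but every constructor call now passes in_token, out_token and model. A plausible sketch of the extended record, assuming TrustGraph's Pulsar schema style; the field names come from the calls above, everything else is an assumption:

    from pulsar.schema import Record, String, Integer

    class Error(Record):
        type = String()
        message = String()

    class TextCompletionResponse(Record):
        error = Error()
        response = String()
        in_token = Integer()   # prompt tokens billed for the call
        out_token = Integer()  # completion tokens billed for the call
        model = String()       # name the metering consumer prices against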
@@ -105,10 +105,14 @@ class Processor(ConsumerProducer):
             )

             resp = response.content[0].text
+            inputtokens = response.usage.input_tokens
+            outputtokens = response.usage.output_tokens
             print(resp, flush=True)
+            print(f"Input Tokens: {inputtokens}", flush=True)
+            print(f"Output Tokens: {outputtokens}", flush=True)

             print("Send response...", flush=True)
-            r = TextCompletionResponse(response=resp, error=None)
+            r = TextCompletionResponse(response=resp, error=None, in_token=inputtokens, out_token=outputtokens, model=self.model)
             self.send(r, properties={"id": id})

             print("Done.", flush=True)
@@ -125,6 +129,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )

             self.producer.send(r, properties={"id": id})
@@ -143,6 +150,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )

             self.producer.send(r, properties={"id": id})
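These hunks are the Anthropic processor: billed tokens sit directly on the Messages API response, which is all the code needs to read. A minimal sketch with the official anthropic client (model, max_tokens and prompt are illustrative):

    import anthropic

    client = anthropic.Anthropic()  # picks up ANTHROPIC_API_KEY
    response = client.messages.create(
        model="claude-3-haiku-20240307",
        max_tokens=256,
        messages=[{"role": "user", "content": "Hello"}],
    )
    print(response.content[0].text)
    print(response.usage.input_tokens, response.usage.output_tokens)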
@@ -91,10 +91,15 @@ class Processor(ConsumerProducer):
             )

             resp = output.text
+            inputtokens = output.meta.billed_units.input_tokens
+            outputtokens = output.meta.billed_units.output_tokens
+
             print(resp, flush=True)
+            print(f"Input Tokens: {inputtokens}", flush=True)
+            print(f"Output Tokens: {outputtokens}", flush=True)

             print("Send response...", flush=True)
-            r = TextCompletionResponse(response=resp, error=None)
+            r = TextCompletionResponse(response=resp, error=None, in_token=inputtokens, out_token=outputtokens, model=self.model)
             self.send(r, properties={"id": id})

             print("Done.", flush=True)
@@ -111,6 +116,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )

             self.producer.send(r, properties={"id": id})
@@ -129,6 +137,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )

             self.producer.send(r, properties={"id": id})
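The Cohere processor reads token accounting from meta.billed_units rather than a usage field. A minimal sketch with the cohere v1 client (model and prompt illustrative):

    import cohere

    co = cohere.Client()  # picks up CO_API_KEY
    output = co.chat(model="command-r-08-2024", message="Hello")
    print(output.text)
    print(output.meta.billed_units.input_tokens,
          output.meta.billed_units.output_tokens)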
@@ -107,12 +107,20 @@ class Processor(ConsumerProducer):
                 #}
             )

+            inputtokens = resp.usage.prompt_tokens
+            outputtokens = resp.usage.completion_tokens
+
             print(resp.choices[0].message.content, flush=True)
+            print(f"Input Tokens: {inputtokens}", flush=True)
+            print(f"Output Tokens: {outputtokens}", flush=True)

             print("Send response...", flush=True)
             r = TextCompletionResponse(
                 response=resp.choices[0].message.content,
                 error=None,
+                in_token=inputtokens,
+                out_token=outputtokens,
+                model="llama.cpp"
             )
             self.send(r, properties={"id": id})
@@ -130,6 +138,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )

             self.producer.send(r, properties={"id": id})
@@ -148,6 +159,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )

             self.producer.send(r, properties={"id": id})
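Llamafile exposes an OpenAI-compatible chat-completions endpoint, so this processor reads resp.usage.* exactly like the OpenAI one but reports the fixed name "llama.cpp", matching the zero-price entry in the price list. A sketch against a local llamafile, assuming the default port (the API key is a placeholder the server ignores):

    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8080/v1", api_key="sk-no-key-required")
    resp = client.chat.completions.create(
        model="llama.cpp",  # llamafile accepts an arbitrary model name
        messages=[{"role": "user", "content": "Hello"}],
    )
    print(resp.choices[0].message.content)
    print(resp.usage.prompt_tokens, resp.usage.completion_tokens)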
@@ -108,13 +108,20 @@ class Processor(ConsumerProducer):
                     "type": "text"
                 }
             )

+            inputtokens = resp.usage.prompt_tokens
+            outputtokens = resp.usage.completion_tokens
             print(resp.choices[0].message.content, flush=True)
+            print(f"Input Tokens: {inputtokens}", flush=True)
+            print(f"Output Tokens: {outputtokens}", flush=True)

             print("Send response...", flush=True)
             r = TextCompletionResponse(
                 response=resp.choices[0].message.content,
                 error=None,
+                in_token=inputtokens,
+                out_token=outputtokens,
+                model=self.model
             )
             self.send(r, properties={"id": id})
@@ -132,6 +139,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )

             self.producer.send(r, properties={"id": id})
@@ -150,6 +160,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )

             self.producer.send(r, properties={"id": id})
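The OpenAI processor follows the same shape against the hosted API; response_format={"type": "text"} mirrors the argument visible in the hunk. Minimal sketch with the official openai client (model and prompt illustrative):

    from openai import OpenAI

    client = OpenAI()  # picks up OPENAI_API_KEY
    resp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Hello"}],
        response_format={"type": "text"},
    )
    print(resp.usage.prompt_tokens, resp.usage.completion_tokens)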
@@ -122,6 +122,7 @@ class Processor(ConsumerProducer):

         print(f"Initialise model {model}", flush=True)
         self.llm = GenerativeModel(model)
+        self.model = model

         print("Initialisation complete", flush=True)
@@ -147,12 +148,20 @@ class Processor(ConsumerProducer):
             )

+            inputtokens = resp.usage_metadata.prompt_token_count
+            outputtokens = resp.usage_metadata.candidates_token_count
             resp = resp.text
             print(resp, flush=True)
+            print(f"Input Tokens: {inputtokens}", flush=True)
+            print(f"Output Tokens: {outputtokens}", flush=True)

             print("Send response...", flush=True)

-            r = TextCompletionResponse(error=None, response=resp)
+            r = TextCompletionResponse(
+                error=None,
+                response=resp,
+                in_token=inputtokens,
+                out_token=outputtokens,
+                model=self.model
+            )

             self.producer.send(r, properties={"id": id})
@@ -172,6 +181,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )

             self.producer.send(r, properties={"id": id})
@@ -190,6 +202,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )

             self.producer.send(r, properties={"id": id})
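For VertexAI, the processor now stores self.model at init so the handler can report it, and token counts come from usage_metadata on the response object, so they must be read before resp is rebound to resp.text. A minimal sketch, assuming the vertexai SDK (project, location and model are illustrative):

    import vertexai
    from vertexai.generative_models import GenerativeModel

    vertexai.init(project="my-project", location="us-central1")
    resp = GenerativeModel("gemini-1.0-pro").generate_content("Hello")
    print(resp.usage_metadata.prompt_token_count,
          resp.usage_metadata.candidates_token_count)
    print(resp.text)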