mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-06-02 11:25:14 +02:00
Features/metering all llms (#70)
* Added Anthropic support and None logic
* Added Cohere API support
* Added support for Llamafile and OpenAI
* Added support for VertexAI
* Added AzureAI support
This commit is contained in:
parent
e5249c2bac
commit
88246742f3
8 changed files with 150 additions and 14 deletions
|
|
@ -51,13 +51,17 @@ class Processor(Consumer):
|
||||||
num_out = v.out_token
|
num_out = v.out_token
|
||||||
|
|
||||||
model_input_price, model_output_price = self.get_prices(price_list, modelname)
|
model_input_price, model_output_price = self.get_prices(price_list, modelname)
|
||||||
cost_in = num_in * model_input_price
|
|
||||||
cost_out = num_out * model_output_price
|
if model_input_price == None:
|
||||||
cost_per_call = cost_in + cost_out
|
cost_per_call = f"Model Not Found in Price list"
|
||||||
|
else:
|
||||||
|
cost_in = num_in * model_input_price
|
||||||
|
cost_out = num_out * model_output_price
|
||||||
|
cost_per_call = round(cost_in + cost_out, 6)
|
||||||
|
|
||||||
print(f"Input Tokens: {num_in}", flush=True)
|
print(f"Input Tokens: {num_in}", flush=True)
|
||||||
print(f"Output Tokens: {num_out}", flush=True)
|
print(f"Output Tokens: {num_out}", flush=True)
|
||||||
print(f"Cost for call: ${cost_per_call:.6f}", flush=True)
|
print(f"Cost for call: ${cost_per_call}", flush=True)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def add_args(parser):
|
def add_args(parser):
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,61 @@ price_list = {
|
||||||
"model_name": "ollama",
|
"model_name": "ollama",
|
||||||
"input_price": 0,
|
"input_price": 0,
|
||||||
"output_price": 0
|
"output_price": 0
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
"model_name": "claude-3-haiku-20240307",
|
||||||
|
"input_price": 0.00000025,
|
||||||
|
"output_price": 0.00000125
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_name": "claude-3-5-sonnet-20240620",
|
||||||
|
"input_price": 0.000003,
|
||||||
|
"output_price": 0.000015
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_name": "claude-3-opus-20240229",
|
||||||
|
"input_price": 0.000015,
|
||||||
|
"output_price": 0.000075
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_name": "claude-3-sonnet-20240229",
|
||||||
|
"input_price": 0.000003,
|
||||||
|
"output_price": 0.000015
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_name": "command-r-08-202",
|
||||||
|
"input_price": 0.0000025,
|
||||||
|
"output_price": 0.000010
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_name": "c4ai-aya-23-8b",
|
||||||
|
"input_price": 0,
|
||||||
|
"output_price": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_name": "llama.cpp",
|
||||||
|
"input_price": 0,
|
||||||
|
"output_price": 0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_name": "gpt-4o",
|
||||||
|
"input_price": 0.000005,
|
||||||
|
"output_price": 0.000015
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_name": "gpt-4o-2024-08-06",
|
||||||
|
"input_price": 0.0000025,
|
||||||
|
"output_price": 0.000010
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_name": "gpt-4o-2024-05-13",
|
||||||
|
"input_price": 0.000005,
|
||||||
|
"output_price": 0.000015
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"model_name": "gpt-4o-mini",
|
||||||
|
"input_price": 0.00000015,
|
||||||
|
"output_price": 0.0000006
|
||||||
|
},
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
@ -108,9 +108,7 @@ class Processor(ConsumerProducer):
|
||||||
|
|
||||||
result = resp.json()
|
result = resp.json()
|
||||||
|
|
||||||
message_content = result['choices'][0]['message']['content']
|
return result
|
||||||
|
|
||||||
return message_content
|
|
||||||
|
|
||||||
def handle(self, msg):
|
def handle(self, msg):
|
||||||
|
|
||||||
|
|
@ -132,9 +130,17 @@ class Processor(ConsumerProducer):
|
||||||
with __class__.text_completion_metric.time():
|
with __class__.text_completion_metric.time():
|
||||||
response = self.call_llm(prompt)
|
response = self.call_llm(prompt)
|
||||||
|
|
||||||
|
resp = response['choices'][0]['message']['content']
|
||||||
|
inputtokens = response['usage']['prompt_tokens']
|
||||||
|
outputtokens = response['usage']['completion_tokens']
|
||||||
|
|
||||||
|
print(resp, flush=True)
|
||||||
|
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||||
|
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||||
|
|
||||||
print("Send response...", flush=True)
|
print("Send response...", flush=True)
|
||||||
|
|
||||||
r = TextCompletionResponse(response=response, error=None)
|
r = TextCompletionResponse(response=resp, error=None, in_token=inputtokens, out_token=outputtokens, model="AzureAI")
|
||||||
self.producer.send(r, properties={"id": id})
|
self.producer.send(r, properties={"id": id})
|
||||||
|
|
||||||
except TooManyRequests:
|
except TooManyRequests:
|
||||||
|
|
@ -145,7 +151,11 @@ class Processor(ConsumerProducer):
|
||||||
error=Error(
|
error=Error(
|
||||||
type = "rate-limit",
|
type = "rate-limit",
|
||||||
message = str(e),
|
message = str(e),
|
||||||
)
|
),
|
||||||
|
response=None,
|
||||||
|
in_token=None,
|
||||||
|
out_token=None,
|
||||||
|
model=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.producer.send(r, properties={"id": id})
|
self.producer.send(r, properties={"id": id})
|
||||||
|
|
@ -162,7 +172,11 @@ class Processor(ConsumerProducer):
|
||||||
error=Error(
|
error=Error(
|
||||||
type = "llm-error",
|
type = "llm-error",
|
||||||
message = str(e),
|
message = str(e),
|
||||||
)
|
),
|
||||||
|
response=None,
|
||||||
|
in_token=None,
|
||||||
|
out_token=None,
|
||||||
|
model=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.producer.send(r, properties={"id": id})
|
self.producer.send(r, properties={"id": id})
|
||||||
|
|
|
||||||
|
|
@ -105,10 +105,14 @@ class Processor(ConsumerProducer):
|
||||||
)
|
)
|
||||||
|
|
||||||
resp = response.content[0].text
|
resp = response.content[0].text
|
||||||
|
inputtokens = response.usage.input_tokens
|
||||||
|
outputtokens = response.usage.output_tokens
|
||||||
print(resp, flush=True)
|
print(resp, flush=True)
|
||||||
|
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||||
|
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||||
|
|
||||||
print("Send response...", flush=True)
|
print("Send response...", flush=True)
|
||||||
r = TextCompletionResponse(response=resp, error=None)
|
r = TextCompletionResponse(response=resp, error=None, in_token=inputtokens, out_token=outputtokens, model=self.model)
|
||||||
self.send(r, properties={"id": id})
|
self.send(r, properties={"id": id})
|
||||||
|
|
||||||
print("Done.", flush=True)
|
print("Done.", flush=True)
|
||||||
|
|
@ -125,6 +129,9 @@ class Processor(ConsumerProducer):
|
||||||
message = str(e),
|
message = str(e),
|
||||||
),
|
),
|
||||||
response=None,
|
response=None,
|
||||||
|
in_token=None,
|
||||||
|
out_token=None,
|
||||||
|
model=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.producer.send(r, properties={"id": id})
|
self.producer.send(r, properties={"id": id})
|
||||||
|
|
@ -143,6 +150,9 @@ class Processor(ConsumerProducer):
|
||||||
message = str(e),
|
message = str(e),
|
||||||
),
|
),
|
||||||
response=None,
|
response=None,
|
||||||
|
in_token=None,
|
||||||
|
out_token=None,
|
||||||
|
model=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.producer.send(r, properties={"id": id})
|
self.producer.send(r, properties={"id": id})
|
||||||
|
|
|
||||||
|
|
@ -91,10 +91,15 @@ class Processor(ConsumerProducer):
|
||||||
)
|
)
|
||||||
|
|
||||||
resp = output.text
|
resp = output.text
|
||||||
|
inputtokens = output.meta.billed_units.input_tokens
|
||||||
|
outputtokens = output.meta.billed_units.output_tokens
|
||||||
|
|
||||||
print(resp, flush=True)
|
print(resp, flush=True)
|
||||||
|
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||||
|
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||||
|
|
||||||
print("Send response...", flush=True)
|
print("Send response...", flush=True)
|
||||||
r = TextCompletionResponse(response=resp, error=None)
|
r = TextCompletionResponse(response=resp, error=None, in_token=inputtokens, out_token=outputtokens, model=self.model)
|
||||||
self.send(r, properties={"id": id})
|
self.send(r, properties={"id": id})
|
||||||
|
|
||||||
print("Done.", flush=True)
|
print("Done.", flush=True)
|
||||||
|
|
@ -111,6 +116,9 @@ class Processor(ConsumerProducer):
|
||||||
message = str(e),
|
message = str(e),
|
||||||
),
|
),
|
||||||
response=None,
|
response=None,
|
||||||
|
in_token=None,
|
||||||
|
out_token=None,
|
||||||
|
model=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.producer.send(r, properties={"id": id})
|
self.producer.send(r, properties={"id": id})
|
||||||
|
|
@ -129,6 +137,9 @@ class Processor(ConsumerProducer):
|
||||||
message = str(e),
|
message = str(e),
|
||||||
),
|
),
|
||||||
response=None,
|
response=None,
|
||||||
|
in_token=None,
|
||||||
|
out_token=None,
|
||||||
|
model=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.producer.send(r, properties={"id": id})
|
self.producer.send(r, properties={"id": id})
|
||||||
|
|
|
||||||
|
|
@ -107,12 +107,20 @@ class Processor(ConsumerProducer):
|
||||||
#}
|
#}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
inputtokens = resp.usage.prompt_tokens
|
||||||
|
outputtokens = resp.usage.completion_tokens
|
||||||
|
|
||||||
print(resp.choices[0].message.content, flush=True)
|
print(resp.choices[0].message.content, flush=True)
|
||||||
|
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||||
|
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||||
|
|
||||||
print("Send response...", flush=True)
|
print("Send response...", flush=True)
|
||||||
r = TextCompletionResponse(
|
r = TextCompletionResponse(
|
||||||
response=resp.choices[0].message.content,
|
response=resp.choices[0].message.content,
|
||||||
error=None,
|
error=None,
|
||||||
|
in_token=inputtokens,
|
||||||
|
out_token=outputtokens,
|
||||||
|
model="llama.cpp"
|
||||||
)
|
)
|
||||||
self.send(r, properties={"id": id})
|
self.send(r, properties={"id": id})
|
||||||
|
|
||||||
|
|
@ -130,6 +138,9 @@ class Processor(ConsumerProducer):
|
||||||
message = str(e),
|
message = str(e),
|
||||||
),
|
),
|
||||||
response=None,
|
response=None,
|
||||||
|
in_token=None,
|
||||||
|
out_token=None,
|
||||||
|
model=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.producer.send(r, properties={"id": id})
|
self.producer.send(r, properties={"id": id})
|
||||||
|
|
@ -148,6 +159,9 @@ class Processor(ConsumerProducer):
|
||||||
message = str(e),
|
message = str(e),
|
||||||
),
|
),
|
||||||
response=None,
|
response=None,
|
||||||
|
in_token=None,
|
||||||
|
out_token=None,
|
||||||
|
model=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.producer.send(r, properties={"id": id})
|
self.producer.send(r, properties={"id": id})
|
||||||
|
|
|
||||||
|
|
@ -108,13 +108,20 @@ class Processor(ConsumerProducer):
|
||||||
"type": "text"
|
"type": "text"
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
inputtokens = resp.usage.prompt_tokens
|
||||||
|
outputtokens = resp.usage.completion_tokens
|
||||||
print(resp.choices[0].message.content, flush=True)
|
print(resp.choices[0].message.content, flush=True)
|
||||||
|
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||||
|
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||||
|
|
||||||
print("Send response...", flush=True)
|
print("Send response...", flush=True)
|
||||||
r = TextCompletionResponse(
|
r = TextCompletionResponse(
|
||||||
response=resp.choices[0].message.content,
|
response=resp.choices[0].message.content,
|
||||||
error=None,
|
error=None,
|
||||||
|
in_token=inputtokens,
|
||||||
|
out_token=outputtokens,
|
||||||
|
model=self.model
|
||||||
)
|
)
|
||||||
self.send(r, properties={"id": id})
|
self.send(r, properties={"id": id})
|
||||||
|
|
||||||
|
|
@ -132,6 +139,9 @@ class Processor(ConsumerProducer):
|
||||||
message = str(e),
|
message = str(e),
|
||||||
),
|
),
|
||||||
response=None,
|
response=None,
|
||||||
|
in_token=None,
|
||||||
|
out_token=None,
|
||||||
|
model=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.producer.send(r, properties={"id": id})
|
self.producer.send(r, properties={"id": id})
|
||||||
|
|
@ -150,6 +160,9 @@ class Processor(ConsumerProducer):
|
||||||
message = str(e),
|
message = str(e),
|
||||||
),
|
),
|
||||||
response=None,
|
response=None,
|
||||||
|
in_token=None,
|
||||||
|
out_token=None,
|
||||||
|
model=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.producer.send(r, properties={"id": id})
|
self.producer.send(r, properties={"id": id})
|
||||||
|
|
|
||||||
|
|
@ -122,6 +122,7 @@ class Processor(ConsumerProducer):
|
||||||
|
|
||||||
print(f"Initialise model {model}", flush=True)
|
print(f"Initialise model {model}", flush=True)
|
||||||
self.llm = GenerativeModel(model)
|
self.llm = GenerativeModel(model)
|
||||||
|
self.model = model
|
||||||
|
|
||||||
print("Initialisation complete", flush=True)
|
print("Initialisation complete", flush=True)
|
||||||
|
|
||||||
|
|
@ -147,12 +148,20 @@ class Processor(ConsumerProducer):
|
||||||
)
|
)
|
||||||
|
|
||||||
resp = resp.text
|
resp = resp.text
|
||||||
|
inputtokens = resp.usage_metadata.prompt_token_count
|
||||||
|
outputtokens = resp.usage_metadata.candidates_token_count
|
||||||
|
print(resp, flush=True)
|
||||||
|
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||||
|
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||||
|
|
||||||
print("Send response...", flush=True)
|
print("Send response...", flush=True)
|
||||||
|
|
||||||
r = TextCompletionResponse(
|
r = TextCompletionResponse(
|
||||||
error=None,
|
error=None,
|
||||||
response=resp,
|
response=resp,
|
||||||
|
in_token=inputtokens,
|
||||||
|
out_token=outputtokens,
|
||||||
|
model=self.model
|
||||||
)
|
)
|
||||||
|
|
||||||
self.producer.send(r, properties={"id": id})
|
self.producer.send(r, properties={"id": id})
|
||||||
|
|
@ -172,6 +181,9 @@ class Processor(ConsumerProducer):
|
||||||
message = str(e),
|
message = str(e),
|
||||||
),
|
),
|
||||||
response=None,
|
response=None,
|
||||||
|
in_token=None,
|
||||||
|
out_token=None,
|
||||||
|
model=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.producer.send(r, properties={"id": id})
|
self.producer.send(r, properties={"id": id})
|
||||||
|
|
@ -190,6 +202,9 @@ class Processor(ConsumerProducer):
|
||||||
message = str(e),
|
message = str(e),
|
||||||
),
|
),
|
||||||
response=None,
|
response=None,
|
||||||
|
in_token=None,
|
||||||
|
out_token=None,
|
||||||
|
model=None,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.producer.send(r, properties={"id": id})
|
self.producer.send(r, properties={"id": id})
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue