From 88246742f3db1c2a755c02b95b8a6f0910cbba30 Mon Sep 17 00:00:00 2001
From: Jack Colquitt <126733989+JackColquitt@users.noreply.github.com>
Date: Sun, 29 Sep 2024 10:11:48 -0700
Subject: [PATCH] Features/metering all llms (#70)

* Added Anthropic support and None logic

* Added Cohere API support

* Added support for Llamafile and OpenAI

* Added support for VertexAI

* Added AzureAI support
---
 trustgraph/metering/counter.py                | 12 ++--
 trustgraph/metering/pricelist.py              | 57 ++++++++++++++++++-
 trustgraph/model/text_completion/azure/llm.py | 26 +++++++--
 .../model/text_completion/claude/llm.py       | 12 +++-
 .../model/text_completion/cohere/llm.py       | 13 ++++-
 .../model/text_completion/llamafile/llm.py    | 14 +++++
 .../model/text_completion/openai/llm.py       | 15 ++++-
 .../model/text_completion/vertexai/llm.py     | 15 +++++
 8 files changed, 150 insertions(+), 14 deletions(-)

diff --git a/trustgraph/metering/counter.py b/trustgraph/metering/counter.py
index 0a33f413..6404888e 100644
--- a/trustgraph/metering/counter.py
+++ b/trustgraph/metering/counter.py
@@ -51,13 +51,17 @@ class Processor(Consumer):
         num_out = v.out_token
 
         model_input_price, model_output_price = self.get_prices(price_list, modelname)
-        cost_in = num_in * model_input_price
-        cost_out = num_out * model_output_price
-        cost_per_call = cost_in + cost_out
+
+        if model_input_price is None:
+            cost_per_call = "Model not found in price list"
+        else:
+            cost_in = num_in * model_input_price
+            cost_out = num_out * model_output_price
+            cost_per_call = round(cost_in + cost_out, 6)
 
         print(f"Input Tokens: {num_in}", flush=True)
         print(f"Output Tokens: {num_out}", flush=True)
-        print(f"Cost for call: ${cost_per_call:.6f}", flush=True)
+        print(f"Cost for call: ${cost_per_call}", flush=True)
 
     @staticmethod
     def add_args(parser):
diff --git a/trustgraph/metering/pricelist.py b/trustgraph/metering/pricelist.py
index bffa9bec..e890d0e1 100644
--- a/trustgraph/metering/pricelist.py
+++ b/trustgraph/metering/pricelist.py
@@ -44,6 +44,61 @@ price_list = {
         "model_name": "ollama",
         "input_price": 0,
         "output_price": 0
-    }
+    },
+    {
+        "model_name": "claude-3-haiku-20240307",
+        "input_price": 0.00000025,
+        "output_price": 0.00000125
+    },
+    {
+        "model_name": "claude-3-5-sonnet-20240620",
+        "input_price": 0.000003,
+        "output_price": 0.000015
+    },
+    {
+        "model_name": "claude-3-opus-20240229",
+        "input_price": 0.000015,
+        "output_price": 0.000075
+    },
+    {
+        "model_name": "claude-3-sonnet-20240229",
+        "input_price": 0.000003,
+        "output_price": 0.000015
+    },
+    {
+        "model_name": "command-r-08-2024",
+        "input_price": 0.0000025,
+        "output_price": 0.000010
+    },
+    {
+        "model_name": "c4ai-aya-23-8b",
+        "input_price": 0,
+        "output_price": 0
+    },
+    {
+        "model_name": "llama.cpp",
+        "input_price": 0,
+        "output_price": 0
+    },
+    {
+        "model_name": "gpt-4o",
+        "input_price": 0.000005,
+        "output_price": 0.000015
+    },
+    {
+        "model_name": "gpt-4o-2024-08-06",
+        "input_price": 0.0000025,
+        "output_price": 0.000010
+    },
+    {
+        "model_name": "gpt-4o-2024-05-13",
+        "input_price": 0.000005,
+        "output_price": 0.000015
+    },
+    {
+        "model_name": "gpt-4o-mini",
+        "input_price": 0.00000015,
+        "output_price": 0.0000006
+    },
 ]
}
\ No newline at end of file
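Note on the counter arithmetic above: prices are per token in dollars, so a call's cost is in_tokens x input_price + out_tokens x output_price, rounded to six decimal places, and an unpriced model short-circuits to a "not found" message rather than a number. A minimal standalone sketch of that behaviour follows; PRICE_LIST and cost_for_call are illustrative names, and get_prices() is assumed to return (None, None) for unknown models, which is what the None check in counter.py implies — the real implementation is not part of this diff.

    # Minimal sketch of the counter-side price lookup and cost arithmetic.
    # PRICE_LIST / cost_for_call are illustrative; get_prices() is assumed
    # to return (None, None) for unknown models, as counter.py implies.

    PRICE_LIST = [
        {"model_name": "gpt-4o-mini", "input_price": 0.00000015, "output_price": 0.0000006},
        {"model_name": "claude-3-haiku-20240307", "input_price": 0.00000025, "output_price": 0.00000125},
    ]

    def get_prices(price_list, model_name):
        # Linear scan is fine at this size.
        for entry in price_list:
            if entry["model_name"] == model_name:
                return entry["input_price"], entry["output_price"]
        return None, None

    def cost_for_call(model_name, num_in, num_out):
        input_price, output_price = get_prices(PRICE_LIST, model_name)
        if input_price is None:
            return "Model not found in price list"
        return round(num_in * input_price + num_out * output_price, 6)

    # 1,000 prompt tokens + 500 completion tokens on gpt-4o-mini:
    # 1000 * 0.00000015 + 500 * 0.0000006 = 0.00045
    print(cost_for_call("gpt-4o-mini", 1000, 500))  # -> 0.00045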
diff --git a/trustgraph/model/text_completion/azure/llm.py b/trustgraph/model/text_completion/azure/llm.py
index 86395317..25339924 100755
--- a/trustgraph/model/text_completion/azure/llm.py
+++ b/trustgraph/model/text_completion/azure/llm.py
@@ -108,9 +108,7 @@ class Processor(ConsumerProducer):
 
         result = resp.json()
 
-        message_content = result['choices'][0]['message']['content']
-
-        return message_content
+        return result
 
     def handle(self, msg):
 
@@ -132,9 +130,17 @@ class Processor(ConsumerProducer):
             with __class__.text_completion_metric.time():
                 response = self.call_llm(prompt)
 
+            resp = response['choices'][0]['message']['content']
+            inputtokens = response['usage']['prompt_tokens']
+            outputtokens = response['usage']['completion_tokens']
+
+            print(resp, flush=True)
+            print(f"Input Tokens: {inputtokens}", flush=True)
+            print(f"Output Tokens: {outputtokens}", flush=True)
+
             print("Send response...", flush=True)
-            r = TextCompletionResponse(response=response, error=None)
+            r = TextCompletionResponse(response=resp, error=None, in_token=inputtokens, out_token=outputtokens, model="AzureAI")
 
             self.producer.send(r, properties={"id": id})
 
         except TooManyRequests:
@@ -145,7 +151,11 @@ class Processor(ConsumerProducer):
                 error=Error(
                     type = "rate-limit",
                     message = str(e),
-                )
+                ),
+                response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )
 
             self.producer.send(r, properties={"id": id})
@@ -162,7 +172,11 @@ class Processor(ConsumerProducer):
                 error=Error(
                     type = "llm-error",
                     message = str(e),
-                )
+                ),
+                response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )
 
             self.producer.send(r, properties={"id": id})
diff --git a/trustgraph/model/text_completion/claude/llm.py b/trustgraph/model/text_completion/claude/llm.py
index 85d77a85..ad949b02 100755
--- a/trustgraph/model/text_completion/claude/llm.py
+++ b/trustgraph/model/text_completion/claude/llm.py
@@ -105,10 +105,14 @@ class Processor(ConsumerProducer):
             )
 
             resp = response.content[0].text
+            inputtokens = response.usage.input_tokens
+            outputtokens = response.usage.output_tokens
             print(resp, flush=True)
+            print(f"Input Tokens: {inputtokens}", flush=True)
+            print(f"Output Tokens: {outputtokens}", flush=True)
             print("Send response...", flush=True)
-            r = TextCompletionResponse(response=resp, error=None)
+            r = TextCompletionResponse(response=resp, error=None, in_token=inputtokens, out_token=outputtokens, model=self.model)
 
             self.send(r, properties={"id": id})
 
             print("Done.", flush=True)
@@ -125,6 +129,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )
 
             self.producer.send(r, properties={"id": id})
@@ -143,6 +150,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )
 
             self.producer.send(r, properties={"id": id})
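All of these handler changes assume TextCompletionResponse now carries in_token, out_token and model fields next to response and error, with the error paths explicitly nulling the new fields. The schema change itself is not in this diff; the following dataclass sketch is purely hypothetical and only records the field shape the handlers above construct.

    # Hypothetical sketch only: the real TextCompletionResponse schema is
    # defined elsewhere and is not part of this patch. This records the
    # field shape the handlers above rely on.
    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class Error:
        type: str                   # e.g. "rate-limit" or "llm-error"
        message: str

    @dataclass
    class TextCompletionResponse:
        error: Optional[Error]      # set on failure, None on success
        response: Optional[str]     # generated text, None on failure
        in_token: Optional[int]     # billed prompt tokens
        out_token: Optional[int]    # billed completion tokens
        model: Optional[str]        # key used for the price-list lookup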
diff --git a/trustgraph/model/text_completion/cohere/llm.py b/trustgraph/model/text_completion/cohere/llm.py
index af55fd2c..75ef8ae4 100755
--- a/trustgraph/model/text_completion/cohere/llm.py
+++ b/trustgraph/model/text_completion/cohere/llm.py
@@ -91,10 +91,15 @@ class Processor(ConsumerProducer):
             )
 
             resp = output.text
+            inputtokens = output.meta.billed_units.input_tokens
+            outputtokens = output.meta.billed_units.output_tokens
+
             print(resp, flush=True)
+            print(f"Input Tokens: {inputtokens}", flush=True)
+            print(f"Output Tokens: {outputtokens}", flush=True)
             print("Send response...", flush=True)
-            r = TextCompletionResponse(response=resp, error=None)
+            r = TextCompletionResponse(response=resp, error=None, in_token=inputtokens, out_token=outputtokens, model=self.model)
 
             self.send(r, properties={"id": id})
 
             print("Done.", flush=True)
@@ -111,6 +116,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )
 
             self.producer.send(r, properties={"id": id})
@@ -129,6 +137,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )
 
             self.producer.send(r, properties={"id": id})
diff --git a/trustgraph/model/text_completion/llamafile/llm.py b/trustgraph/model/text_completion/llamafile/llm.py
index c42ec472..86427167 100755
--- a/trustgraph/model/text_completion/llamafile/llm.py
+++ b/trustgraph/model/text_completion/llamafile/llm.py
@@ -107,12 +107,20 @@ class Processor(ConsumerProducer):
                 #}
             )
 
+            inputtokens = resp.usage.prompt_tokens
+            outputtokens = resp.usage.completion_tokens
+
             print(resp.choices[0].message.content, flush=True)
+            print(f"Input Tokens: {inputtokens}", flush=True)
+            print(f"Output Tokens: {outputtokens}", flush=True)
             print("Send response...", flush=True)
 
             r = TextCompletionResponse(
                 response=resp.choices[0].message.content,
                 error=None,
+                in_token=inputtokens,
+                out_token=outputtokens,
+                model="llama.cpp"
             )
 
             self.send(r, properties={"id": id})
@@ -130,6 +138,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )
 
             self.producer.send(r, properties={"id": id})
@@ -148,6 +159,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )
 
             self.producer.send(r, properties={"id": id})
diff --git a/trustgraph/model/text_completion/openai/llm.py b/trustgraph/model/text_completion/openai/llm.py
index d4563e7b..5d259e7e 100755
--- a/trustgraph/model/text_completion/openai/llm.py
+++ b/trustgraph/model/text_completion/openai/llm.py
@@ -108,13 +108,20 @@ class Processor(ConsumerProducer):
                     "type": "text"
                 }
             )
-            
+
+            inputtokens = resp.usage.prompt_tokens
+            outputtokens = resp.usage.completion_tokens
             print(resp.choices[0].message.content, flush=True)
+            print(f"Input Tokens: {inputtokens}", flush=True)
+            print(f"Output Tokens: {outputtokens}", flush=True)
             print("Send response...", flush=True)
 
             r = TextCompletionResponse(
                 response=resp.choices[0].message.content,
                 error=None,
+                in_token=inputtokens,
+                out_token=outputtokens,
+                model=self.model
             )
 
             self.send(r, properties={"id": id})
@@ -132,6 +139,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )
 
             self.producer.send(r, properties={"id": id})
@@ -150,6 +160,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
            )
 
             self.producer.send(r, properties={"id": id})
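The llamafile handler reads the same usage shape as the OpenAI one because llamafile serves an OpenAI-compatible API through the same client. A standalone sketch of that shared pattern, assuming the openai Python SDK v1+; the model name and base_url here are placeholders, not values taken from this patch.

    # Standalone sketch of the usage-extraction pattern shared by the
    # openai and llamafile handlers (openai SDK v1+ assumed).
    from openai import OpenAI

    # For llamafile, point the same client at the local server instead,
    # e.g. OpenAI(base_url="http://localhost:8080/v1", api_key="unused").
    client = OpenAI()

    resp = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Say hello."}],
    )

    # Both backends report billed tokens on the same usage object.
    print(resp.choices[0].message.content)
    print(f"Input Tokens: {resp.usage.prompt_tokens}")
    print(f"Output Tokens: {resp.usage.completion_tokens}")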
diff --git a/trustgraph/model/text_completion/vertexai/llm.py b/trustgraph/model/text_completion/vertexai/llm.py
index 17ebde6c..41f4ff3e 100755
--- a/trustgraph/model/text_completion/vertexai/llm.py
+++ b/trustgraph/model/text_completion/vertexai/llm.py
@@ -122,6 +122,7 @@ class Processor(ConsumerProducer):
         print(f"Initialise model {model}", flush=True)
 
         self.llm = GenerativeModel(model)
+        self.model = model
 
         print("Initialisation complete", flush=True)
 
@@ -147,12 +148,20 @@ class Processor(ConsumerProducer):
             )
 
+            inputtokens = resp.usage_metadata.prompt_token_count
+            outputtokens = resp.usage_metadata.candidates_token_count
+
             resp = resp.text
             print(resp, flush=True)
+            print(f"Input Tokens: {inputtokens}", flush=True)
+            print(f"Output Tokens: {outputtokens}", flush=True)
             print("Send response...", flush=True)
 
             r = TextCompletionResponse(
                 error=None,
                 response=resp,
+                in_token=inputtokens,
+                out_token=outputtokens,
+                model=self.model
             )
 
             self.producer.send(r, properties={"id": id})
@@ -172,6 +181,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )
 
             self.producer.send(r, properties={"id": id})
@@ -190,6 +202,9 @@ class Processor(ConsumerProducer):
                     message = str(e),
                 ),
                 response=None,
+                in_token=None,
+                out_token=None,
+                model=None,
             )
 
             self.producer.send(r, properties={"id": id})
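One ordering detail in the vertexai hunk above is worth calling out: usage_metadata lives on the response object, so the token counts have to be read before resp is rebound to the plain string resp.text. A reduced sketch of that constraint, assuming the vertexai SDK; the project, location and model name below are placeholders.

    # Reduced sketch of the ordering constraint in the vertexai handler.
    # Project, location and model name below are placeholders.
    import vertexai
    from vertexai.generative_models import GenerativeModel

    vertexai.init(project="my-project", location="us-central1")
    llm = GenerativeModel("gemini-1.5-flash")

    resp = llm.generate_content("Say hello.")

    # Read token counts while resp is still the response object...
    inputtokens = resp.usage_metadata.prompt_token_count
    outputtokens = resp.usage_metadata.candidates_token_count

    # ...and only then rebind resp to the plain text.
    resp = resp.text
    print(resp)
    print(f"Input Tokens: {inputtokens}")
    print(f"Output Tokens: {outputtokens}")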