Features/metering all llms (#70)

* Added Anthropic support and None logic

* Added Cohere API support

* Added support for Llamafile and OpenAI

* Added support for VertexAI

* Added AzureAI support
This commit is contained in:
Jack Colquitt 2024-09-29 10:11:48 -07:00 committed by GitHub
parent e5249c2bac
commit 88246742f3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 150 additions and 14 deletions

View file

@@ -51,13 +51,17 @@ class Processor(Consumer):
num_out = v.out_token
model_input_price, model_output_price = self.get_prices(price_list, modelname)
cost_in = num_in * model_input_price
cost_out = num_out * model_output_price
cost_per_call = cost_in + cost_out
if model_input_price == None:
cost_per_call = f"Model Not Found in Price list"
else:
cost_in = num_in * model_input_price
cost_out = num_out * model_output_price
cost_per_call = round(cost_in + cost_out, 6)
print(f"Input Tokens: {num_in}", flush=True)
print(f"Output Tokens: {num_out}", flush=True)
print(f"Cost for call: ${cost_per_call:.6f}", flush=True)
print(f"Cost for call: ${cost_per_call}", flush=True)
@staticmethod
def add_args(parser):

View file

@@ -44,6 +44,61 @@ price_list = {
"model_name": "ollama",
"input_price": 0,
"output_price": 0
}
},
{
"model_name": "claude-3-haiku-20240307",
"input_price": 0.00000025,
"output_price": 0.00000125
},
{
"model_name": "claude-3-5-sonnet-20240620",
"input_price": 0.000003,
"output_price": 0.000015
},
{
"model_name": "claude-3-opus-20240229",
"input_price": 0.000015,
"output_price": 0.000075
},
{
"model_name": "claude-3-sonnet-20240229",
"input_price": 0.000003,
"output_price": 0.000015
},
{
"model_name": "command-r-08-202",
"input_price": 0.0000025,
"output_price": 0.000010
},
{
"model_name": "c4ai-aya-23-8b",
"input_price": 0,
"output_price": 0
},
{
"model_name": "llama.cpp",
"input_price": 0,
"output_price": 0
},
{
"model_name": "gpt-4o",
"input_price": 0.000005,
"output_price": 0.000015
},
{
"model_name": "gpt-4o-2024-08-06",
"input_price": 0.0000025,
"output_price": 0.000010
},
{
"model_name": "gpt-4o-2024-05-13",
"input_price": 0.000005,
"output_price": 0.000015
},
{
"model_name": "gpt-4o-mini",
"input_price": 0.00000015,
"output_price": 0.0000006
},
]
}

View file

@@ -108,9 +108,7 @@ class Processor(ConsumerProducer):
result = resp.json()
message_content = result['choices'][0]['message']['content']
return message_content
return result
def handle(self, msg):
@@ -132,9 +130,17 @@ class Processor(ConsumerProducer):
with __class__.text_completion_metric.time():
response = self.call_llm(prompt)
resp = response['choices'][0]['message']['content']
inputtokens = response['usage']['prompt_tokens']
outputtokens = response['usage']['completion_tokens']
print(resp, flush=True)
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
print("Send response...", flush=True)
r = TextCompletionResponse(response=response, error=None)
r = TextCompletionResponse(response=resp, error=None, in_token=inputtokens, out_token=outputtokens, model="AzureAI")
self.producer.send(r, properties={"id": id})
except TooManyRequests:
@@ -145,7 +151,11 @@ class Processor(ConsumerProducer):
error=Error(
type = "rate-limit",
message = str(e),
)
),
response=None,
in_token=None,
out_token=None,
model=None,
)
self.producer.send(r, properties={"id": id})
@@ -162,7 +172,11 @@ class Processor(ConsumerProducer):
error=Error(
type = "llm-error",
message = str(e),
)
),
response=None,
in_token=None,
out_token=None,
model=None,
)
self.producer.send(r, properties={"id": id})

View file

@@ -105,10 +105,14 @@ class Processor(ConsumerProducer):
)
resp = response.content[0].text
inputtokens = response.usage.input_tokens
outputtokens = response.usage.output_tokens
print(resp, flush=True)
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
print("Send response...", flush=True)
r = TextCompletionResponse(response=resp, error=None)
r = TextCompletionResponse(response=resp, error=None, in_token=inputtokens, out_token=outputtokens, model=self.model)
self.send(r, properties={"id": id})
print("Done.", flush=True)
@@ -125,6 +129,9 @@ class Processor(ConsumerProducer):
message = str(e),
),
response=None,
in_token=None,
out_token=None,
model=None,
)
self.producer.send(r, properties={"id": id})
@@ -143,6 +150,9 @@ class Processor(ConsumerProducer):
message = str(e),
),
response=None,
in_token=None,
out_token=None,
model=None,
)
self.producer.send(r, properties={"id": id})

View file

@@ -91,10 +91,15 @@ class Processor(ConsumerProducer):
)
resp = output.text
inputtokens = output.meta.billed_units.input_tokens
outputtokens = output.meta.billed_units.output_tokens
print(resp, flush=True)
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
print("Send response...", flush=True)
r = TextCompletionResponse(response=resp, error=None)
r = TextCompletionResponse(response=resp, error=None, in_token=inputtokens, out_token=outputtokens, model=self.model)
self.send(r, properties={"id": id})
print("Done.", flush=True)
@@ -111,6 +116,9 @@ class Processor(ConsumerProducer):
message = str(e),
),
response=None,
in_token=None,
out_token=None,
model=None,
)
self.producer.send(r, properties={"id": id})
@@ -129,6 +137,9 @@ class Processor(ConsumerProducer):
message = str(e),
),
response=None,
in_token=None,
out_token=None,
model=None,
)
self.producer.send(r, properties={"id": id})

View file

@@ -107,12 +107,20 @@ class Processor(ConsumerProducer):
#}
)
inputtokens = resp.usage.prompt_tokens
outputtokens = resp.usage.completion_tokens
print(resp.choices[0].message.content, flush=True)
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
print("Send response...", flush=True)
r = TextCompletionResponse(
response=resp.choices[0].message.content,
error=None,
in_token=inputtokens,
out_token=outputtokens,
model="llama.cpp"
)
self.send(r, properties={"id": id})
@@ -130,6 +138,9 @@ class Processor(ConsumerProducer):
message = str(e),
),
response=None,
in_token=None,
out_token=None,
model=None,
)
self.producer.send(r, properties={"id": id})
@@ -148,6 +159,9 @@ class Processor(ConsumerProducer):
message = str(e),
),
response=None,
in_token=None,
out_token=None,
model=None,
)
self.producer.send(r, properties={"id": id})

View file

@@ -108,13 +108,20 @@ class Processor(ConsumerProducer):
"type": "text"
}
)
inputtokens = resp.usage.prompt_tokens
outputtokens = resp.usage.completion_tokens
print(resp.choices[0].message.content, flush=True)
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
print("Send response...", flush=True)
r = TextCompletionResponse(
response=resp.choices[0].message.content,
error=None,
in_token=inputtokens,
out_token=outputtokens,
model=self.model
)
self.send(r, properties={"id": id})
@@ -132,6 +139,9 @@ class Processor(ConsumerProducer):
message = str(e),
),
response=None,
in_token=None,
out_token=None,
model=None,
)
self.producer.send(r, properties={"id": id})
@@ -150,6 +160,9 @@ class Processor(ConsumerProducer):
message = str(e),
),
response=None,
in_token=None,
out_token=None,
model=None,
)
self.producer.send(r, properties={"id": id})

View file

@@ -122,6 +122,7 @@ class Processor(ConsumerProducer):
print(f"Initialise model {model}", flush=True)
self.llm = GenerativeModel(model)
self.model = model
print("Initialisation complete", flush=True)
@@ -147,12 +148,20 @@ class Processor(ConsumerProducer):
)
resp = resp.text
inputtokens = resp.usage_metadata.prompt_token_count
outputtokens = resp.usage_metadata.candidates_token_count
print(resp, flush=True)
print(f"Input Tokens: {inputtokens}", flush=True)
print(f"Output Tokens: {outputtokens}", flush=True)
print("Send response...", flush=True)
r = TextCompletionResponse(
error=None,
response=resp,
in_token=inputtokens,
out_token=outputtokens,
model=self.model
)
self.producer.send(r, properties={"id": id})
@@ -172,6 +181,9 @@ class Processor(ConsumerProducer):
message = str(e),
),
response=None,
in_token=None,
out_token=None,
model=None,
)
self.producer.send(r, properties={"id": id})
@@ -190,6 +202,9 @@ class Processor(ConsumerProducer):
message = str(e),
),
response=None,
in_token=None,
out_token=None,
model=None,
)
self.producer.send(r, properties={"id": id})