Feature/metering dashboard (#89)

* Bump version

* Added Prometheus metrics to metering, added dashboard

* Update YAMLs

* Add $ on axis

* Tweak dashboard
This commit is contained in:
cybermaggedon 2024-10-01 06:46:41 +01:00 committed by GitHub
parent 88a7dfa126
commit ef1b8b5a13
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 616 additions and 358 deletions

View file

@@ -2,7 +2,7 @@
Simple token counter for each LLM response.
"""
from prometheus_client import Histogram, Info
from prometheus_client import Counter
from . pricelist import price_list
from .. schema import TextCompletionResponse, Error
@@ -20,6 +20,26 @@ class Processor(Consumer):
def __init__(self, **params):
if not hasattr(__class__, "input_token_metric"):
__class__.input_token_metric = Counter(
'input_tokens', 'Input token count'
)
if not hasattr(__class__, "output_token_metric"):
__class__.output_token_metric = Counter(
'output_tokens', 'Output token count'
)
if not hasattr(__class__, "input_cost_metric"):
__class__.input_cost_metric = Counter(
'input_cost', 'Input cost'
)
if not hasattr(__class__, "output_cost_metric"):
__class__.output_cost_metric = Counter(
'output_cost', 'Output cost'
)
input_queue = params.get("input_queue", default_input_queue)
subscriber = params.get("subscriber", default_subscriber)
@@ -50,6 +70,9 @@ class Processor(Consumer):
num_in = v.in_token
num_out = v.out_token
__class__.input_token_metric.inc(num_in)
__class__.output_token_metric.inc(num_out)
model_input_price, model_output_price = self.get_prices(price_list, modelname)
if model_input_price == None:
@@ -59,6 +82,9 @@ class Processor(Consumer):
cost_out = num_out * model_output_price
cost_per_call = round(cost_in + cost_out, 6)
__class__.input_cost_metric.inc(cost_in)
__class__.output_cost_metric.inc(cost_out)
print(f"Input Tokens: {num_in}", flush=True)
print(f"Output Tokens: {num_out}", flush=True)
print(f"Cost for call: ${cost_per_call}", flush=True)
@@ -72,4 +98,4 @@ class Processor(Consumer):
def run():
    """Entry point: start the metering ``Processor``.

    Passes the module-level ``module`` identifier and this module's
    docstring (``__doc__`` resolves to the module global here, not to this
    function's docstring) to ``Processor.start``.
    """
    # One start call only: the repeated identical line in the diff view was
    # the old/new pair of the same statement, not a second invocation.
    Processor.start(module, __doc__)