Flow temperature parameter (#533)

* Add temperature parameter to LlmService and roll out to all LLMs
This commit is contained in:
cybermaggedon 2025-09-25 21:26:11 +01:00 committed by GitHub
parent aa8e422e8c
commit 6f4f7ce6b4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 164 additions and 72 deletions

View file

@@ -5,7 +5,7 @@ LLM text completion base class
import time
import logging
from prometheus_client import Histogram
from prometheus_client import Histogram, Info
from .. schema import TextCompletionRequest, TextCompletionResponse, Error
from .. exceptions import TooManyRequests
@@ -62,6 +62,12 @@ class LlmService(FlowProcessor):
)
)
self.register_specification(
ParameterSpec(
name = "temperature",
)
)
if not hasattr(__class__, "text_completion_metric"):
__class__.text_completion_metric = Histogram(
'text_completion_duration',
@@ -76,6 +82,13 @@ class LlmService(FlowProcessor):
]
)
if not hasattr(__class__, "text_completion_model_metric"):
__class__.text_completion_model_metric = Info(
'text_completion_model',
'Text completion model',
["processor", "flow"]
)
async def on_request(self, msg, consumer, flow):
try:
@@ -92,11 +105,19 @@
).time():
model = flow("model")
temperature = flow("temperature")
response = await self.generate_content(
request.system, request.prompt, model
request.system, request.prompt, model, temperature
)
await __class__.text_completion_model_metric.labels(
id = flow.id, flow = flow.name
).info({
"model": model,
"temperature": temperature,
})
await flow("response").send(
TextCompletionResponse(
error=None,