Fix AWS bedrock issues with newer model invocation (#572)

- Fixed models so that global.* models work
- Fixed Claude 4.5 & 4.7 invocation by removing top_p top_k params
This commit is contained in:
cybermaggedon 2025-12-01 21:50:05 +00:00 committed by GitHub
parent 72cb1c98e0
commit c808d26b0b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -21,8 +21,6 @@ default_ident = "text-completion"
default_model = 'mistral.mistral-large-2407-v1:0' default_model = 'mistral.mistral-large-2407-v1:0'
default_temperature = 0.0 default_temperature = 0.0
default_max_output = 2048 default_max_output = 2048
default_top_p = 0.99
default_top_k = 40
# Actually, these could all just be None, no need to get environment # Actually, these could all just be None, no need to get environment
# variables, as Boto3 would pick all these up if not passed in as args # variables, as Boto3 would pick all these up if not passed in as args
@ -38,16 +36,10 @@ class ModelHandler:
def __init__(self): def __init__(self):
self.temperature = default_temperature self.temperature = default_temperature
self.max_output = default_max_output self.max_output = default_max_output
self.top_p = default_top_p
self.top_k = default_top_k
def set_temperature(self, temperature): def set_temperature(self, temperature):
self.temperature = temperature self.temperature = temperature
def set_max_output(self, max_output): def set_max_output(self, max_output):
self.max_output = max_output self.max_output = max_output
def set_top_p(self, top_p):
self.top_p = top_p
def set_top_k(self, top_k):
self.top_k = top_k
def encode_request(self, system, prompt): def encode_request(self, system, prompt):
raise RuntimeError("format_request not implemented") raise RuntimeError("format_request not implemented")
def decode_response(self, response): def decode_response(self, response):
@ -57,15 +49,12 @@ class ModelHandler:
class Mistral(ModelHandler): class Mistral(ModelHandler):
def __init__(self): def __init__(self):
self.top_p = 0.99 pass
self.top_k = 40
def encode_request(self, system, prompt): def encode_request(self, system, prompt):
return json.dumps({ return json.dumps({
"prompt": f"{system}\n\n{prompt}", "prompt": f"{system}\n\n{prompt}",
"max_tokens": self.max_output, "max_tokens": self.max_output,
"temperature": self.temperature, "temperature": self.temperature,
"top_p": self.top_p,
"top_k": self.top_k,
}) })
def decode_response(self, response): def decode_response(self, response):
response_body = json.loads(response.get("body").read()) response_body = json.loads(response.get("body").read())
@ -79,13 +68,12 @@ class Mistral(ModelHandler):
# Llama 3 # Llama 3
class Meta(ModelHandler): class Meta(ModelHandler):
def __init__(self): def __init__(self):
self.top_p = 0.95 pass
def encode_request(self, system, prompt): def encode_request(self, system, prompt):
return json.dumps({ return json.dumps({
"prompt": f"{system}\n\n{prompt}", "prompt": f"{system}\n\n{prompt}",
"max_gen_len": self.max_output, "max_gen_len": self.max_output,
"temperature": self.temperature, "temperature": self.temperature,
"top_p": self.top_p,
}) })
def decode_response(self, response): def decode_response(self, response):
model_response = json.loads(response["body"].read()) model_response = json.loads(response["body"].read())
@ -96,13 +84,12 @@ class Meta(ModelHandler):
class Anthropic(ModelHandler): class Anthropic(ModelHandler):
def __init__(self): def __init__(self):
self.top_p = 0.999 pass
def encode_request(self, system, prompt): def encode_request(self, system, prompt):
return json.dumps({ return json.dumps({
"anthropic_version": "bedrock-2023-05-31", "anthropic_version": "bedrock-2023-05-31",
"max_tokens": self.max_output, "max_tokens": self.max_output,
"temperature": self.temperature, "temperature": self.temperature,
"top_p": self.top_p,
"messages": [ "messages": [
{ {
"role": "user", "role": "user",
@ -127,12 +114,11 @@ class Anthropic(ModelHandler):
class Ai21(ModelHandler): class Ai21(ModelHandler):
def __init__(self): def __init__(self):
self.top_p = 0.9 pass
def encode_request(self, system, prompt): def encode_request(self, system, prompt):
return json.dumps({ return json.dumps({
"max_tokens": self.max_output, "max_tokens": self.max_output,
"temperature": self.temperature, "temperature": self.temperature,
"top_p": self.top_p,
"messages": [ "messages": [
{ {
"role": "user", "role": "user",
@ -230,30 +216,17 @@ class Processor(LlmService):
def determine_variant(self, model): def determine_variant(self, model):
# FIXME: Missing, Amazon models, Deepseek if ".anthropic." in model or model.startswith("anthropic"):
return Anthropic
# This set of conditions deals with normal bedrock on-demand usage elif ".meta." in model or model.startswith("meta"):
if model.startswith("mistral"): return Meta
elif ".mistral." in model or model.startswith("mistral"):
return Mistral return Mistral
elif model.startswith("meta"): elif ".ai21." in model or model.startswith("ai21"):
return Meta
elif model.startswith("anthropic"):
return Anthropic
elif model.startswith("ai21"):
return Ai21 return Ai21
elif model.startswith("cohere"): elif ".cohere." in model or model.startswith("cohere"):
return Cohere return Cohere
# The inference profiles
if model.startswith("us.meta"):
return Meta
elif model.startswith("us.anthropic"):
return Anthropic
elif model.startswith("eu.meta"):
return Meta
elif model.startswith("eu.anthropic"):
return Anthropic
return Default return Default
def _get_or_create_variant(self, model_name, temperature=None): def _get_or_create_variant(self, model_name, temperature=None):