pending changes

This commit is contained in:
Adil Hafeez 2025-01-14 16:32:52 -08:00
parent 9570b167db
commit 1a10b82724
6 changed files with 42 additions and 28 deletions

View file

@ -51,11 +51,12 @@ properties:
type: string
default:
type: boolean
endpoint:
type: string
additionalProperties: false
required:
- name
- provider
- access_key
- model
overrides:
type: object

View file

@ -538,6 +538,24 @@ static_resources:
tls_maximum_protocol_version: TLSv1_3
{% endif %}
{% endfor %}
{% for local_llm_provider in local_llms %}
- name: {{ local_llm_provider.name }}
connect_timeout: 5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: {{ local_llm_provider.name }}
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: {{ local_llm_provider.endpoint }}
port_value: {{ local_llm_provider.port }}
hostname: {{ local_llm_provider.endpoint }}
{% endfor %}
- name: arch_internal
connect_timeout: 5s
type: LOGICAL_DNS

View file

@ -16,18 +16,6 @@ ARCH_CONFIG_SCHEMA_FILE = os.getenv(
)
def add_secret_key_to_llm_providers(config_yaml):
    """Resolve each LLM provider's ``access_key`` from the environment.

    A provider's ``access_key`` is treated as the name of an environment
    variable. When that variable is set to a non-empty value, the name is
    replaced in place by the value. Providers with no ``access_key`` (for
    example, ones that only declare an ``endpoint``) or whose variable is
    unset are kept unchanged.

    Args:
        config_yaml: Parsed configuration mapping; may lack ``llm_providers``.

    Returns:
        The same ``config_yaml`` mapping, with ``llm_providers`` rewritten.
    """
    llm_providers = []
    for llm_provider in config_yaml.get("llm_providers", []):
        access_key_env_var = llm_provider.get("access_key")
        # os.getenv raises TypeError for a non-string key, so only consult the
        # environment when the provider actually names a variable.
        if access_key_env_var and isinstance(access_key_env_var, str):
            access_key_value = os.getenv(access_key_env_var)
            if access_key_value:
                llm_provider["access_key"] = access_key_value
        llm_providers.append(llm_provider)
    config_yaml["llm_providers"] = llm_providers
    return config_yaml
def validate_and_render_schema():
env = Environment(loader=FileSystemLoader("./"))
template = env.get_template("envoy.template.yaml")
@ -76,12 +64,23 @@ def validate_and_render_schema():
config_yaml["mode"] = "llm"
arch_llm_config_string = yaml.dump(config_yaml)
llms_with_endpoint = []
for llm_provider in arch_llm_providers:
if llm_provider.get("endpoint", None):
endpoint = llm_provider["endpoint"]
if len(endpoint.split(":")) > 1:
llm_provider["endpoint"] = endpoint.split(":")[0]
llm_provider["port"] = int(endpoint.split(":")[1])
llms_with_endpoint.append(llm_provider)
data = {
"arch_config": arch_config_string,
"arch_llm_config": arch_llm_config_string,
"arch_clusters": inferred_clusters,
"arch_llm_providers": arch_llm_providers,
"arch_tracing": arch_tracing,
"local_llms": llms_with_endpoint,
}
rendered = template.render(data)

View file

@ -171,6 +171,8 @@ pub struct LlmProvider {
pub model: String,
pub default: Option<bool>,
pub stream: Option<bool>,
pub endpoint: Option<String>,
pub port: Option<u16>,
pub rate_limits: Option<LlmRatelimit>,
}

View file

@ -177,7 +177,10 @@ impl HttpContext for StreamContext {
self.add_http_request_header(ARCH_ROUTING_HEADER, &self.llm_provider().name);
if let Err(error) = self.modify_auth_headers() {
self.send_server_error(error, Some(StatusCode::BAD_REQUEST));
// If the access key is missing, let the request through only when the provider has an endpoint; otherwise return a bad request.
if self.llm_provider.as_ref().unwrap().endpoint.is_none() {
self.send_server_error(error, Some(StatusCode::BAD_REQUEST));
}
}
self.delete_content_length_header();
self.save_ratelimit_header();

View file

@ -12,22 +12,13 @@ endpoints:
connect_timeout: 0.005s
llm_providers:
- name: gpt-4o-mini
access_key: $OPENAI_API_KEY
provider: openai
model: gpt-4o-mini
- name: local-llm
provider: local-llm
endpoint: host.docker.internal:51002
model: test-local-model
default: true
- name: gpt-3.5-turbo-0125
access_key: $OPENAI_API_KEY
provider: openai
model: gpt-3.5-turbo-0125
- name: gpt-4o
access_key: $OPENAI_API_KEY
provider: openai
model: gpt-4o
system_prompt: |
You are a helpful assistant.