diff --git a/arch/arch_config_schema.yaml b/arch/arch_config_schema.yaml index 0fe980dd..67073c79 100644 --- a/arch/arch_config_schema.yaml +++ b/arch/arch_config_schema.yaml @@ -51,11 +51,12 @@ properties: type: string default: type: boolean + endpoint: + type: string additionalProperties: false required: - name - provider - - access_key - model overrides: type: object diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml index 5eac257f..3e278c1c 100644 --- a/arch/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -538,6 +538,24 @@ static_resources: tls_maximum_protocol_version: TLSv1_3 {% endif %} {% endfor %} + +{% for local_llm_provider in local_llms %} + - name: {{ local_llm_provider.name }} + connect_timeout: 5s + type: LOGICAL_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: {{ local_llm_provider.name }} + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: {{ local_llm_provider.endpoint }} + port_value: {{ local_llm_provider.port }} + hostname: {{ local_llm_provider.endpoint }} +{% endfor %} - name: arch_internal connect_timeout: 5s type: LOGICAL_DNS diff --git a/arch/tools/cli/config_generator.py b/arch/tools/cli/config_generator.py index 3393bb5c..5379e909 100644 --- a/arch/tools/cli/config_generator.py +++ b/arch/tools/cli/config_generator.py @@ -16,18 +16,6 @@ ARCH_CONFIG_SCHEMA_FILE = os.getenv( ) -def add_secret_key_to_llm_providers(config_yaml): - llm_providers = [] - for llm_provider in config_yaml.get("llm_providers", []): - access_key_env_var = llm_provider.get("access_key", False) - access_key_value = os.getenv(access_key_env_var, False) - if access_key_env_var and access_key_value: - llm_provider["access_key"] = access_key_value - llm_providers.append(llm_provider) - config_yaml["llm_providers"] = llm_providers - return config_yaml - - def validate_and_render_schema(): env = Environment(loader=FileSystemLoader("./")) template = env.get_template("envoy.template.yaml") @@ -76,12 +64,23 @@ def validate_and_render_schema(): config_yaml["mode"] = "llm" arch_llm_config_string = yaml.dump(config_yaml) + llms_with_endpoint = [] + + for llm_provider in arch_llm_providers: + if llm_provider.get("endpoint", None): + endpoint = llm_provider["endpoint"] + if len(endpoint.split(":")) > 1: + llm_provider["endpoint"] = endpoint.split(":")[0] + llm_provider["port"] = int(endpoint.split(":")[1]) + llms_with_endpoint.append(llm_provider) + data = { "arch_config": arch_config_string, "arch_llm_config": arch_llm_config_string, "arch_clusters": inferred_clusters, "arch_llm_providers": arch_llm_providers, "arch_tracing": arch_tracing, + "local_llms": llms_with_endpoint, } rendered = template.render(data) diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index e83c1117..e196be21 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -171,6 +171,8 @@ pub struct LlmProvider { pub model: String, pub default: Option, pub stream: Option, + pub endpoint: Option, + pub port: Option, pub rate_limits: Option, } diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index bb36816f..50f46ac2 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -177,7 +177,10 @@ impl HttpContext for StreamContext { self.add_http_request_header(ARCH_ROUTING_HEADER, &self.llm_provider().name); if let Err(error) = self.modify_auth_headers() { - self.send_server_error(error, Some(StatusCode::BAD_REQUEST)); + // ensure that the provider has an endpoint if the access key is missing else return a bad request + if self.llm_provider.as_ref().unwrap().endpoint.is_none() { + self.send_server_error(error, Some(StatusCode::BAD_REQUEST)); + } } self.delete_content_length_header(); self.save_ratelimit_header(); diff --git a/tests/archgw/arch_config.yaml b/tests/archgw/arch_config.yaml index 3f450717..e9afa5c1 100644 --- a/tests/archgw/arch_config.yaml +++ b/tests/archgw/arch_config.yaml @@ -12,22 +12,13 @@ endpoints: connect_timeout: 0.005s llm_providers: - - name: gpt-4o-mini - access_key: $OPENAI_API_KEY - provider: openai - model: gpt-4o-mini + + - name: local-llm + provider: local-llm + endpoint: host.docker.internal:51002 + model: test-local-model default: true - - name: gpt-3.5-turbo-0125 - access_key: $OPENAI_API_KEY - provider: openai - model: gpt-3.5-turbo-0125 - - - name: gpt-4o - access_key: $OPENAI_API_KEY - provider: openai - model: gpt-4o - system_prompt: | You are a helpful assistant.