diff --git a/arch/arch_config_schema.yaml b/arch/arch_config_schema.yaml index 4e4ec80c..e8c7e348 100644 --- a/arch/arch_config_schema.yaml +++ b/arch/arch_config_schema.yaml @@ -43,7 +43,12 @@ properties: properties: name: type: string + # this field is deprecated, use provider_interface instead provider: + type: string + enum: + - openai + provider_interface: type: string enum: - openai @@ -59,7 +64,6 @@ properties: additionalProperties: false required: - name - - provider - model overrides: type: object diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml index 30a4497d..17147cc7 100644 --- a/arch/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -125,15 +125,21 @@ static_resources: - "*" routes: {% for provider in arch_llm_providers %} + # if endpoint is set then use custom cluster for upstream llm + {% if provider.endpoint %} + {% set llm_cluster_name = provider.name %} + {% else %} + {% set llm_cluster_name = provider.provider_interface %} + {% endif %} - match: prefix: "/" headers: - name: "x-arch-llm-provider" string_match: - exact: {{ provider.provider }} + exact: {{ llm_cluster_name }} route: auto_host_rewrite: true - cluster: {{ provider.provider }} + cluster: {{ llm_cluster_name }} timeout: 60s {% endfor %} http_filters: @@ -237,16 +243,16 @@ static_resources: domains: - "*" routes: - {% for internal_clustrer in ["arch_fc", "model_server"] %} + {% for internal_cluster in ["arch_fc", "model_server"] %} - match: prefix: "/" headers: - name: "x-arch-upstream" string_match: - exact: {{ internal_clustrer }} + exact: {{ internal_cluster }} route: auto_host_rewrite: true - cluster: {{ internal_clustrer }} + cluster: {{ internal_cluster }} timeout: 60s {% endfor %} @@ -370,15 +376,21 @@ static_resources: cluster: openai timeout: 60s {% for provider in arch_llm_providers %} + # if endpoint is set then use custom cluster for upstream llm + {% if provider.endpoint %} + {% set llm_cluster_name = provider.name %} + {% else %} + {% set llm_cluster_name = provider.provider_interface %} + {% endif %} - match: prefix: "/" headers: - name: "x-arch-llm-provider" string_match: - exact: {{ provider.provider }} + exact: {{ llm_cluster_name }} route: auto_host_rewrite: true - cluster: {{ provider.provider }} + cluster: {{ llm_cluster_name }} timeout: 60s {% endfor %} - match: diff --git a/arch/tools/cli/config_generator.py b/arch/tools/cli/config_generator.py index 5379e909..e535894b 100644 --- a/arch/tools/cli/config_generator.py +++ b/arch/tools/cli/config_generator.py @@ -58,15 +58,23 @@ def validate_and_render_schema(): f"Unknown endpoint {name}, please add it in endpoints section in your arch_config.yaml file" ) - arch_llm_providers = config_yaml["llm_providers"] arch_tracing = config_yaml.get("tracing", {}) - arch_config_string = yaml.dump(config_yaml) - config_yaml["mode"] = "llm" - arch_llm_config_string = yaml.dump(config_yaml) llms_with_endpoint = [] - for llm_provider in arch_llm_providers: + updated_llm_providers = [] + for llm_provider in config_yaml["llm_providers"]: + provider = None + if llm_provider.get("provider") and llm_provider.get("provider_interface"): + raise Exception( + "Please provide either provider or provider_interface, not both" + ) + if llm_provider.get("provider"): + provider = llm_provider["provider"] + llm_provider["provider_interface"] = provider + del llm_provider["provider"] + updated_llm_providers.append(llm_provider) + if llm_provider.get("endpoint", None): endpoint = llm_provider["endpoint"] if len(endpoint.split(":")) > 1: @@ -74,11 +82,16 @@ def validate_and_render_schema(): llm_provider["port"] = int(endpoint.split(":")[1]) llms_with_endpoint.append(llm_provider) + config_yaml["llm_providers"] = updated_llm_providers + + arch_config_string = yaml.dump(config_yaml) + arch_llm_config_string = yaml.dump(config_yaml) + data = { "arch_config": arch_config_string, "arch_llm_config": arch_llm_config_string, "arch_clusters": inferred_clusters, - "arch_llm_providers": arch_llm_providers, + "arch_llm_providers": config_yaml["llm_providers"], "arch_tracing": arch_tracing, "local_llms": llms_with_endpoint, } diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index d0c73c63..fbafe7b9 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -183,7 +183,7 @@ impl Display for LlmProviderType { //TODO: use enum for model, but if there is a new model, we need to update the code pub struct LlmProvider { pub name: String, - pub provider: LlmProviderType, + pub provider_interface: LlmProviderType, pub access_key: Option, pub model: String, pub default: Option, diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index fba443f0..39d4c58f 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -179,7 +179,7 @@ impl HttpContext for StreamContext { if self.llm_provider().endpoint.is_none() { self.add_http_request_header( ARCH_ROUTING_HEADER, - &self.llm_provider().provider.to_string(), + &self.llm_provider().provider_interface.to_string(), ); } else { self.add_http_request_header(ARCH_ROUTING_HEADER, &self.llm_provider().name); diff --git a/demos/currency_exchange_ollama/arch_config.yaml b/demos/currency_exchange_ollama/arch_config.yaml index c555a302..5936ff17 100644 --- a/demos/currency_exchange_ollama/arch_config.yaml +++ b/demos/currency_exchange_ollama/arch_config.yaml @@ -8,9 +8,10 @@ listener: llm_providers: - name: local-llama - provider: openai + provider_interface: openai model: llama3.2 endpoint: host.docker.internal:11434 + default: true system_prompt: | You are a helpful assistant.