From a92e75c5ea25800880fed354c9a25a4e7e271fac Mon Sep 17 00:00:00 2001 From: Salman Paracha Date: Thu, 18 Sep 2025 09:51:27 -0700 Subject: [PATCH] fixing failing tests and updating rendered config file --- arch/envoy.template.yaml | 26 ------------------- arch/tools/cli/config_generator.py | 1 - crates/common/src/configuration.rs | 3 --- crates/hermesllm/src/providers/id.rs | 9 ++----- demos/use_cases/llm_routing/arch_config.yaml | 6 +++++ .../includes/arch_config_full_reference.yaml | 10 +++++++ .../arch_config_full_reference_rendered.yaml | 6 +++++ 7 files changed, 24 insertions(+), 37 deletions(-) diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml index a00bb19d..5ee4c899 100644 --- a/arch/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -626,32 +626,6 @@ static_resources: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 - - name: lambda_ai - connect_timeout: 0.5s - type: LOGICAL_DNS - dns_lookup_family: V4_ONLY - lb_policy: ROUND_ROBIN - load_assignment: - cluster_name: xai - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: api.lambda.ai - port_value: 443 - hostname: "api.lambda.ai" - transport_socket: - name: envoy.transport_sockets.tls - typed_config: - "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext - sni: api.lambda.ai - common_tls_context: - tls_params: - tls_minimum_protocol_version: TLSv1_2 - tls_maximum_protocol_version: TLSv1_3 - - - name: gemini connect_timeout: 0.5s type: LOGICAL_DNS diff --git a/arch/tools/cli/config_generator.py b/arch/tools/cli/config_generator.py index d6d746e9..0fd02979 100644 --- a/arch/tools/cli/config_generator.py +++ b/arch/tools/cli/config_generator.py @@ -15,7 +15,6 @@ SUPPORTED_PROVIDERS = [ "gemini", "anthropic", "together_ai", - "lambda_ai", "azure_openai", "xai", ] diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index efd99704..034e9148 100644 --- 
a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -171,8 +171,6 @@ pub enum LlmProviderType { XAI, #[serde(rename = "together_ai")] TogetherAI, - #[serde(rename = "lambda_ai")] - LambdaAI, #[serde(rename = "azure_openai")] AzureOpenAI, } @@ -189,7 +187,6 @@ impl Display for LlmProviderType { LlmProviderType::OpenAI => write!(f, "openai"), LlmProviderType::XAI => write!(f, "xai"), LlmProviderType::TogetherAI => write!(f, "together_ai"), - LlmProviderType::LambdaAI => write!(f, "lambda_ai"), LlmProviderType::AzureOpenAI => write!(f, "azure_openai"), } } diff --git a/crates/hermesllm/src/providers/id.rs b/crates/hermesllm/src/providers/id.rs index bc8691f2..13ef4c6e 100644 --- a/crates/hermesllm/src/providers/id.rs +++ b/crates/hermesllm/src/providers/id.rs @@ -16,7 +16,6 @@ pub enum ProviderId { AzureOpenAI, XAI, TogetherAI, - LambdaAI, } impl From<&str> for ProviderId { @@ -33,7 +32,6 @@ impl From<&str> for ProviderId { "azure_openai" => ProviderId::AzureOpenAI, "xai" => ProviderId::XAI, "together_ai" => ProviderId::TogetherAI, - "lambda_ai" => ProviderId::LambdaAI, _ => panic!("Unknown provider: {}", value), } } @@ -57,8 +55,7 @@ impl ProviderId { | ProviderId::GitHub | ProviderId::AzureOpenAI | ProviderId::XAI - | ProviderId::TogetherAI - | ProviderId::LambdaAI, + | ProviderId::TogetherAI, SupportedAPIs::AnthropicMessagesAPI(_)) => SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions), (ProviderId::OpenAI @@ -70,8 +67,7 @@ impl ProviderId { | ProviderId::GitHub | ProviderId::AzureOpenAI | ProviderId::XAI - | ProviderId::TogetherAI - | ProviderId::LambdaAI, + | ProviderId::TogetherAI, SupportedAPIs::OpenAIChatCompletions(_)) => SupportedAPIs::OpenAIChatCompletions(OpenAIApi::ChatCompletions), } } @@ -91,7 +87,6 @@ impl Display for ProviderId { ProviderId::AzureOpenAI => write!(f, "azure_openai"), ProviderId::XAI => write!(f, "xai"), ProviderId::TogetherAI => write!(f, "together_ai"), - ProviderId::LambdaAI => write!(f, 
"lambda_ai"), } } } diff --git a/demos/use_cases/llm_routing/arch_config.yaml b/demos/use_cases/llm_routing/arch_config.yaml index f90643ff..176f53e9 100644 --- a/demos/use_cases/llm_routing/arch_config.yaml +++ b/demos/use_cases/llm_routing/arch_config.yaml @@ -37,6 +37,12 @@ llm_providers: - access_key: $GEMINI_API_KEY model: gemini/gemini-1.5-pro-latest + - model: xai/grok-4-latest + access_key: $GROK_API_KEY + + - model: together_ai/openai/gpt-oss-20b + access_key: $TOGETHER_API_KEY + - model: custom/test-model base_url: http://host.docker.internal:11223 provider_interface: openai diff --git a/docs/source/resources/includes/arch_config_full_reference.yaml b/docs/source/resources/includes/arch_config_full_reference.yaml index 808baff1..c9d5e4ff 100644 --- a/docs/source/resources/includes/arch_config_full_reference.yaml +++ b/docs/source/resources/includes/arch_config_full_reference.yaml @@ -41,6 +41,16 @@ llm_providers: - model: mistral/mistral-7b-instruct base_url: http://mistral_local +# Model aliases - friendly names that map to actual provider names +model_aliases: + # Alias for summarization tasks -> fast/cheap model + arch.summarize.v1: + target: gpt-4o + + # Alias for general purpose tasks -> latest model + arch.v1: + target: mistral-8x7b + # provides a way to override default settings for the arch system overrides: # By default Arch uses an NLI + embedding approach to match an incoming prompt to a prompt target. 
diff --git a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml index 503f6a80..4c791e82 100644 --- a/docs/source/resources/includes/arch_config_full_reference_rendered.yaml +++ b/docs/source/resources/includes/arch_config_full_reference_rendered.yaml @@ -31,12 +31,18 @@ llm_providers: name: mistral/mistral-8x7b provider_interface: mistral - base_url: http://mistral_local + cluster_name: mistral_mistral_local endpoint: mistral_local model: mistral-7b-instruct name: mistral/mistral-7b-instruct port: 80 protocol: http provider_interface: mistral +model_aliases: + arch.summarize.v1: + target: gpt-4o + arch.v1: + target: mistral-8x7b overrides: prompt_target_intent_matching_threshold: 0.6 prompt_guards: