From 067d886b8dca93ac752ccbac1e4ffb8ad1832a4a Mon Sep 17 00:00:00 2001 From: Salman Paracha Date: Mon, 29 Sep 2025 17:43:04 -0700 Subject: [PATCH] fixes based on PR --- arch/envoy.template.yaml | 10 +++++----- arch/tools/cli/config_generator.py | 2 +- arch/tools/cli/utils.py | 4 ++-- crates/hermesllm/src/apis/openai.rs | 8 ++++++++ crates/hermesllm/src/clients/transformer.rs | 3 ++- 5 files changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml index 5ee4c899..16b14343 100644 --- a/arch/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -140,7 +140,7 @@ static_resources: route: auto_host_rewrite: true cluster: {{ llm_cluster_name }} - timeout: 60s + timeout: 300s {% endfor %} {% if agent_orchestrator %} @@ -153,7 +153,7 @@ static_resources: route: auto_host_rewrite: true cluster: {{ agent_orchestrator }} - timeout: 60s + timeout: 300s {% endif %} http_filters: - name: envoy.filters.http.compressor @@ -266,7 +266,7 @@ static_resources: route: auto_host_rewrite: true cluster: {{ internal_cluster }} - timeout: 60s + timeout: 300s {% endfor %} {% for cluster_name, cluster in arch_clusters.items() %} @@ -279,7 +279,7 @@ static_resources: route: auto_host_rewrite: true cluster: {{ cluster_name }} - timeout: 60s + timeout: 300s {% endfor %} http_filters: - name: envoy.filters.http.router @@ -434,7 +434,7 @@ static_resources: route: auto_host_rewrite: true cluster: {{ llm_cluster_name }} - timeout: 60s + timeout: 300s {% endfor %} - match: prefix: "/" diff --git a/arch/tools/cli/config_generator.py b/arch/tools/cli/config_generator.py index 8f0dcefd..965bf040 100644 --- a/arch/tools/cli/config_generator.py +++ b/arch/tools/cli/config_generator.py @@ -242,7 +242,7 @@ def validate_and_render_schema(): if llm_gateway_listener.get("address") == None: llm_gateway_listener["address"] = "127.0.0.1" if llm_gateway_listener.get("timeout") == None: - llm_gateway_listener["timeout"] = "10s" + llm_gateway_listener["timeout"] = 
"300s" use_agent_orchestrator = config_yaml.get("overrides", {}).get( "use_agent_orchestrator", False diff --git a/arch/tools/cli/utils.py b/arch/tools/cli/utils.py index d7adca60..c7d39d66 100644 --- a/arch/tools/cli/utils.py +++ b/arch/tools/cli/utils.py @@ -95,9 +95,9 @@ def find_config_file(path=".", file=None): return os.path.abspath(file) else: - # If no file is provided, use the path and look for arch_config.yaml first, then config.yaml for convenience + # If no file is provided, use the path and look for config.yaml first, then fall back to arch_config.yaml - arch_config_file = os.path.abspath(os.path.join(path, "arch_config.yaml")) + arch_config_file = os.path.abspath(os.path.join(path, "config.yaml")) if not os.path.exists(arch_config_file): - arch_config_file = os.path.abspath(os.path.join(path, "config.yaml")) + arch_config_file = os.path.abspath(os.path.join(path, "arch_config.yaml")) return arch_config_file diff --git a/crates/hermesllm/src/apis/openai.rs b/crates/hermesllm/src/apis/openai.rs index c4181768..d7d6ea70 100644 --- a/crates/hermesllm/src/apis/openai.rs +++ b/crates/hermesllm/src/apis/openai.rs @@ -117,6 +117,13 @@ impl ChatCompletionsRequest { self.max_tokens = None; } } + + pub fn fix_temperature_if_gpt5(&mut self) { + let model = self.model.as_str(); + if model.starts_with("gpt-5") { + self.temperature = Some(1.0); + } + } } // ============================================================================ @@ -599,6 +606,7 @@ impl TryFrom<&[u8]> for ChatCompletionsRequest { let mut req: ChatCompletionsRequest = serde_json::from_slice(bytes).map_err(OpenAIStreamError::from)?; // Use the centralized suppression logic req.suppress_max_tokens_if_o3(); + req.fix_temperature_if_gpt5(); Ok(req) } } diff --git a/crates/hermesllm/src/clients/transformer.rs b/crates/hermesllm/src/clients/transformer.rs index 33d2b8c1..0856c359 100644 --- a/crates/hermesllm/src/clients/transformer.rs +++ b/crates/hermesllm/src/clients/transformer.rs @@ -111,6 +111,7 @@ impl TryFrom for ChatCompletionsRequest { ..Default::default() };
_chat_completions_req.suppress_max_tokens_if_o3(); + _chat_completions_req.fix_temperature_if_gpt5(); Ok(_chat_completions_req) } } @@ -1014,7 +1015,7 @@ fn convert_content_delta(delta: MessagesContentDelta) -> Result