fixes based on PR

2026-06-17 15:25:17 +02:00 · 2025-09-29 17:43:04 -07:00 · 2025-09-29 17:43:04 -07:00 · 067d886b8d
commit 067d886b8d
parent 17fe859a71
5 changed files with 18 additions and 9 deletions
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@ -140,7 +140,7 @@ static_resources:
                          route:
                            auto_host_rewrite: true
                            cluster: {{ llm_cluster_name }}
-                            timeout: 60s
+                            timeout: 300s
                      {% endfor %}

                      {% if agent_orchestrator %}
@ -153,7 +153,7 @@ static_resources:
                          route:
                            auto_host_rewrite: true
                            cluster: {{ agent_orchestrator }}
-                            timeout: 60s
+                            timeout: 300s
                      {% endif %}
                http_filters:
                  - name: envoy.filters.http.compressor
@ -266,7 +266,7 @@ static_resources:
                          route:
                            auto_host_rewrite: true
                            cluster: {{ internal_cluster }}
-                            timeout: 60s
+                            timeout: 300s
                        {% endfor %}

                        {% for cluster_name, cluster in arch_clusters.items() %}
@ -279,7 +279,7 @@ static_resources:
                          route:
                            auto_host_rewrite: true
                            cluster: {{ cluster_name }}
-                            timeout: 60s
+                            timeout: 300s
                        {% endfor %}
                http_filters:
                  - name: envoy.filters.http.router
@ -434,7 +434,7 @@ static_resources:
                          route:
                            auto_host_rewrite: true
                            cluster: {{ llm_cluster_name }}
-                            timeout: 60s
+                            timeout: 300s
                      {% endfor %}
                        - match:
                            prefix: "/"
--- a/arch/tools/cli/config_generator.py
+++ b/arch/tools/cli/config_generator.py
@ -242,7 +242,7 @@ def validate_and_render_schema():
    if llm_gateway_listener.get("address") == None:
        llm_gateway_listener["address"] = "127.0.0.1"
    if llm_gateway_listener.get("timeout") == None:
-        llm_gateway_listener["timeout"] = "10s"
+        llm_gateway_listener["timeout"] = "300s"

    use_agent_orchestrator = config_yaml.get("overrides", {}).get(
        "use_agent_orchestrator", False
--- a/arch/tools/cli/utils.py
+++ b/arch/tools/cli/utils.py
@ -95,9 +95,9 @@ def find_config_file(path=".", file=None):
        return os.path.abspath(file)
    else:
        # If no file is provided, use the path and look for config.yaml first, then fall back to arch_config.yaml
-        arch_config_file = os.path.abspath(os.path.join(path, "arch_config.yaml"))
+        arch_config_file = os.path.abspath(os.path.join(path, "config.yaml"))
        if not os.path.exists(arch_config_file):
-            arch_config_file = os.path.abspath(os.path.join(path, "config.yaml"))
+            arch_config_file = os.path.abspath(os.path.join(path, "arch_config.yaml"))
        return arch_config_file


--- a/crates/hermesllm/src/apis/openai.rs
+++ b/crates/hermesllm/src/apis/openai.rs
@ -117,6 +117,13 @@ impl ChatCompletionsRequest {
            self.max_tokens = None;
        }
    }
+
+    pub fn fix_temperature_if_gpt5(&mut self) { // Forces `temperature` to 1.0 when the request targets a "gpt-5"-prefixed model; otherwise a no-op.
+        let model = self.model.as_str();
+        if model.starts_with("gpt-5") { // prefix match, so any model id beginning with "gpt-5" is affected
+            self.temperature = Some(1.0); // overwrites any caller-supplied value. NOTE(review): presumably gpt-5 models only accept the default temperature — confirm
+        }
+    }
 }

 // ============================================================================
@ -599,6 +606,7 @@ impl TryFrom<&[u8]> for ChatCompletionsRequest {
       let mut req: ChatCompletionsRequest = serde_json::from_slice(bytes).map_err(OpenAIStreamError::from)?;
        // Use the centralized suppression logic
        req.suppress_max_tokens_if_o3();
+        req.fix_temperature_if_gpt5();
        Ok(req)
    }
 }
--- a/crates/hermesllm/src/clients/transformer.rs
+++ b/crates/hermesllm/src/clients/transformer.rs
@ -111,6 +111,7 @@ impl TryFrom<AnthropicMessagesRequest> for ChatCompletionsRequest {
            ..Default::default()
        };
        _chat_completions_req.suppress_max_tokens_if_o3();
+        _chat_completions_req.fix_temperature_if_gpt5();
        Ok(_chat_completions_req)
    }
 }
@ -1014,7 +1015,7 @@ fn convert_content_delta(delta: MessagesContentDelta) -> Result<ChatCompletionsS
                "unknown",
                MessageDelta {
                    role: None,
-                    content: Some(format!("[Thinking: {}]", thinking)),
+                    content: Some(format!("thinking: {}", thinking)),
                    refusal: None,
                    function_call: None,
                    tool_calls: None,