fixes based on PR

This commit is contained in:
Salman Paracha 2025-09-29 17:43:04 -07:00
parent 17fe859a71
commit 067d886b8d
5 changed files with 18 additions and 9 deletions

View file

@ -140,7 +140,7 @@ static_resources:
route: route:
auto_host_rewrite: true auto_host_rewrite: true
cluster: {{ llm_cluster_name }} cluster: {{ llm_cluster_name }}
timeout: 60s timeout: 300s
{% endfor %} {% endfor %}
{% if agent_orchestrator %} {% if agent_orchestrator %}
@ -153,7 +153,7 @@ static_resources:
route: route:
auto_host_rewrite: true auto_host_rewrite: true
cluster: {{ agent_orchestrator }} cluster: {{ agent_orchestrator }}
timeout: 60s timeout: 300s
{% endif %} {% endif %}
http_filters: http_filters:
- name: envoy.filters.http.compressor - name: envoy.filters.http.compressor
@ -266,7 +266,7 @@ static_resources:
route: route:
auto_host_rewrite: true auto_host_rewrite: true
cluster: {{ internal_cluster }} cluster: {{ internal_cluster }}
timeout: 60s timeout: 300s
{% endfor %} {% endfor %}
{% for cluster_name, cluster in arch_clusters.items() %} {% for cluster_name, cluster in arch_clusters.items() %}
@ -279,7 +279,7 @@ static_resources:
route: route:
auto_host_rewrite: true auto_host_rewrite: true
cluster: {{ cluster_name }} cluster: {{ cluster_name }}
timeout: 60s timeout: 300s
{% endfor %} {% endfor %}
http_filters: http_filters:
- name: envoy.filters.http.router - name: envoy.filters.http.router
@ -434,7 +434,7 @@ static_resources:
route: route:
auto_host_rewrite: true auto_host_rewrite: true
cluster: {{ llm_cluster_name }} cluster: {{ llm_cluster_name }}
timeout: 60s timeout: 300s
{% endfor %} {% endfor %}
- match: - match:
prefix: "/" prefix: "/"

View file

@ -242,7 +242,7 @@ def validate_and_render_schema():
if llm_gateway_listener.get("address") == None: if llm_gateway_listener.get("address") == None:
llm_gateway_listener["address"] = "127.0.0.1" llm_gateway_listener["address"] = "127.0.0.1"
if llm_gateway_listener.get("timeout") == None: if llm_gateway_listener.get("timeout") == None:
llm_gateway_listener["timeout"] = "10s" llm_gateway_listener["timeout"] = "300s"
use_agent_orchestrator = config_yaml.get("overrides", {}).get( use_agent_orchestrator = config_yaml.get("overrides", {}).get(
"use_agent_orchestrator", False "use_agent_orchestrator", False

View file

@ -95,9 +95,9 @@ def find_config_file(path=".", file=None):
return os.path.abspath(file) return os.path.abspath(file)
else: else:
# If no file is provided, use the path and look for arch_config.yaml first, then config.yaml for convenience # If no file is provided, use the path and look for arch_config.yaml first, then config.yaml for convenience
arch_config_file = os.path.abspath(os.path.join(path, "arch_config.yaml")) arch_config_file = os.path.abspath(os.path.join(path, "config.yaml"))
if not os.path.exists(arch_config_file): if not os.path.exists(arch_config_file):
arch_config_file = os.path.abspath(os.path.join(path, "config.yaml")) arch_config_file = os.path.abspath(os.path.join(path, "arch_config.yaml"))
return arch_config_file return arch_config_file

View file

@ -117,6 +117,13 @@ impl ChatCompletionsRequest {
self.max_tokens = None; self.max_tokens = None;
} }
} }
/// Forces `temperature` to `Some(1.0)` when the model name begins with `"gpt-5"`.
///
/// Whatever value `temperature` held before (including `None`) is overwritten
/// for matching models; requests for any other model are left untouched.
// NOTE(review): presumably GPT-5 models only accept the default temperature — confirm.
pub fn fix_temperature_if_gpt5(&mut self) {
    // Guard clause: nothing to adjust unless the model name carries the prefix.
    if !self.model.as_str().starts_with("gpt-5") {
        return;
    }
    self.temperature = Some(1.0);
}
} }
// ============================================================================ // ============================================================================
@ -599,6 +606,7 @@ impl TryFrom<&[u8]> for ChatCompletionsRequest {
let mut req: ChatCompletionsRequest = serde_json::from_slice(bytes).map_err(OpenAIStreamError::from)?; let mut req: ChatCompletionsRequest = serde_json::from_slice(bytes).map_err(OpenAIStreamError::from)?;
// Use the centralized suppression logic // Use the centralized suppression logic
req.suppress_max_tokens_if_o3(); req.suppress_max_tokens_if_o3();
req.fix_temperature_if_gpt5();
Ok(req) Ok(req)
} }
} }

View file

@ -111,6 +111,7 @@ impl TryFrom<AnthropicMessagesRequest> for ChatCompletionsRequest {
..Default::default() ..Default::default()
}; };
_chat_completions_req.suppress_max_tokens_if_o3(); _chat_completions_req.suppress_max_tokens_if_o3();
_chat_completions_req.fix_temperature_if_gpt5();
Ok(_chat_completions_req) Ok(_chat_completions_req)
} }
} }
@ -1014,7 +1015,7 @@ fn convert_content_delta(delta: MessagesContentDelta) -> Result<ChatCompletionsS
"unknown", "unknown",
MessageDelta { MessageDelta {
role: None, role: None,
content: Some(format!("[Thinking: {}]", thinking)), content: Some(format!("thinking: {}", thinking)),
refusal: None, refusal: None,
function_call: None, function_call: None,
tool_calls: None, tool_calls: None,