fixes based on PR

This commit is contained in:
Salman Paracha 2025-09-29 17:43:04 -07:00
parent 17fe859a71
commit 067d886b8d
5 changed files with 18 additions and 9 deletions

View file

@ -140,7 +140,7 @@ static_resources:
route:
auto_host_rewrite: true
cluster: {{ llm_cluster_name }}
timeout: 60s
timeout: 300s
{% endfor %}
{% if agent_orchestrator %}
@ -153,7 +153,7 @@ static_resources:
route:
auto_host_rewrite: true
cluster: {{ agent_orchestrator }}
timeout: 60s
timeout: 300s
{% endif %}
http_filters:
- name: envoy.filters.http.compressor
@ -266,7 +266,7 @@ static_resources:
route:
auto_host_rewrite: true
cluster: {{ internal_cluster }}
timeout: 60s
timeout: 300s
{% endfor %}
{% for cluster_name, cluster in arch_clusters.items() %}
@ -279,7 +279,7 @@ static_resources:
route:
auto_host_rewrite: true
cluster: {{ cluster_name }}
timeout: 60s
timeout: 300s
{% endfor %}
http_filters:
- name: envoy.filters.http.router
@ -434,7 +434,7 @@ static_resources:
route:
auto_host_rewrite: true
cluster: {{ llm_cluster_name }}
timeout: 60s
timeout: 300s
{% endfor %}
- match:
prefix: "/"

View file

@ -242,7 +242,7 @@ def validate_and_render_schema():
if llm_gateway_listener.get("address") == None:
llm_gateway_listener["address"] = "127.0.0.1"
if llm_gateway_listener.get("timeout") == None:
llm_gateway_listener["timeout"] = "10s"
llm_gateway_listener["timeout"] = "300s"
use_agent_orchestrator = config_yaml.get("overrides", {}).get(
"use_agent_orchestrator", False

View file

@ -95,9 +95,9 @@ def find_config_file(path=".", file=None):
return os.path.abspath(file)
else:
# If no file is provided, use the path and look for config.yaml first, then fall back to arch_config.yaml for convenience
arch_config_file = os.path.abspath(os.path.join(path, "arch_config.yaml"))
arch_config_file = os.path.abspath(os.path.join(path, "config.yaml"))
if not os.path.exists(arch_config_file):
arch_config_file = os.path.abspath(os.path.join(path, "config.yaml"))
arch_config_file = os.path.abspath(os.path.join(path, "arch_config.yaml"))
return arch_config_file

View file

@ -117,6 +117,13 @@ impl ChatCompletionsRequest {
self.max_tokens = None;
}
}
/// Forces `temperature` to `1.0` when the request targets a model whose name
/// begins with `gpt-5`.
///
/// Any value previously held in `temperature` (including `None`) is
/// overwritten for such models; requests for every other model are left
/// untouched.
///
/// NOTE(review): presumably gpt-5 models only accept the default temperature —
/// confirm against the upstream provider documentation.
pub fn fix_temperature_if_gpt5(&mut self) {
    let targets_gpt5 = self.model.as_str().starts_with("gpt-5");
    if !targets_gpt5 {
        // Not a gpt-5 family model: keep whatever temperature the caller set.
        return;
    }
    self.temperature = Some(1.0);
}
}
// ============================================================================
@ -599,6 +606,7 @@ impl TryFrom<&[u8]> for ChatCompletionsRequest {
let mut req: ChatCompletionsRequest = serde_json::from_slice(bytes).map_err(OpenAIStreamError::from)?;
// Use the centralized suppression logic
req.suppress_max_tokens_if_o3();
req.fix_temperature_if_gpt5();
Ok(req)
}
}

View file

@ -111,6 +111,7 @@ impl TryFrom<AnthropicMessagesRequest> for ChatCompletionsRequest {
..Default::default()
};
_chat_completions_req.suppress_max_tokens_if_o3();
_chat_completions_req.fix_temperature_if_gpt5();
Ok(_chat_completions_req)
}
}
@ -1014,7 +1015,7 @@ fn convert_content_delta(delta: MessagesContentDelta) -> Result<ChatCompletionsS
"unknown",
MessageDelta {
role: None,
content: Some(format!("[Thinking: {}]", thinking)),
content: Some(format!("thinking: {}", thinking)),
refusal: None,
function_call: None,
tool_calls: None,