mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
fixes based on PR
This commit is contained in:
parent
17fe859a71
commit
067d886b8d
5 changed files with 18 additions and 9 deletions
|
|
@ -140,7 +140,7 @@ static_resources:
|
|||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: {{ llm_cluster_name }}
|
||||
timeout: 60s
|
||||
timeout: 300s
|
||||
{% endfor %}
|
||||
|
||||
{% if agent_orchestrator %}
|
||||
|
|
@ -153,7 +153,7 @@ static_resources:
|
|||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: {{ agent_orchestrator }}
|
||||
timeout: 60s
|
||||
timeout: 300s
|
||||
{% endif %}
|
||||
http_filters:
|
||||
- name: envoy.filters.http.compressor
|
||||
|
|
@ -266,7 +266,7 @@ static_resources:
|
|||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: {{ internal_cluster }}
|
||||
timeout: 60s
|
||||
timeout: 300s
|
||||
{% endfor %}
|
||||
|
||||
{% for cluster_name, cluster in arch_clusters.items() %}
|
||||
|
|
@ -279,7 +279,7 @@ static_resources:
|
|||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: {{ cluster_name }}
|
||||
timeout: 60s
|
||||
timeout: 300s
|
||||
{% endfor %}
|
||||
http_filters:
|
||||
- name: envoy.filters.http.router
|
||||
|
|
@ -434,7 +434,7 @@ static_resources:
|
|||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: {{ llm_cluster_name }}
|
||||
timeout: 60s
|
||||
timeout: 300s
|
||||
{% endfor %}
|
||||
- match:
|
||||
prefix: "/"
|
||||
|
|
|
|||
|
|
@ -242,7 +242,7 @@ def validate_and_render_schema():
|
|||
if llm_gateway_listener.get("address") == None:
|
||||
llm_gateway_listener["address"] = "127.0.0.1"
|
||||
if llm_gateway_listener.get("timeout") == None:
|
||||
llm_gateway_listener["timeout"] = "10s"
|
||||
llm_gateway_listener["timeout"] = "300s"
|
||||
|
||||
use_agent_orchestrator = config_yaml.get("overrides", {}).get(
|
||||
"use_agent_orchestrator", False
|
||||
|
|
|
|||
|
|
@ -95,9 +95,9 @@ def find_config_file(path=".", file=None):
|
|||
return os.path.abspath(file)
|
||||
else:
|
||||
# If no file is provided, use the path and look for config.yaml first, then fall back to arch_config.yaml
|
||||
arch_config_file = os.path.abspath(os.path.join(path, "arch_config.yaml"))
|
||||
arch_config_file = os.path.abspath(os.path.join(path, "config.yaml"))
|
||||
if not os.path.exists(arch_config_file):
|
||||
arch_config_file = os.path.abspath(os.path.join(path, "config.yaml"))
|
||||
arch_config_file = os.path.abspath(os.path.join(path, "arch_config.yaml"))
|
||||
return arch_config_file
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -117,6 +117,13 @@ impl ChatCompletionsRequest {
|
|||
self.max_tokens = None;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn fix_temperature_if_gpt5(&mut self) {
|
||||
let model = self.model.as_str();
|
||||
if model.starts_with("gpt-5") {
|
||||
self.temperature = Some(1.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
|
|
@ -599,6 +606,7 @@ impl TryFrom<&[u8]> for ChatCompletionsRequest {
|
|||
let mut req: ChatCompletionsRequest = serde_json::from_slice(bytes).map_err(OpenAIStreamError::from)?;
|
||||
// Use the centralized suppression logic
|
||||
req.suppress_max_tokens_if_o3();
|
||||
req.fix_temperature_if_gpt5();
|
||||
Ok(req)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -111,6 +111,7 @@ impl TryFrom<AnthropicMessagesRequest> for ChatCompletionsRequest {
|
|||
..Default::default()
|
||||
};
|
||||
_chat_completions_req.suppress_max_tokens_if_o3();
|
||||
_chat_completions_req.fix_temperature_if_gpt5();
|
||||
Ok(_chat_completions_req)
|
||||
}
|
||||
}
|
||||
|
|
@ -1014,7 +1015,7 @@ fn convert_content_delta(delta: MessagesContentDelta) -> Result<ChatCompletionsS
|
|||
"unknown",
|
||||
MessageDelta {
|
||||
role: None,
|
||||
content: Some(format!("[Thinking: {}]", thinking)),
|
||||
content: Some(format!("thinking: {}", thinking)),
|
||||
refusal: None,
|
||||
function_call: None,
|
||||
tool_calls: None,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue