mirror of
https://github.com/katanemo/plano.git
synced 2026-06-29 15:49:40 +02:00
fixes based on PR
This commit is contained in:
parent
17fe859a71
commit
067d886b8d
5 changed files with 18 additions and 9 deletions
|
|
@ -140,7 +140,7 @@ static_resources:
|
||||||
route:
|
route:
|
||||||
auto_host_rewrite: true
|
auto_host_rewrite: true
|
||||||
cluster: {{ llm_cluster_name }}
|
cluster: {{ llm_cluster_name }}
|
||||||
timeout: 60s
|
timeout: 300s
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
||||||
{% if agent_orchestrator %}
|
{% if agent_orchestrator %}
|
||||||
|
|
@ -153,7 +153,7 @@ static_resources:
|
||||||
route:
|
route:
|
||||||
auto_host_rewrite: true
|
auto_host_rewrite: true
|
||||||
cluster: {{ agent_orchestrator }}
|
cluster: {{ agent_orchestrator }}
|
||||||
timeout: 60s
|
timeout: 300s
|
||||||
{% endif %}
|
{% endif %}
|
||||||
http_filters:
|
http_filters:
|
||||||
- name: envoy.filters.http.compressor
|
- name: envoy.filters.http.compressor
|
||||||
|
|
@ -266,7 +266,7 @@ static_resources:
|
||||||
route:
|
route:
|
||||||
auto_host_rewrite: true
|
auto_host_rewrite: true
|
||||||
cluster: {{ internal_cluster }}
|
cluster: {{ internal_cluster }}
|
||||||
timeout: 60s
|
timeout: 300s
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
||||||
{% for cluster_name, cluster in arch_clusters.items() %}
|
{% for cluster_name, cluster in arch_clusters.items() %}
|
||||||
|
|
@ -279,7 +279,7 @@ static_resources:
|
||||||
route:
|
route:
|
||||||
auto_host_rewrite: true
|
auto_host_rewrite: true
|
||||||
cluster: {{ cluster_name }}
|
cluster: {{ cluster_name }}
|
||||||
timeout: 60s
|
timeout: 300s
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
http_filters:
|
http_filters:
|
||||||
- name: envoy.filters.http.router
|
- name: envoy.filters.http.router
|
||||||
|
|
@ -434,7 +434,7 @@ static_resources:
|
||||||
route:
|
route:
|
||||||
auto_host_rewrite: true
|
auto_host_rewrite: true
|
||||||
cluster: {{ llm_cluster_name }}
|
cluster: {{ llm_cluster_name }}
|
||||||
timeout: 60s
|
timeout: 300s
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
- match:
|
- match:
|
||||||
prefix: "/"
|
prefix: "/"
|
||||||
|
|
|
||||||
|
|
@ -242,7 +242,7 @@ def validate_and_render_schema():
|
||||||
if llm_gateway_listener.get("address") == None:
|
if llm_gateway_listener.get("address") == None:
|
||||||
llm_gateway_listener["address"] = "127.0.0.1"
|
llm_gateway_listener["address"] = "127.0.0.1"
|
||||||
if llm_gateway_listener.get("timeout") == None:
|
if llm_gateway_listener.get("timeout") == None:
|
||||||
llm_gateway_listener["timeout"] = "10s"
|
llm_gateway_listener["timeout"] = "300s"
|
||||||
|
|
||||||
use_agent_orchestrator = config_yaml.get("overrides", {}).get(
|
use_agent_orchestrator = config_yaml.get("overrides", {}).get(
|
||||||
"use_agent_orchestrator", False
|
"use_agent_orchestrator", False
|
||||||
|
|
|
||||||
|
|
@ -95,9 +95,9 @@ def find_config_file(path=".", file=None):
|
||||||
return os.path.abspath(file)
|
return os.path.abspath(file)
|
||||||
else:
|
else:
|
||||||
# If no file is provided, use the path and look for arch_config.yaml first, then config.yaml for convenience
|
# If no file is provided, use the path and look for arch_config.yaml first, then config.yaml for convenience
|
||||||
arch_config_file = os.path.abspath(os.path.join(path, "arch_config.yaml"))
|
arch_config_file = os.path.abspath(os.path.join(path, "config.yaml"))
|
||||||
if not os.path.exists(arch_config_file):
|
if not os.path.exists(arch_config_file):
|
||||||
arch_config_file = os.path.abspath(os.path.join(path, "config.yaml"))
|
arch_config_file = os.path.abspath(os.path.join(path, "arch_config.yaml"))
|
||||||
return arch_config_file
|
return arch_config_file
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -117,6 +117,13 @@ impl ChatCompletionsRequest {
|
||||||
self.max_tokens = None;
|
self.max_tokens = None;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn fix_temperature_if_gpt5(&mut self) {
|
||||||
|
let model = self.model.as_str();
|
||||||
|
if model.starts_with("gpt-5") {
|
||||||
|
self.temperature = Some(1.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|
@ -599,6 +606,7 @@ impl TryFrom<&[u8]> for ChatCompletionsRequest {
|
||||||
let mut req: ChatCompletionsRequest = serde_json::from_slice(bytes).map_err(OpenAIStreamError::from)?;
|
let mut req: ChatCompletionsRequest = serde_json::from_slice(bytes).map_err(OpenAIStreamError::from)?;
|
||||||
// Use the centralized suppression logic
|
// Use the centralized suppression logic
|
||||||
req.suppress_max_tokens_if_o3();
|
req.suppress_max_tokens_if_o3();
|
||||||
|
req.fix_temperature_if_gpt5();
|
||||||
Ok(req)
|
Ok(req)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -111,6 +111,7 @@ impl TryFrom<AnthropicMessagesRequest> for ChatCompletionsRequest {
|
||||||
..Default::default()
|
..Default::default()
|
||||||
};
|
};
|
||||||
_chat_completions_req.suppress_max_tokens_if_o3();
|
_chat_completions_req.suppress_max_tokens_if_o3();
|
||||||
|
_chat_completions_req.fix_temperature_if_gpt5();
|
||||||
Ok(_chat_completions_req)
|
Ok(_chat_completions_req)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1014,7 +1015,7 @@ fn convert_content_delta(delta: MessagesContentDelta) -> Result<ChatCompletionsS
|
||||||
"unknown",
|
"unknown",
|
||||||
MessageDelta {
|
MessageDelta {
|
||||||
role: None,
|
role: None,
|
||||||
content: Some(format!("[Thinking: {}]", thinking)),
|
content: Some(format!("thinking: {}", thinking)),
|
||||||
refusal: None,
|
refusal: None,
|
||||||
function_call: None,
|
function_call: None,
|
||||||
tool_calls: None,
|
tool_calls: None,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue