mirror of
https://github.com/katanemo/plano.git
synced 2026-05-21 13:55:15 +02:00
rename arch provider to plano, use llm_routing_model and agent_orchestration_model
This commit is contained in:
parent
680dee60a0
commit
6f8bf96d38
16 changed files with 37 additions and 50 deletions
|
|
@ -13,7 +13,6 @@ SUPPORTED_PROVIDERS_WITH_BASE_URL = [
|
|||
"ollama",
|
||||
"qwen",
|
||||
"amazon_bedrock",
|
||||
"arch",
|
||||
"plano",
|
||||
]
|
||||
|
||||
|
|
@ -373,7 +372,7 @@ def validate_and_render_schema():
|
|||
model_name_set = {mp.get("model") for mp in updated_model_providers}
|
||||
|
||||
# Auto-add arch-router provider if routing preferences exist and no provider matches the router model
|
||||
router_model = overrides_config.get("router_model", "Arch-Router")
|
||||
router_model = overrides_config.get("llm_routing_model", "Arch-Router")
|
||||
# Strip provider prefix for comparison since config processing strips prefixes from model names
|
||||
router_model_id = (
|
||||
router_model.split("/", 1)[1] if "/" in router_model else router_model
|
||||
|
|
@ -382,7 +381,7 @@ def validate_and_render_schema():
|
|||
updated_model_providers.append(
|
||||
{
|
||||
"name": "arch-router",
|
||||
"provider_interface": "arch",
|
||||
"provider_interface": "plano",
|
||||
"model": router_model_id,
|
||||
"internal": True,
|
||||
}
|
||||
|
|
@ -393,7 +392,7 @@ def validate_and_render_schema():
|
|||
updated_model_providers.append(
|
||||
{
|
||||
"name": "arch-function",
|
||||
"provider_interface": "arch",
|
||||
"provider_interface": "plano",
|
||||
"model": "Arch-Function",
|
||||
"internal": True,
|
||||
}
|
||||
|
|
@ -401,7 +400,7 @@ def validate_and_render_schema():
|
|||
|
||||
# Auto-add plano-orchestrator provider if no provider matches the orchestrator model
|
||||
orchestrator_model = overrides_config.get(
|
||||
"orchestrator_model", "Plano-Orchestrator"
|
||||
"agent_orchestration_model", "Plano-Orchestrator"
|
||||
)
|
||||
orchestrator_model_id = (
|
||||
orchestrator_model.split("/", 1)[1]
|
||||
|
|
@ -411,8 +410,8 @@ def validate_and_render_schema():
|
|||
if orchestrator_model_id not in model_name_set:
|
||||
updated_model_providers.append(
|
||||
{
|
||||
"name": "plano-orchestrator",
|
||||
"provider_interface": "arch",
|
||||
"name": "plano/orchestrator",
|
||||
"provider_interface": "plano",
|
||||
"model": orchestrator_model_id,
|
||||
"internal": True,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -594,13 +594,13 @@ static_resources:
|
|||
|
||||
clusters:
|
||||
|
||||
- name: arch
|
||||
- name: plano
|
||||
connect_timeout: {{ upstream_connect_timeout | default('5s') }}
|
||||
type: LOGICAL_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
load_assignment:
|
||||
cluster_name: arch
|
||||
cluster_name: plano
|
||||
endpoints:
|
||||
- lb_endpoints:
|
||||
- endpoint:
|
||||
|
|
|
|||
|
|
@ -173,7 +173,6 @@ properties:
|
|||
provider_interface:
|
||||
type: string
|
||||
enum:
|
||||
- arch
|
||||
- plano
|
||||
- claude
|
||||
- deepseek
|
||||
|
|
@ -221,7 +220,6 @@ properties:
|
|||
provider_interface:
|
||||
type: string
|
||||
enum:
|
||||
- arch
|
||||
- plano
|
||||
- claude
|
||||
- deepseek
|
||||
|
|
@ -273,10 +271,10 @@ properties:
|
|||
upstream_tls_ca_path:
|
||||
type: string
|
||||
description: "Path to the trusted CA bundle for upstream TLS verification. Default is '/etc/ssl/certs/ca-certificates.crt'."
|
||||
router_model:
|
||||
llm_routing_model:
|
||||
type: string
|
||||
description: "Model name for the LLM router (e.g., 'Arch-Router'). Must match a model in model_providers."
|
||||
orchestrator_model:
|
||||
agent_orchestration_model:
|
||||
type: string
|
||||
description: "Model name for the agent orchestrator (e.g., 'Plano-Orchestrator'). Must match a model in model_providers."
|
||||
system_prompt:
|
||||
|
|
|
|||
|
|
@ -94,7 +94,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
|
||||
// Strip provider prefix (e.g. "plano/") to get the model ID used in upstream requests
|
||||
let routing_model_name: String = overrides
|
||||
.router_model
|
||||
.llm_routing_model
|
||||
.as_deref()
|
||||
.map(|m| m.split_once('/').map(|(_, id)| id).unwrap_or(m))
|
||||
.unwrap_or(DEFAULT_ROUTING_MODEL_NAME)
|
||||
|
|
@ -116,7 +116,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
|
||||
// Strip provider prefix (e.g. "plano/") to get the model ID used in upstream requests
|
||||
let orchestrator_model_name: String = overrides
|
||||
.orchestrator_model
|
||||
.agent_orchestration_model
|
||||
.as_deref()
|
||||
.map(|m| m.split_once('/').map(|(_, id)| id).unwrap_or(m))
|
||||
.unwrap_or(DEFAULT_ORCHESTRATOR_MODEL_NAME)
|
||||
|
|
|
|||
|
|
@ -81,12 +81,12 @@ impl OrchestratorService {
|
|||
debug!(
|
||||
model = %self.orchestrator_model.get_model_name(),
|
||||
endpoint = %self.orchestrator_url,
|
||||
"sending request to arch-orchestrator"
|
||||
"sending request to plano-orchestrator"
|
||||
);
|
||||
|
||||
debug!(
|
||||
body = %serde_json::to_string(&orchestrator_request).unwrap(),
|
||||
"arch orchestrator request"
|
||||
"plano orchestrator request"
|
||||
);
|
||||
|
||||
let mut orchestration_request_headers = header::HeaderMap::new();
|
||||
|
|
|
|||
|
|
@ -77,8 +77,8 @@ pub struct Overrides {
|
|||
pub prompt_target_intent_matching_threshold: Option<f64>,
|
||||
pub optimize_context_window: Option<bool>,
|
||||
pub use_agent_orchestrator: Option<bool>,
|
||||
pub router_model: Option<String>,
|
||||
pub orchestrator_model: Option<String>,
|
||||
pub llm_routing_model: Option<String>,
|
||||
pub agent_orchestration_model: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
|
|
@ -202,8 +202,6 @@ pub struct EmbeddingProviver {
|
|||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
||||
pub enum LlmProviderType {
|
||||
#[serde(rename = "arch")]
|
||||
Arch,
|
||||
#[serde(rename = "anthropic")]
|
||||
Anthropic,
|
||||
#[serde(rename = "deepseek")]
|
||||
|
|
@ -239,7 +237,6 @@ pub enum LlmProviderType {
|
|||
impl Display for LlmProviderType {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
LlmProviderType::Arch => write!(f, "arch"),
|
||||
LlmProviderType::Anthropic => write!(f, "anthropic"),
|
||||
LlmProviderType::Deepseek => write!(f, "deepseek"),
|
||||
LlmProviderType::Groq => write!(f, "groq"),
|
||||
|
|
@ -263,15 +260,7 @@ impl LlmProviderType {
|
|||
/// Get the ProviderId for this LlmProviderType
|
||||
/// Used with the new function-based hermesllm API
|
||||
pub fn to_provider_id(&self) -> hermesllm::ProviderId {
|
||||
// Plano provider uses the same interface as Arch
|
||||
let provider_str = match self {
|
||||
LlmProviderType::Plano => "arch",
|
||||
other => {
|
||||
return hermesllm::ProviderId::try_from(other.to_string().as_str())
|
||||
.expect("LlmProviderType should always map to a valid ProviderId")
|
||||
}
|
||||
};
|
||||
hermesllm::ProviderId::try_from(provider_str)
|
||||
hermesllm::ProviderId::try_from(self.to_string().as_str())
|
||||
.expect("LlmProviderType should always map to a valid ProviderId")
|
||||
}
|
||||
}
|
||||
|
|
@ -597,14 +586,14 @@ mod test {
|
|||
},
|
||||
LlmProvider {
|
||||
name: "arch-router".to_string(),
|
||||
provider_interface: LlmProviderType::Arch,
|
||||
provider_interface: LlmProviderType::Plano,
|
||||
model: Some("Arch-Router".to_string()),
|
||||
internal: Some(true),
|
||||
..Default::default()
|
||||
},
|
||||
LlmProvider {
|
||||
name: "plano-orchestrator".to_string(),
|
||||
provider_interface: LlmProviderType::Arch,
|
||||
provider_interface: LlmProviderType::Plano,
|
||||
model: Some("Plano-Orchestrator".to_string()),
|
||||
internal: Some(true),
|
||||
..Default::default()
|
||||
|
|
|
|||
|
|
@ -33,4 +33,4 @@ pub const OTEL_COLLECTOR_HTTP: &str = "opentelemetry_collector_http";
|
|||
pub const LLM_ROUTE_HEADER: &str = "x-arch-llm-route";
|
||||
pub const ENVOY_RETRY_HEADER: &str = "x-envoy-max-retries";
|
||||
pub const BRIGHT_STAFF_SERVICE_NAME: &str = "brightstaff";
|
||||
pub const ARCH_FC_CLUSTER: &str = "arch";
|
||||
pub const PLANO_FC_CLUSTER: &str = "plano";
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ mod tests {
|
|||
ProviderId::Mistral
|
||||
);
|
||||
assert_eq!(ProviderId::try_from("groq").unwrap(), ProviderId::Groq);
|
||||
assert_eq!(ProviderId::try_from("arch").unwrap(), ProviderId::Arch);
|
||||
assert_eq!(ProviderId::try_from("plano").unwrap(), ProviderId::Plano);
|
||||
|
||||
// Test aliases
|
||||
assert_eq!(ProviderId::try_from("google").unwrap(), ProviderId::Gemini);
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ pub enum ProviderId {
|
|||
Gemini,
|
||||
Anthropic,
|
||||
GitHub,
|
||||
Arch,
|
||||
Plano,
|
||||
AzureOpenAI,
|
||||
XAI,
|
||||
TogetherAI,
|
||||
|
|
@ -58,7 +58,7 @@ impl TryFrom<&str> for ProviderId {
|
|||
"google" => Ok(ProviderId::Gemini), // alias
|
||||
"anthropic" => Ok(ProviderId::Anthropic),
|
||||
"github" => Ok(ProviderId::GitHub),
|
||||
"arch" => Ok(ProviderId::Arch),
|
||||
"plano" => Ok(ProviderId::Plano),
|
||||
"azure_openai" => Ok(ProviderId::AzureOpenAI),
|
||||
"xai" => Ok(ProviderId::XAI),
|
||||
"together_ai" => Ok(ProviderId::TogetherAI),
|
||||
|
|
@ -135,7 +135,7 @@ impl ProviderId {
|
|||
| ProviderId::Groq
|
||||
| ProviderId::Mistral
|
||||
| ProviderId::Deepseek
|
||||
| ProviderId::Arch
|
||||
| ProviderId::Plano
|
||||
| ProviderId::Gemini
|
||||
| ProviderId::GitHub
|
||||
| ProviderId::AzureOpenAI
|
||||
|
|
@ -153,7 +153,7 @@ impl ProviderId {
|
|||
| ProviderId::Groq
|
||||
| ProviderId::Mistral
|
||||
| ProviderId::Deepseek
|
||||
| ProviderId::Arch
|
||||
| ProviderId::Plano
|
||||
| ProviderId::Gemini
|
||||
| ProviderId::GitHub
|
||||
| ProviderId::AzureOpenAI
|
||||
|
|
@ -219,7 +219,7 @@ impl Display for ProviderId {
|
|||
ProviderId::Gemini => write!(f, "Gemini"),
|
||||
ProviderId::Anthropic => write!(f, "Anthropic"),
|
||||
ProviderId::GitHub => write!(f, "GitHub"),
|
||||
ProviderId::Arch => write!(f, "Arch"),
|
||||
ProviderId::Plano => write!(f, "Plano"),
|
||||
ProviderId::AzureOpenAI => write!(f, "azure_openai"),
|
||||
ProviderId::XAI => write!(f, "xai"),
|
||||
ProviderId::TogetherAI => write!(f, "together_ai"),
|
||||
|
|
|
|||
|
|
@ -873,7 +873,7 @@ impl HttpContext for StreamContext {
|
|||
// ensure that the provider has an endpoint if the access key is missing else return a bad request
|
||||
if self.llm_provider.as_ref().unwrap().endpoint.is_none()
|
||||
&& self.llm_provider.as_ref().unwrap().provider_interface
|
||||
!= LlmProviderType::Arch
|
||||
!= LlmProviderType::Plano
|
||||
{
|
||||
self.send_server_error(error, Some(StatusCode::BAD_REQUEST));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
version: v0.3.0
|
||||
|
||||
overrides:
|
||||
orchestrator_model: plano/katanemo/Plano-Orchestrator-4B
|
||||
agent_orchestration_model: plano/katanemo/Plano-Orchestrator-4B
|
||||
|
||||
agents:
|
||||
- id: weather_agent
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
version: v0.1.0
|
||||
|
||||
overrides:
|
||||
router_model: Arch-Router
|
||||
llm_routing_model: Arch-Router
|
||||
|
||||
listeners:
|
||||
egress_traffic:
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
version: v0.3.0
|
||||
|
||||
overrides:
|
||||
router_model: plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
|
||||
llm_routing_model: plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
|
||||
|
||||
listeners:
|
||||
- type: model
|
||||
|
|
|
|||
|
|
@ -254,7 +254,7 @@ Using Ollama (recommended for local development)
|
|||
.. code-block:: yaml
|
||||
|
||||
overrides:
|
||||
router_model: plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
|
||||
llm_routing_model: plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
|
||||
|
||||
model_providers:
|
||||
- model: plano/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
|
||||
|
|
@ -323,7 +323,7 @@ vLLM provides higher throughput and GPU optimizations suitable for production de
|
|||
.. code-block:: yaml
|
||||
|
||||
overrides:
|
||||
router_model: plano/Arch-Router
|
||||
llm_routing_model: plano/Arch-Router
|
||||
|
||||
model_providers:
|
||||
- model: plano/Arch-Router
|
||||
|
|
|
|||
|
|
@ -404,10 +404,11 @@ Using vLLM
|
|||
.. code-block:: yaml
|
||||
|
||||
overrides:
|
||||
orchestrator_model: plano/katanemo/Plano-Orchestrator-4B
|
||||
agent_orchestration_model: plano/katanemo/Plano-Orchestrator-4B
|
||||
|
||||
model_providers:
|
||||
- model: plano/katanemo/Plano-Orchestrator-4B
|
||||
- model: katanemo/Plano-Orchestrator-4B
|
||||
provider_interface: plano
|
||||
base_url: http://<your-server-ip>:8000
|
||||
|
||||
5. **Verify the server is running**
|
||||
|
|
|
|||
|
|
@ -107,11 +107,11 @@ model_providers:
|
|||
- internal: true
|
||||
model: Arch-Function
|
||||
name: arch-function
|
||||
provider_interface: arch
|
||||
provider_interface: plano
|
||||
- internal: true
|
||||
model: Plano-Orchestrator
|
||||
name: plano-orchestrator
|
||||
provider_interface: arch
|
||||
provider_interface: plano
|
||||
prompt_targets:
|
||||
- description: Get current weather at a location.
|
||||
endpoint:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue