mirror of
https://github.com/katanemo/plano.git
synced 2026-06-29 15:49:40 +02:00
more updates
This commit is contained in:
parent
e7eb77383f
commit
7f90124bd1
29 changed files with 375 additions and 133 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -101,9 +101,6 @@ venv.bak/
|
||||||
# mypy
|
# mypy
|
||||||
.mypy_cache/
|
.mypy_cache/
|
||||||
|
|
||||||
# VSCode stuff:
|
|
||||||
.vscode/
|
|
||||||
|
|
||||||
# MacOS Metadata
|
# MacOS Metadata
|
||||||
*.DS_Store
|
*.DS_Store
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -72,20 +72,23 @@ properties:
|
||||||
type: string
|
type: string
|
||||||
default:
|
default:
|
||||||
type: boolean
|
type: boolean
|
||||||
# endpoint field is deprecated, use base_url instead
|
|
||||||
endpoint:
|
|
||||||
type: string
|
|
||||||
base_url:
|
base_url:
|
||||||
type: string
|
type: string
|
||||||
protocol:
|
|
||||||
type: string
|
|
||||||
enum:
|
|
||||||
- http
|
|
||||||
- https
|
|
||||||
http_host:
|
http_host:
|
||||||
type: string
|
type: string
|
||||||
usage:
|
routing_preferences:
|
||||||
type: string
|
type: array
|
||||||
|
items:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
description:
|
||||||
|
type: string
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- name
|
||||||
|
- description
|
||||||
additionalProperties: false
|
additionalProperties: false
|
||||||
required:
|
required:
|
||||||
- model
|
- model
|
||||||
|
|
|
||||||
5
arch/tools/.vscode/settings.json
vendored
Normal file
5
arch/tools/.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,5 @@
|
||||||
|
{
|
||||||
|
"cSpell.words": [
|
||||||
|
"BRIGHTSTAFF"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -95,6 +95,8 @@ def validate_and_render_schema():
|
||||||
updated_llm_providers = []
|
updated_llm_providers = []
|
||||||
llm_provider_name_set = set()
|
llm_provider_name_set = set()
|
||||||
llms_with_usage = []
|
llms_with_usage = []
|
||||||
|
model_name_keys = set()
|
||||||
|
model_usage_name_keys = set()
|
||||||
for llm_provider in config_yaml["llm_providers"]:
|
for llm_provider in config_yaml["llm_providers"]:
|
||||||
if llm_provider.get("usage", None):
|
if llm_provider.get("usage", None):
|
||||||
llms_with_usage.append(llm_provider["name"])
|
llms_with_usage.append(llm_provider["name"])
|
||||||
|
|
@ -104,6 +106,11 @@ def validate_and_render_schema():
|
||||||
)
|
)
|
||||||
|
|
||||||
model_name = llm_provider.get("model")
|
model_name = llm_provider.get("model")
|
||||||
|
if model_name in model_name_keys:
|
||||||
|
raise Exception(
|
||||||
|
f"Duplicate model name {model_name}, please provide unique model name for each llm_provider"
|
||||||
|
)
|
||||||
|
model_name_keys.add(model_name)
|
||||||
if llm_provider.get("name") is None:
|
if llm_provider.get("name") is None:
|
||||||
llm_provider["name"] = model_name
|
llm_provider["name"] = model_name
|
||||||
|
|
||||||
|
|
@ -119,6 +126,20 @@ def validate_and_render_schema():
|
||||||
f"Unsupported provider {provider} for model {model_name}. Supported providers are: {', '.join(SUPPORTED_PROVIDERS)}"
|
f"Unsupported provider {provider} for model {model_name}. Supported providers are: {', '.join(SUPPORTED_PROVIDERS)}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if model_id in model_name_keys:
|
||||||
|
raise Exception(
|
||||||
|
f"Duplicate model_id {model_id}, please provide unique model_id for each llm_provider"
|
||||||
|
)
|
||||||
|
model_name_keys.add(model_id)
|
||||||
|
|
||||||
|
for routing_preference in llm_provider.get("routing_preferences", []):
|
||||||
|
if routing_preference.get("name") in model_usage_name_keys:
|
||||||
|
raise Exception(
|
||||||
|
f"Duplicate routing preference name \"{routing_preference.get('name')}\", please provide unique name for each routing preference"
|
||||||
|
)
|
||||||
|
model_usage_name_keys.add(routing_preference.get("name"))
|
||||||
|
|
||||||
|
llm_provider["model"] = model_id
|
||||||
llm_provider["provider_interface"] = provider
|
llm_provider["provider_interface"] = provider
|
||||||
llm_provider_name_set.add(llm_provider.get("name"))
|
llm_provider_name_set.add(llm_provider.get("name"))
|
||||||
provider = None
|
provider = None
|
||||||
|
|
@ -132,21 +153,14 @@ def validate_and_render_schema():
|
||||||
del llm_provider["provider"]
|
del llm_provider["provider"]
|
||||||
updated_llm_providers.append(llm_provider)
|
updated_llm_providers.append(llm_provider)
|
||||||
|
|
||||||
if llm_provider.get("endpoint") and llm_provider.get("base_url"):
|
if llm_provider.get("base_url", None):
|
||||||
raise Exception("Please provide either endpoint or base_url, not both")
|
|
||||||
|
|
||||||
if llm_provider.get("endpoint", None):
|
|
||||||
endpoint = llm_provider["endpoint"]
|
|
||||||
protocol = llm_provider.get("protocol", "http")
|
|
||||||
llm_provider["endpoint"], llm_provider["port"] = get_endpoint_and_port(
|
|
||||||
endpoint, protocol
|
|
||||||
)
|
|
||||||
llms_with_endpoint.append(llm_provider)
|
|
||||||
elif llm_provider.get("base_url", None):
|
|
||||||
base_url = llm_provider["base_url"]
|
base_url = llm_provider["base_url"]
|
||||||
urlparse_result = urlparse(base_url)
|
urlparse_result = urlparse(base_url)
|
||||||
if llm_provider.get("port"):
|
url_path = urlparse_result.path
|
||||||
raise Exception("Please provider port in base_url")
|
if url_path and url_path != "/":
|
||||||
|
raise Exception(
|
||||||
|
f"Please provide base_url without path, got {base_url}. Use base_url like 'http://example.com' instead of 'http://example.com/path'."
|
||||||
|
)
|
||||||
if urlparse_result.scheme == "" or urlparse_result.scheme not in [
|
if urlparse_result.scheme == "" or urlparse_result.scheme not in [
|
||||||
"http",
|
"http",
|
||||||
"https",
|
"https",
|
||||||
|
|
|
||||||
21
crates/.vscode/launch.json
vendored
Normal file
21
crates/.vscode/launch.json
vendored
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
{
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Debug Brightstaff",
|
||||||
|
"type": "lldb",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${workspaceFolder}/target/debug/brightstaff",
|
||||||
|
"args": [],
|
||||||
|
"cwd": "${workspaceFolder}",
|
||||||
|
"stopOnEntry": false,
|
||||||
|
"sourceLanguages": ["rust"],
|
||||||
|
"env": {
|
||||||
|
"RUST_LOG": "debug",
|
||||||
|
"RUST_BACKTRACE": "1",
|
||||||
|
"ARCH_CONFIG_PATH_RENDERED": "../demos/use_cases/preference_based_routing/arch_config_rendered.yaml"
|
||||||
|
},
|
||||||
|
"preLaunchTask": "rust: cargo build"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
21
crates/.vscode/tasks.json
vendored
Normal file
21
crates/.vscode/tasks.json
vendored
Normal file
|
|
@ -0,0 +1,21 @@
|
||||||
|
{
|
||||||
|
"version": "2.0.0",
|
||||||
|
"tasks": [
|
||||||
|
{
|
||||||
|
"type": "cargo",
|
||||||
|
"command": "build",
|
||||||
|
"args": [
|
||||||
|
"--bin",
|
||||||
|
"brightstaff"
|
||||||
|
],
|
||||||
|
"problemMatcher": [
|
||||||
|
"$rustc"
|
||||||
|
],
|
||||||
|
"group": {
|
||||||
|
"kind": "build",
|
||||||
|
"isDefault": true
|
||||||
|
},
|
||||||
|
"label": "rust: cargo build"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -12,7 +12,7 @@ use hyper::{Request, Response, StatusCode};
|
||||||
use tokio::sync::mpsc;
|
use tokio::sync::mpsc;
|
||||||
use tokio_stream::wrappers::ReceiverStream;
|
use tokio_stream::wrappers::ReceiverStream;
|
||||||
use tokio_stream::StreamExt;
|
use tokio_stream::StreamExt;
|
||||||
use tracing::{debug, info, trace, warn};
|
use tracing::{debug, info, warn};
|
||||||
|
|
||||||
use crate::router::llm_router::RouterService;
|
use crate::router::llm_router::RouterService;
|
||||||
|
|
||||||
|
|
@ -81,8 +81,8 @@ pub async fn chat_completions(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
trace!(
|
debug!(
|
||||||
"arch-router request body: {}",
|
"arch-router request received: {}",
|
||||||
&serde_json::to_string(&chat_completion_request).unwrap()
|
&serde_json::to_string(&chat_completion_request).unwrap()
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
@ -102,7 +102,7 @@ pub async fn chat_completions(
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.and_then(|s| serde_yaml::from_str(s).ok());
|
.and_then(|s| serde_yaml::from_str(s).ok());
|
||||||
|
|
||||||
debug!("usage preferences: {:?}", usage_preferences);
|
debug!("usage preferences from request: {:?}", usage_preferences);
|
||||||
|
|
||||||
let mut determined_route = match router_service
|
let mut determined_route = match router_service
|
||||||
.determine_route(
|
.determine_route(
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||||
let _tracer_provider = init_tracer();
|
let _tracer_provider = init_tracer();
|
||||||
let bind_address = env::var("BIND_ADDRESS").unwrap_or_else(|_| BIND_ADDRESS.to_string());
|
let bind_address = env::var("BIND_ADDRESS").unwrap_or_else(|_| BIND_ADDRESS.to_string());
|
||||||
|
|
||||||
|
info!(
|
||||||
|
"current working directory: {}",
|
||||||
|
env::current_dir().unwrap().display()
|
||||||
|
);
|
||||||
// loading arch_config.yaml file
|
// loading arch_config.yaml file
|
||||||
let arch_config_path = env::var("ARCH_CONFIG_PATH_RENDERED")
|
let arch_config_path = env::var("ARCH_CONFIG_PATH_RENDERED")
|
||||||
.unwrap_or_else(|_| "./arch_config_rendered.yaml".to_string());
|
.unwrap_or_else(|_| "./arch_config_rendered.yaml".to_string());
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use common::{
|
use common::{
|
||||||
configuration::{LlmProvider, LlmRoute, ModelUsagePreference},
|
configuration::{LlmProvider, ModelUsagePreference, RoutingPreference},
|
||||||
consts::ARCH_PROVIDER_HINT_HEADER,
|
consts::ARCH_PROVIDER_HINT_HEADER,
|
||||||
};
|
};
|
||||||
use hermesllm::providers::openai::types::{ChatCompletionsResponse, ContentType, Message};
|
use hermesllm::providers::openai::types::{ChatCompletionsResponse, ContentType, Message};
|
||||||
|
|
@ -44,11 +44,14 @@ impl RouterService {
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let providers_with_usage = providers
|
let providers_with_usage = providers
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|provider| provider.usage.is_some())
|
.filter(|provider| provider.routing_preferences.is_some())
|
||||||
.cloned()
|
.cloned()
|
||||||
.collect::<Vec<LlmProvider>>();
|
.collect::<Vec<LlmProvider>>();
|
||||||
|
|
||||||
let llm_routes: Vec<LlmRoute> = providers_with_usage.iter().map(LlmRoute::from).collect();
|
let llm_routes: Vec<RoutingPreference> = providers_with_usage
|
||||||
|
.iter()
|
||||||
|
.flat_map(|provider| provider.routing_preferences.clone().unwrap_or_default())
|
||||||
|
.collect();
|
||||||
|
|
||||||
let router_model = Arc::new(router_model_v1::RouterModelV1::new(
|
let router_model = Arc::new(router_model_v1::RouterModelV1::new(
|
||||||
llm_routes,
|
llm_routes,
|
||||||
|
|
@ -156,6 +159,12 @@ impl RouterService {
|
||||||
router_response_time.as_millis()
|
router_response_time.as_millis()
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if let Some(ref route) = route_name {
|
||||||
|
if route == "other" {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Ok(route_name)
|
Ok(route_name)
|
||||||
} else {
|
} else {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
use common::{
|
use common::{
|
||||||
configuration::{LlmRoute, ModelUsagePreference},
|
configuration::{ModelUsagePreference, RoutingPreference},
|
||||||
consts::{SYSTEM_ROLE, TOOL_ROLE, USER_ROLE},
|
consts::{SYSTEM_ROLE, TOOL_ROLE, USER_ROLE},
|
||||||
};
|
};
|
||||||
use hermesllm::providers::openai::types::{ChatCompletionsRequest, ContentType, Message};
|
use hermesllm::providers::openai::types::{ChatCompletionsRequest, ContentType, Message};
|
||||||
|
|
@ -36,7 +36,11 @@ pub struct RouterModelV1 {
|
||||||
max_token_length: usize,
|
max_token_length: usize,
|
||||||
}
|
}
|
||||||
impl RouterModelV1 {
|
impl RouterModelV1 {
|
||||||
pub fn new(llm_routes: Vec<LlmRoute>, routing_model: String, max_token_length: usize) -> Self {
|
pub fn new(
|
||||||
|
llm_routes: Vec<RoutingPreference>,
|
||||||
|
routing_model: String,
|
||||||
|
max_token_length: usize,
|
||||||
|
) -> Self {
|
||||||
let llm_route_json_str =
|
let llm_route_json_str =
|
||||||
serde_json::to_string(&llm_routes).unwrap_or_else(|_| "[]".to_string());
|
serde_json::to_string(&llm_routes).unwrap_or_else(|_| "[]".to_string());
|
||||||
RouterModelV1 {
|
RouterModelV1 {
|
||||||
|
|
@ -138,9 +142,9 @@ impl RouterModel for RouterModelV1 {
|
||||||
let llm_route_json = usage_preferences
|
let llm_route_json = usage_preferences
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map(|prefs| {
|
.map(|prefs| {
|
||||||
let llm_route: Vec<LlmRoute> = prefs
|
let llm_route: Vec<RoutingPreference> = prefs
|
||||||
.iter()
|
.iter()
|
||||||
.map(|pref| LlmRoute {
|
.map(|pref| RoutingPreference {
|
||||||
name: pref.name.clone(),
|
name: pref.name.clone(),
|
||||||
description: pref.usage.clone().unwrap_or_default(),
|
description: pref.usage.clone().unwrap_or_default(),
|
||||||
})
|
})
|
||||||
|
|
@ -255,7 +259,7 @@ Based on your analysis, provide your response in the following JSON formats if y
|
||||||
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
||||||
]
|
]
|
||||||
"#;
|
"#;
|
||||||
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
|
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
|
||||||
let routing_model = "test-model".to_string();
|
let routing_model = "test-model".to_string();
|
||||||
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
|
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
|
||||||
|
|
||||||
|
|
@ -314,7 +318,7 @@ Based on your analysis, provide your response in the following JSON formats if y
|
||||||
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
||||||
]
|
]
|
||||||
"#;
|
"#;
|
||||||
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
|
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
|
||||||
let routing_model = "test-model".to_string();
|
let routing_model = "test-model".to_string();
|
||||||
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
|
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
|
||||||
|
|
||||||
|
|
@ -379,7 +383,7 @@ Based on your analysis, provide your response in the following JSON formats if y
|
||||||
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
||||||
]
|
]
|
||||||
"#;
|
"#;
|
||||||
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
|
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
|
||||||
let routing_model = "test-model".to_string();
|
let routing_model = "test-model".to_string();
|
||||||
let router = RouterModelV1::new(llm_routes, routing_model.clone(), 235);
|
let router = RouterModelV1::new(llm_routes, routing_model.clone(), 235);
|
||||||
|
|
||||||
|
|
@ -440,7 +444,7 @@ Based on your analysis, provide your response in the following JSON formats if y
|
||||||
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
||||||
]
|
]
|
||||||
"#;
|
"#;
|
||||||
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
|
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
|
||||||
let routing_model = "test-model".to_string();
|
let routing_model = "test-model".to_string();
|
||||||
let router = RouterModelV1::new(llm_routes, routing_model.clone(), 200);
|
let router = RouterModelV1::new(llm_routes, routing_model.clone(), 200);
|
||||||
|
|
||||||
|
|
@ -501,7 +505,7 @@ Based on your analysis, provide your response in the following JSON formats if y
|
||||||
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
||||||
]
|
]
|
||||||
"#;
|
"#;
|
||||||
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
|
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
|
||||||
let routing_model = "test-model".to_string();
|
let routing_model = "test-model".to_string();
|
||||||
let router = RouterModelV1::new(llm_routes, routing_model.clone(), 230);
|
let router = RouterModelV1::new(llm_routes, routing_model.clone(), 230);
|
||||||
|
|
||||||
|
|
@ -569,7 +573,7 @@ Based on your analysis, provide your response in the following JSON formats if y
|
||||||
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
||||||
]
|
]
|
||||||
"#;
|
"#;
|
||||||
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
|
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
|
||||||
let routing_model = "test-model".to_string();
|
let routing_model = "test-model".to_string();
|
||||||
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
|
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
|
||||||
|
|
||||||
|
|
@ -639,7 +643,7 @@ Based on your analysis, provide your response in the following JSON formats if y
|
||||||
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
||||||
]
|
]
|
||||||
"#;
|
"#;
|
||||||
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
|
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
|
||||||
let routing_model = "test-model".to_string();
|
let routing_model = "test-model".to_string();
|
||||||
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
|
let router = RouterModelV1::new(llm_routes, routing_model.clone(), usize::MAX);
|
||||||
|
|
||||||
|
|
@ -716,7 +720,7 @@ Based on your analysis, provide your response in the following JSON formats if y
|
||||||
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
{"name": "Speech Recognition", "description": "Converting spoken language into written text"}
|
||||||
]
|
]
|
||||||
"#;
|
"#;
|
||||||
let llm_routes = serde_json::from_str::<Vec<LlmRoute>>(routes_str).unwrap();
|
let llm_routes = serde_json::from_str::<Vec<RoutingPreference>>(routes_str).unwrap();
|
||||||
|
|
||||||
let router = RouterModelV1::new(llm_routes, "test-model".to_string(), 2000);
|
let router = RouterModelV1::new(llm_routes, "test-model".to_string(), 2000);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -187,24 +187,11 @@ pub struct ModelUsagePreference {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct LlmRoute {
|
pub struct RoutingPreference {
|
||||||
pub name: String,
|
pub name: String,
|
||||||
pub description: String,
|
pub description: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<&LlmProvider> for LlmRoute {
|
|
||||||
fn from(provider: &LlmProvider) -> Self {
|
|
||||||
Self {
|
|
||||||
name: provider.name.to_string(),
|
|
||||||
description: provider
|
|
||||||
.usage
|
|
||||||
.as_ref()
|
|
||||||
.cloned()
|
|
||||||
.unwrap_or_else(|| "No description available".to_string()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
//TODO: use enum for model, but if there is a new model, we need to update the code
|
//TODO: use enum for model, but if there is a new model, we need to update the code
|
||||||
pub struct LlmProvider {
|
pub struct LlmProvider {
|
||||||
|
|
@ -218,6 +205,7 @@ pub struct LlmProvider {
|
||||||
pub port: Option<u16>,
|
pub port: Option<u16>,
|
||||||
pub rate_limits: Option<LlmRatelimit>,
|
pub rate_limits: Option<LlmRatelimit>,
|
||||||
pub usage: Option<String>,
|
pub usage: Option<String>,
|
||||||
|
pub routing_preferences: Option<Vec<RoutingPreference>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait IntoModels {
|
pub trait IntoModels {
|
||||||
|
|
@ -256,6 +244,7 @@ impl Default for LlmProvider {
|
||||||
port: None,
|
port: None,
|
||||||
rate_limits: None,
|
rate_limits: None,
|
||||||
usage: None,
|
usage: None,
|
||||||
|
routing_preferences: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -368,7 +357,7 @@ mod test {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_deserialize_configuration() {
|
fn test_deserialize_configuration() {
|
||||||
let ref_config = fs::read_to_string(
|
let ref_config = fs::read_to_string(
|
||||||
"../../docs/source/resources/includes/arch_config_full_reference.yaml",
|
"../../docs/source/resources/includes/arch_config_full_reference_rendered.yaml",
|
||||||
)
|
)
|
||||||
.expect("reference config file not found");
|
.expect("reference config file not found");
|
||||||
|
|
||||||
|
|
@ -429,7 +418,7 @@ mod test {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_tool_conversion() {
|
fn test_tool_conversion() {
|
||||||
let ref_config = fs::read_to_string(
|
let ref_config = fs::read_to_string(
|
||||||
"../../docs/source/resources/includes/arch_config_full_reference.yaml",
|
"../../docs/source/resources/includes/arch_config_full_reference_rendered.yaml",
|
||||||
)
|
)
|
||||||
.expect("reference config file not found");
|
.expect("reference config file not found");
|
||||||
let config: super::Configuration = serde_yaml::from_str(&ref_config).unwrap();
|
let config: super::Configuration = serde_yaml::from_str(&ref_config).unwrap();
|
||||||
|
|
|
||||||
|
|
@ -58,7 +58,16 @@ impl TryFrom<Vec<LlmProvider>> for LlmProviders {
|
||||||
let name = llm_provider.name.clone();
|
let name = llm_provider.name.clone();
|
||||||
if llm_providers
|
if llm_providers
|
||||||
.providers
|
.providers
|
||||||
.insert(name.clone(), llm_provider)
|
.insert(name.clone(), llm_provider.clone())
|
||||||
|
.is_some()
|
||||||
|
{
|
||||||
|
return Err(LlmProvidersNewError::DuplicateName(name));
|
||||||
|
}
|
||||||
|
|
||||||
|
// also add model_id as key for provider lookup
|
||||||
|
if llm_providers
|
||||||
|
.providers
|
||||||
|
.insert(llm_provider.model.clone().unwrap(), llm_provider)
|
||||||
.is_some()
|
.is_some()
|
||||||
{
|
{
|
||||||
return Err(LlmProvidersNewError::DuplicateName(name));
|
return Err(LlmProvidersNewError::DuplicateName(name));
|
||||||
|
|
|
||||||
|
|
@ -113,16 +113,10 @@ impl StreamContext {
|
||||||
}
|
}
|
||||||
|
|
||||||
debug!(
|
debug!(
|
||||||
"request received: llm provider hint: {}, selected llm: {}, model: {}",
|
"request received: llm provider hint: {}, selected provider: {}",
|
||||||
self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
|
self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER)
|
||||||
.unwrap_or_default(),
|
.unwrap_or_default(),
|
||||||
self.llm_provider.as_ref().unwrap().name,
|
self.llm_provider.as_ref().unwrap().name
|
||||||
self.llm_provider
|
|
||||||
.as_ref()
|
|
||||||
.unwrap()
|
|
||||||
.model
|
|
||||||
.as_ref()
|
|
||||||
.unwrap_or(&String::new())
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -313,6 +307,11 @@ impl HttpContext for StreamContext {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
"on_http_request_body: deserialized body: {}",
|
||||||
|
serde_json::to_string(&deserialized_body).unwrap_or_default()
|
||||||
|
);
|
||||||
|
|
||||||
self.user_message = deserialized_body
|
self.user_message = deserialized_body
|
||||||
.messages
|
.messages
|
||||||
.iter()
|
.iter()
|
||||||
|
|
@ -349,8 +348,8 @@ impl HttpContext for StreamContext {
|
||||||
};
|
};
|
||||||
|
|
||||||
info!(
|
info!(
|
||||||
"on_http_request_body: provider: {}, model requested: {}, model selected: {}",
|
"on_http_request_body: provider: {}, model requested (in body): {}, model selected: {}",
|
||||||
self.llm_provider().name,
|
self.llm_provider().provider_interface,
|
||||||
model_requested,
|
model_requested,
|
||||||
model_name.unwrap_or(&"None".to_string()),
|
model_name.unwrap_or(&"None".to_string()),
|
||||||
);
|
);
|
||||||
|
|
|
||||||
15
demos/samples_java/weather_forcecast_service/.vscode/launch.json
vendored
Normal file
15
demos/samples_java/weather_forcecast_service/.vscode/launch.json
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"type": "java",
|
||||||
|
"name": "WeatherForecastApplication",
|
||||||
|
"request": "launch",
|
||||||
|
"mainClass": "weather.WeatherForecastApplication",
|
||||||
|
"projectName": "weather-forecast-service"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
@ -10,7 +10,7 @@ listeners:
|
||||||
llm_providers:
|
llm_providers:
|
||||||
|
|
||||||
- model: openai/llama3.2
|
- model: openai/llama3.2
|
||||||
endpoint: host.docker.internal:11434
|
base_url: http://host.docker.internal:11434
|
||||||
default: true
|
default: true
|
||||||
|
|
||||||
system_prompt: |
|
system_prompt: |
|
||||||
|
|
|
||||||
|
|
@ -9,22 +9,21 @@ listeners:
|
||||||
|
|
||||||
llm_providers:
|
llm_providers:
|
||||||
|
|
||||||
- access_key: $OPENAI_API_KEY
|
- model: openai/gpt-4o-mini
|
||||||
model: openai/gpt-4o-mini
|
access_key: $OPENAI_API_KEY
|
||||||
|
|
||||||
- access_key: $OPENAI_API_KEY
|
|
||||||
model: openai/gpt-4.1
|
|
||||||
default: true
|
default: true
|
||||||
|
|
||||||
- name: code_generation
|
- model: openai/gpt-4o
|
||||||
access_key: $OPENAI_API_KEY
|
access_key: $OPENAI_API_KEY
|
||||||
model: openai/gpt-4.1
|
routing_preferences:
|
||||||
usage: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
- name: code understanding
|
||||||
|
description: understand and explain existing code snippets, functions, or libraries
|
||||||
|
|
||||||
- name: code_understanding
|
- model: openai/gpt-4.1
|
||||||
access_key: $OPENAI_API_KEY
|
access_key: $OPENAI_API_KEY
|
||||||
model: openai/gpt-4o-mini
|
routing_preferences:
|
||||||
usage: understand and explain existing code snippets, functions, or libraries
|
- name: code generation
|
||||||
|
description: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
||||||
|
|
||||||
tracing:
|
tracing:
|
||||||
random_sampling: 100
|
random_sampling: 100
|
||||||
|
|
|
||||||
|
|
@ -1,45 +0,0 @@
|
||||||
version: v0.1.0
|
|
||||||
|
|
||||||
routing:
|
|
||||||
model: Arch-Router
|
|
||||||
llm_provider: arch-router
|
|
||||||
|
|
||||||
listeners:
|
|
||||||
egress_traffic:
|
|
||||||
address: 0.0.0.0
|
|
||||||
port: 12000
|
|
||||||
message_format: openai
|
|
||||||
timeout: 30s
|
|
||||||
|
|
||||||
llm_providers:
|
|
||||||
|
|
||||||
- name: arch-router
|
|
||||||
provider_interface: arch
|
|
||||||
model: hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M
|
|
||||||
endpoint: host.docker.internal:11434
|
|
||||||
|
|
||||||
- name: gpt-4o-mini
|
|
||||||
provider_interface: openai
|
|
||||||
access_key: $OPENAI_API_KEY
|
|
||||||
model: gpt-4o-mini
|
|
||||||
|
|
||||||
- name: gpt-4.1
|
|
||||||
provider_interface: openai
|
|
||||||
access_key: $OPENAI_API_KEY
|
|
||||||
model: gpt-4.1
|
|
||||||
default: true
|
|
||||||
|
|
||||||
- name: code_generation
|
|
||||||
access_key: $OPENAI_API_KEY
|
|
||||||
provider_interface: openai
|
|
||||||
model: gpt-4.1
|
|
||||||
usage: generating new code snippets, functions, or boilerplate based on user prompts or requirements
|
|
||||||
|
|
||||||
- name: code_understanding
|
|
||||||
provider_interface: openai
|
|
||||||
access_key: $OPENAI_API_KEY
|
|
||||||
model: gpt-4.1
|
|
||||||
usage: understand and explain existing code snippets, functions, or libraries
|
|
||||||
|
|
||||||
tracing:
|
|
||||||
random_sampling: 100
|
|
||||||
|
|
@ -0,0 +1,29 @@
|
||||||
|
listeners:
|
||||||
|
egress_traffic:
|
||||||
|
address: 0.0.0.0
|
||||||
|
message_format: openai
|
||||||
|
port: 12000
|
||||||
|
timeout: 30s
|
||||||
|
llm_providers:
|
||||||
|
- access_key: $OPENAI_API_KEY
|
||||||
|
default: true
|
||||||
|
model: gpt-4o-mini
|
||||||
|
name: openai/gpt-4o-mini
|
||||||
|
provider_interface: openai
|
||||||
|
- access_key: $OPENAI_API_KEY
|
||||||
|
model: gpt-4o
|
||||||
|
name: openai/gpt-4o
|
||||||
|
provider_interface: openai
|
||||||
|
routing_preferences:
|
||||||
|
- description: b
|
||||||
|
name: code understanding
|
||||||
|
- access_key: $OPENAI_API_KEY
|
||||||
|
model: gpt-4.1
|
||||||
|
name: openai/gpt-4.1
|
||||||
|
provider_interface: openai
|
||||||
|
routing_preferences:
|
||||||
|
- description: a
|
||||||
|
name: code understanding
|
||||||
|
tracing:
|
||||||
|
random_sampling: 100
|
||||||
|
version: v0.1.0
|
||||||
|
|
@ -2,18 +2,18 @@ POST http://localhost:12000/v1/chat/completions
|
||||||
Content-Type: application/json
|
Content-Type: application/json
|
||||||
|
|
||||||
{
|
{
|
||||||
|
"model": "openai/gpt-4.1",
|
||||||
"messages": [
|
"messages": [
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": "hi"
|
"content": "hi"
|
||||||
}
|
}
|
||||||
],
|
]
|
||||||
"model": "none"
|
|
||||||
}
|
}
|
||||||
HTTP 200
|
HTTP 200
|
||||||
[Asserts]
|
[Asserts]
|
||||||
header "content-type" == "application/json"
|
header "content-type" == "application/json"
|
||||||
jsonpath "$.model" matches /^gpt-4.1/
|
jsonpath "$.model" matches /^gpt-4o-mini/
|
||||||
jsonpath "$.usage" != null
|
jsonpath "$.usage" != null
|
||||||
jsonpath "$.choices[0].message.content" != null
|
jsonpath "$.choices[0].message.content" != null
|
||||||
jsonpath "$.choices[0].message.role" == "assistant"
|
jsonpath "$.choices[0].message.role" == "assistant"
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,7 @@ llm_providers:
|
||||||
model: mistral/mistral-8x7b
|
model: mistral/mistral-8x7b
|
||||||
|
|
||||||
- model: mistral/mistral-7b-instruct
|
- model: mistral/mistral-7b-instruct
|
||||||
endpoint: mistral_local
|
base_url: http://mistral_local
|
||||||
|
|
||||||
# provides a way to override default settings for the arch system
|
# provides a way to override default settings for the arch system
|
||||||
overrides:
|
overrides:
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,95 @@
|
||||||
|
version: v0.1
|
||||||
|
|
||||||
|
listeners:
|
||||||
|
ingress_traffic:
|
||||||
|
address: 0.0.0.0
|
||||||
|
port: 10000
|
||||||
|
message_format: openai
|
||||||
|
timeout: 5s
|
||||||
|
egress_traffic:
|
||||||
|
address: 0.0.0.0
|
||||||
|
port: 12000
|
||||||
|
message_format: openai
|
||||||
|
timeout: 5s
|
||||||
|
|
||||||
|
# Arch provides round-robin load balancing across the different endpoints, managed via the cluster subsystem.
|
||||||
|
endpoints:
|
||||||
|
app_server:
|
||||||
|
# value could be ip address or a hostname with port
|
||||||
|
# this could also be a list of endpoints for load balancing
|
||||||
|
# for example endpoint: [ ip1:port, ip2:port ]
|
||||||
|
endpoint: 127.0.0.1:80
|
||||||
|
# max time to wait for a connection to be established
|
||||||
|
connect_timeout: 0.005s
|
||||||
|
|
||||||
|
mistral_local:
|
||||||
|
endpoint: 127.0.0.1:8001
|
||||||
|
|
||||||
|
error_target:
|
||||||
|
endpoint: error_target_1
|
||||||
|
|
||||||
|
# Centralized way to manage LLMs: keys, retry logic, failover, and limits
|
||||||
|
llm_providers:
|
||||||
|
- name: openai/gpt-4o
|
||||||
|
provider_interface: openai
|
||||||
|
access_key: $OPENAI_API_KEY
|
||||||
|
model: gpt-4o
|
||||||
|
default: true
|
||||||
|
|
||||||
|
- name: mistral/mistral-8x7b
|
||||||
|
provider_interface: mistral
|
||||||
|
access_key: $MISTRAL_API_KEY
|
||||||
|
model: mistral-8x7b
|
||||||
|
|
||||||
|
- name: mistral/mistral-7b-instruct
|
||||||
|
provider_interface: mistral
|
||||||
|
model: mistral-7b-instruct
|
||||||
|
base_url: http://mistral_local
|
||||||
|
|
||||||
|
# provides a way to override default settings for the arch system
|
||||||
|
overrides:
|
||||||
|
# By default Arch uses an NLI + embedding approach to match an incoming prompt to a prompt target.
|
||||||
|
# The intent matching threshold is kept at 0.80 by default; you can override it here if you would like
|
||||||
|
prompt_target_intent_matching_threshold: 0.60
|
||||||
|
|
||||||
|
# default system prompt used by all prompt targets
|
||||||
|
system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
|
||||||
|
|
||||||
|
prompt_guards:
|
||||||
|
input_guards:
|
||||||
|
jailbreak:
|
||||||
|
on_exception:
|
||||||
|
message: Looks like you're curious about my abilities, but I can only provide assistance within my programmed parameters.
|
||||||
|
|
||||||
|
prompt_targets:
|
||||||
|
- name: information_extraction
|
||||||
|
default: true
|
||||||
|
description: handle all scenarios that are question and answer in nature, like summarization, information extraction, etc.
|
||||||
|
endpoint:
|
||||||
|
name: app_server
|
||||||
|
path: /agent/summary
|
||||||
|
http_method: POST
|
||||||
|
# Arch uses the default LLM and treats the response from the endpoint as the prompt to send to the LLM
|
||||||
|
auto_llm_dispatch_on_response: true
|
||||||
|
# override system prompt for this prompt target
|
||||||
|
system_prompt: You are a helpful information extraction assistant. Use the information that is provided to you.
|
||||||
|
|
||||||
|
- name: reboot_network_device
|
||||||
|
description: Reboot a specific network device
|
||||||
|
endpoint:
|
||||||
|
name: app_server
|
||||||
|
path: /agent/action
|
||||||
|
parameters:
|
||||||
|
- name: device_id
|
||||||
|
type: str
|
||||||
|
description: Identifier of the network device to reboot.
|
||||||
|
required: true
|
||||||
|
- name: confirmation
|
||||||
|
type: bool
|
||||||
|
description: Confirmation flag to proceed with reboot.
|
||||||
|
default: false
|
||||||
|
enum: [true, false]
|
||||||
|
|
||||||
|
tracing:
|
||||||
|
# Sampling rate. Note: by default, Arch uses OpenTelemetry-compatible tracing.
|
||||||
|
sampling_rate: 0.1
|
||||||
1
model_server/.vscode/launch.json
vendored
1
model_server/.vscode/launch.json
vendored
|
|
@ -4,6 +4,7 @@
|
||||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
"version": "0.2.0",
|
"version": "0.2.0",
|
||||||
"configurations": [
|
"configurations": [
|
||||||
|
|
||||||
{
|
{
|
||||||
"name": "model server",
|
"name": "model server",
|
||||||
"type": "debugpy",
|
"type": "debugpy",
|
||||||
|
|
|
||||||
7
model_server/.vscode/settings.json
vendored
Normal file
7
model_server/.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
{
|
||||||
|
"python.testing.pytestArgs": [
|
||||||
|
"."
|
||||||
|
],
|
||||||
|
"python.testing.unittestEnabled": false,
|
||||||
|
"python.testing.pytestEnabled": true
|
||||||
|
}
|
||||||
15
tests/archgw/.vscode/launch.json
vendored
Normal file
15
tests/archgw/.vscode/launch.json
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Python Debugger: Current File",
|
||||||
|
"type": "debugpy",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${file}",
|
||||||
|
"console": "integratedTerminal"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
7
tests/archgw/.vscode/settings.json
vendored
Normal file
7
tests/archgw/.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
{
|
||||||
|
"python.testing.pytestArgs": [
|
||||||
|
"."
|
||||||
|
],
|
||||||
|
"python.testing.unittestEnabled": false,
|
||||||
|
"python.testing.pytestEnabled": true
|
||||||
|
}
|
||||||
15
tests/e2e/.vscode/launch.json
vendored
Normal file
15
tests/e2e/.vscode/launch.json
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Python Debugger: Current File",
|
||||||
|
"type": "debugpy",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${file}",
|
||||||
|
"console": "integratedTerminal"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
7
tests/e2e/.vscode/settings.json
vendored
Normal file
7
tests/e2e/.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
{
|
||||||
|
"python.testing.pytestArgs": [
|
||||||
|
"."
|
||||||
|
],
|
||||||
|
"python.testing.unittestEnabled": false,
|
||||||
|
"python.testing.pytestEnabled": true
|
||||||
|
}
|
||||||
15
tests/modelserver/.vscode/launch.json
vendored
Normal file
15
tests/modelserver/.vscode/launch.json
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Python Debugger: Current File",
|
||||||
|
"type": "debugpy",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${file}",
|
||||||
|
"console": "integratedTerminal"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
7
tests/modelserver/.vscode/settings.json
vendored
Normal file
7
tests/modelserver/.vscode/settings.json
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
{
|
||||||
|
"python.testing.pytestArgs": [
|
||||||
|
"."
|
||||||
|
],
|
||||||
|
"python.testing.unittestEnabled": false,
|
||||||
|
"python.testing.pytestEnabled": true
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue