mirror of
https://github.com/katanemo/plano.git
synced 2026-06-26 15:39:40 +02:00
adding more changes
This commit is contained in:
parent
f13fc76a4a
commit
85ab948b13
13 changed files with 4 additions and 366 deletions
|
|
@ -30,8 +30,6 @@ pub fn get_llm_provider(
|
|||
ProviderHint::Name(name) => llm_providers.get(&name),
|
||||
});
|
||||
|
||||
info!("selected provider: maybe_provider: {:?}", maybe_provider);
|
||||
|
||||
if let Some(provider) = maybe_provider {
|
||||
return provider;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,8 +3,6 @@ use proxy_wasm::traits::*;
|
|||
use proxy_wasm::types::*;
|
||||
|
||||
mod filter_context;
|
||||
mod llm_routing;
|
||||
mod llm_routing_consts;
|
||||
mod metrics;
|
||||
mod stream_context;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,106 +0,0 @@
|
|||
// use std::rc::Rc;
|
||||
// use std::time::Duration;
|
||||
|
||||
// use common::api::open_ai::{ChatCompletionsRequest, Message};
|
||||
// use common::configuration::LlmProvider;
|
||||
// use common::consts::{ARCH_INTERNAL_CLUSTER_NAME, ARCH_UPSTREAM_HOST_HEADER};
|
||||
// use common::errors::ServerError;
|
||||
// use common::http::{CallArgs, Client};
|
||||
// use log::{info, warn};
|
||||
// use proxy_wasm::traits::HttpContext;
|
||||
// use proxy_wasm::types::Action;
|
||||
|
||||
// use crate::llm_routing_consts::SYSTEM_PROMPT;
|
||||
// use crate::stream_context::{CallContext, StreamContext};
|
||||
|
||||
// pub trait Routing {
|
||||
// fn route(&self) -> Action;
|
||||
// }
|
||||
|
||||
// impl Routing for StreamContext {
|
||||
// fn route(&self) -> Action {
|
||||
// let usage_based_providers = self
|
||||
// .llm_providers
|
||||
// .iter()
|
||||
// .filter(|(_, provider)| provider.usage.is_some())
|
||||
// .map(|(_, provider)| provider.clone())
|
||||
// .collect::<Vec<Rc<LlmProvider>>>();
|
||||
|
||||
// info!(
|
||||
// "usage based providers found: {}",
|
||||
// usage_based_providers
|
||||
// .iter()
|
||||
// .map(|provider| provider.name.clone())
|
||||
// .collect::<Vec<String>>()
|
||||
// .join(", ")
|
||||
// );
|
||||
|
||||
// if usage_based_providers.is_empty() {
|
||||
// self.set_http_request_body(
|
||||
// 0,
|
||||
// self.request_size.unwrap(),
|
||||
// self.request_body.as_ref().unwrap().as_bytes(),
|
||||
// );
|
||||
// return Action::Continue;
|
||||
// }
|
||||
|
||||
// let llm_routes_str = r#"- name: gpt-4o
|
||||
// description: simple requests, basic fact retrieval, easy to answer
|
||||
// - name: o4-mini()
|
||||
// description: complex reasoning problem, require multi step answer"#;
|
||||
|
||||
// let chat_completions_request_messages_str =
|
||||
// serde_json::to_string(&self.chat_completion_request.as_ref().unwrap().messages)
|
||||
// .expect("failed to serialize llm routing request messages");
|
||||
|
||||
// let system_prompt_formatted = SYSTEM_PROMPT
|
||||
// .replace("{routes}", llm_routes_str)
|
||||
// .replace("{conversation}", &chat_completions_request_messages_str);
|
||||
|
||||
// let message = Message {
|
||||
// role: "user".to_string(),
|
||||
// content: Some(system_prompt_formatted),
|
||||
// model: None,
|
||||
// tool_calls: None,
|
||||
// tool_call_id: None,
|
||||
// };
|
||||
|
||||
// let llm_routing_request = ChatCompletionsRequest {
|
||||
// model: "cotran2/llama-1b-4-26".to_string(),
|
||||
// messages: vec![message],
|
||||
// tools: None,
|
||||
// stream: false,
|
||||
// stream_options: None,
|
||||
// metadata: None,
|
||||
// };
|
||||
|
||||
// let llm_routing_request_str = serde_json::to_string(&llm_routing_request)
|
||||
// .expect("failed to serialize llm routing request");
|
||||
|
||||
// let headers = vec![
|
||||
// (":method", "POST"),
|
||||
// (ARCH_UPSTREAM_HOST_HEADER, "gcp_hosted_outer_llm"),
|
||||
// (":path", "/v1/chat/completions"),
|
||||
// (":authority", "gcp_hosted_outer_llm"),
|
||||
// ("content-type", "application/json"),
|
||||
// ("x-envoy-max-retries", "3"),
|
||||
// ("x-envoy-upstream-rq-timeout-ms", "5000"),
|
||||
// ];
|
||||
|
||||
// let call_args = CallArgs::new(
|
||||
// ARCH_INTERNAL_CLUSTER_NAME,
|
||||
// "/v1/chat/completions",
|
||||
// headers,
|
||||
// llm_routing_request_str.as_bytes().into(),
|
||||
// vec![],
|
||||
// Duration::from_secs(5),
|
||||
// );
|
||||
|
||||
// if let Err(e) = self.http_call(call_args, CallContext {}) {
|
||||
// warn!("failed to call LLM provider: {}", e);
|
||||
// self.send_server_error(ServerError::HttpDispatch(e), None);
|
||||
// }
|
||||
|
||||
// Action::Pause
|
||||
// }
|
||||
// }
|
||||
|
|
@ -1,32 +0,0 @@
|
|||
// pub const SYSTEM_PROMPT: &str = r#"
|
||||
// You are an advanced Routing Assistant designed to select the optimal route based on user requests.
|
||||
// Your task is to analyze conversations and match them to the most appropriate predefined route.
|
||||
// Review the available routes config:
|
||||
|
||||
// # ROUTES CONFIG START
|
||||
// {routes}
|
||||
// # ROUTES CONFIG END
|
||||
|
||||
// Examine the following conversation between a user and an assistant:
|
||||
|
||||
// # CONVERSATION START
|
||||
// {conversation}
|
||||
// # CONVERSATION END
|
||||
|
||||
// Your goal is to identify the most appropriate route that matches the user's LATEST intent. Follow these steps:
|
||||
|
||||
// 1. Carefully read and analyze the provided conversation, focusing on the user's latest request and the conversation scenario.
|
||||
// 2. Check if the user's request and scenario match any of the routes in the routing configuration (focus on the description).
|
||||
// 3. Find the route that best matches.
|
||||
// 4. Use context clues from the entire conversation to determine the best fit.
|
||||
// 5. Return the best match possible. Respond only with the name of the route that best matches the user's request, using the exact name from the routes config.
|
||||
// 6. If no route is reasonably close to the user's latest intent, or the user's last message is a thank-you or a greeting, return an empty route ''.
|
||||
|
||||
// # OUTPUT FORMAT
|
||||
// Your final output must follow this JSON format:
|
||||
// {
|
||||
// "route": "route_name" # The matched route name, or empty string '' if no match
|
||||
// }
|
||||
|
||||
// Based on your analysis, provide only the JSON object as your final output with no additional text, explanations, or whitespace.
|
||||
// "#;
|
||||
|
|
@ -426,7 +426,6 @@ impl HttpContext for StreamContext {
|
|||
self.request_size = Some(body_size);
|
||||
|
||||
return Action::Continue;
|
||||
// return self.route();
|
||||
}
|
||||
|
||||
fn on_http_response_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue