adding more changes

2026-06-26 15:39:40 +02:00 · 2025-05-12 13:09:22 -07:00 · 2025-05-12 13:09:22 -07:00 · 85ab948b13
commit 85ab948b13
parent f13fc76a4a
13 changed files with 4 additions and 366 deletions
--- a/crates/common/src/routing.rs
+++ b/crates/common/src/routing.rs
@@ -30,8 +30,6 @@ pub fn get_llm_provider(
        ProviderHint::Name(name) => llm_providers.get(&name),
    });

-    info!("selected provider: maybe_provider: {:?}", maybe_provider);
-
    if let Some(provider) = maybe_provider {
        return provider;
    }
--- a/crates/llm_gateway/src/lib.rs
+++ b/crates/llm_gateway/src/lib.rs
@@ -3,8 +3,6 @@ use proxy_wasm::traits::*;
 use proxy_wasm::types::*;

 mod filter_context;
-mod llm_routing;
-mod llm_routing_consts;
 mod metrics;
 mod stream_context;

--- a/crates/llm_gateway/src/llm_routing.rs
+++ b/crates/llm_gateway/src/llm_routing.rs
@@ -1,106 +0,0 @@
-// use std::rc::Rc;
-// use std::time::Duration;
-
-// use common::api::open_ai::{ChatCompletionsRequest, Message};
-// use common::configuration::LlmProvider;
-// use common::consts::{ARCH_INTERNAL_CLUSTER_NAME, ARCH_UPSTREAM_HOST_HEADER};
-// use common::errors::ServerError;
-// use common::http::{CallArgs, Client};
-// use log::{info, warn};
-// use proxy_wasm::traits::HttpContext;
-// use proxy_wasm::types::Action;
-
-// use crate::llm_routing_consts::SYSTEM_PROMPT;
-// use crate::stream_context::{CallContext, StreamContext};
-
-// pub trait Routing {
-//     fn route(&self) -> Action;
-// }
-
-// impl Routing for StreamContext {
-//     fn route(&self) -> Action {
-//         let usage_based_providers = self
-//             .llm_providers
-//             .iter()
-//             .filter(|(_, provider)| provider.usage.is_some())
-//             .map(|(_, provider)| provider.clone())
-//             .collect::<Vec<Rc<LlmProvider>>>();
-
-//         info!(
-//             "usage based providers found: {}",
-//             usage_based_providers
-//                 .iter()
-//                 .map(|provider| provider.name.clone())
-//                 .collect::<Vec<String>>()
-//                 .join(", ")
-//         );
-
-//         if usage_based_providers.is_empty() {
-//             self.set_http_request_body(
-//                 0,
-//                 self.request_size.unwrap(),
-//                 self.request_body.as_ref().unwrap().as_bytes(),
-//             );
-//             return Action::Continue;
-//         }
-
-//         let llm_routes_str = r#"- name: gpt-4o
-//   description: simple requests, basic fact retrieval, easy to answer
-// - name: o4-mini()
-//   description: complex reasoning problem, require multi step answer"#;
-
-//         let chat_completions_request_messages_str =
-//             serde_json::to_string(&self.chat_completion_request.as_ref().unwrap().messages)
-//                 .expect("failed to serialize llm routing request messages");
-
-//         let system_prompt_formatted = SYSTEM_PROMPT
-//             .replace("{routes}", llm_routes_str)
-//             .replace("{conversation}", &chat_completions_request_messages_str);
-
-//         let message = Message {
-//             role: "user".to_string(),
-//             content: Some(system_prompt_formatted),
-//             model: None,
-//             tool_calls: None,
-//             tool_call_id: None,
-//         };
-
-//         let llm_routing_request = ChatCompletionsRequest {
-//             model: "cotran2/llama-1b-4-26".to_string(),
-//             messages: vec![message],
-//             tools: None,
-//             stream: false,
-//             stream_options: None,
-//             metadata: None,
-//         };
-
-//         let llm_routing_request_str = serde_json::to_string(&llm_routing_request)
-//             .expect("failed to serialize llm routing request");
-
-//         let headers = vec![
-//             (":method", "POST"),
-//             (ARCH_UPSTREAM_HOST_HEADER, "gcp_hosted_outer_llm"),
-//             (":path", "/v1/chat/completions"),
-//             (":authority", "gcp_hosted_outer_llm"),
-//             ("content-type", "application/json"),
-//             ("x-envoy-max-retries", "3"),
-//             ("x-envoy-upstream-rq-timeout-ms", "5000"),
-//         ];
-
-//         let call_args = CallArgs::new(
-//             ARCH_INTERNAL_CLUSTER_NAME,
-//             "/v1/chat/completions",
-//             headers,
-//             llm_routing_request_str.as_bytes().into(),
-//             vec![],
-//             Duration::from_secs(5),
-//         );
-
-//         if let Err(e) = self.http_call(call_args, CallContext {}) {
-//             warn!("failed to call LLM provider: {}", e);
-//             self.send_server_error(ServerError::HttpDispatch(e), None);
-//         }
-
-//         Action::Pause
-//     }
-// }
--- a/crates/llm_gateway/src/llm_routing_consts.rs
+++ b/crates/llm_gateway/src/llm_routing_consts.rs
@@ -1,32 +0,0 @@
-// pub const SYSTEM_PROMPT: &str = r#"
-// You are an advanced Routing Assistant designed to select the optimal route based on user requests.
-// Your task is to analyze conversations and match them to the most appropriate predefined route.
-// Review the available routes config:
-
-// # ROUTES CONFIG START
-// {routes}
-// # ROUTES CONFIG END
-
-// Examine the following conversation between a user and an assistant:
-
-// # CONVERSATION START
-// {conversation}
-// # CONVERSATION END
-
-// Your goal is to identify the most appropriate route that matches the user's LATEST intent. Follow these steps:
-
-// 1. Carefully read and analyze the provided conversation, focusing on the user's latest request and the conversation scenario.
-// 2. Check if the user's request and scenario matches any of the routes in the routing configuration (focus on the description).
-// 3. Find the route that best matches.
-// 4. Use context clues from the entire conversation to determine the best fit.
-// 5. Return the best match possible. You only response the name of the route that best matches the user's request, use the exact name in the routes config.
-// 6. If no route relatively close to matches the user's latest intent or user last message is thank you or greeting, return an empty route ''.
-
-// # OUTPUT FORMAT
-// Your final output must follow this JSON format:
-// {
-//   "route": "route_name" # The matched route name, or empty string '' if no match
-// }
-
-// Based on your analysis, provide only the JSON object as your final output with no additional text, explanations, or whitespace.
-// "#;
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@@ -426,7 +426,6 @@ impl HttpContext for StreamContext {
        self.request_size = Some(body_size);

        return Action::Continue;
-        // return self.route();
    }

    fn on_http_response_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {