mirror of
https://github.com/katanemo/plano.git
synced 2026-06-29 15:49:40 +02:00
add more changes
This commit is contained in:
parent
cd90faf50c
commit
66a971b086
8 changed files with 35 additions and 9 deletions
|
|
@ -25,6 +25,7 @@ pub struct Configuration {
|
|||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
pub struct Overrides {
|
||||
pub prompt_target_intent_matching_threshold: Option<f64>,
|
||||
pub optimize_context_window: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
|
|
|
|||
|
|
@ -137,9 +137,20 @@ impl HttpContext for StreamContext {
|
|||
.map(|(_, pt)| pt.into())
|
||||
.collect();
|
||||
|
||||
let mut metadata = deserialized_body.metadata.clone();
|
||||
|
||||
if let Some(overrides) = self.overrides.as_ref() {
|
||||
if overrides.optimize_context_window.unwrap_or_default() {
|
||||
if metadata.is_none() {
|
||||
metadata = Some(HashMap::new());
|
||||
}
|
||||
metadata.as_mut().unwrap().insert("optimize_context_window".to_string(), "true".to_string());
|
||||
}
|
||||
}
|
||||
|
||||
let arch_fc_chat_completion_request = ChatCompletionsRequest {
|
||||
messages: deserialized_body.messages.clone(),
|
||||
metadata: deserialized_body.metadata.clone(),
|
||||
metadata,
|
||||
stream: deserialized_body.stream,
|
||||
model: "--".to_string(),
|
||||
stream_options: deserialized_body.stream_options.clone(),
|
||||
|
|
@ -157,7 +168,7 @@ impl HttpContext for StreamContext {
|
|||
};
|
||||
|
||||
debug!("sending request to model server");
|
||||
trace!("request body: {}", json_data);
|
||||
debug!("request body: {}", json_data);
|
||||
|
||||
let mut headers = vec![
|
||||
(ARCH_UPSTREAM_HOST_HEADER, MODEL_SERVER_NAME),
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ pub struct StreamCallContext {
|
|||
pub struct StreamContext {
|
||||
system_prompt: Rc<Option<String>>,
|
||||
pub prompt_targets: Rc<HashMap<String, PromptTarget>>,
|
||||
_overrides: Rc<Option<Overrides>>,
|
||||
pub overrides: Rc<Option<Overrides>>,
|
||||
pub metrics: Rc<Metrics>,
|
||||
pub callouts: RefCell<HashMap<u32, StreamCallContext>>,
|
||||
pub context_id: u32,
|
||||
|
|
@ -89,7 +89,7 @@ impl StreamContext {
|
|||
streaming_response: false,
|
||||
user_prompt: None,
|
||||
is_chat_completions_request: false,
|
||||
_overrides: overrides,
|
||||
overrides: overrides,
|
||||
request_id: None,
|
||||
traceparent: None,
|
||||
_tracing: tracing,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue