mirror of
https://github.com/katanemo/plano.git
synced 2026-05-07 06:42:42 +02:00
make ratelimit section optional (#168)
This commit is contained in:
parent
f9e3a052fc
commit
6b70768170
4 changed files with 14 additions and 20 deletions
|
|
@ -259,7 +259,7 @@ impl RootContext for FilterContext {
|
|||
self.prompt_targets = Rc::new(prompt_targets);
|
||||
self.mode = config.mode.unwrap_or_default();
|
||||
|
||||
ratelimit::ratelimits(config.ratelimits);
|
||||
ratelimit::ratelimits(Some(config.ratelimits.unwrap_or_default()));
|
||||
|
||||
if let Some(prompt_guards) = config.prompt_guards {
|
||||
self.prompt_guards = Rc::new(prompt_guards)
|
||||
|
|
@ -280,15 +280,10 @@ impl RootContext for FilterContext {
|
|||
);
|
||||
|
||||
// No StreamContext can be created until the Embedding Store is fully initialized.
|
||||
let embedding_store;
|
||||
match self.mode {
|
||||
GatewayMode::Llm => {
|
||||
embedding_store = None;
|
||||
}
|
||||
GatewayMode::Prompt => {
|
||||
embedding_store = Some(Rc::clone(self.embeddings_store.as_ref().unwrap()))
|
||||
}
|
||||
}
|
||||
let embedding_store = match self.mode {
|
||||
GatewayMode::Llm => None,
|
||||
GatewayMode::Prompt => Some(Rc::clone(self.embeddings_store.as_ref().unwrap())),
|
||||
};
|
||||
Some(Box::new(StreamContext::new(
|
||||
context_id,
|
||||
Rc::clone(&self.metrics),
|
||||
|
|
|
|||
|
|
@ -404,6 +404,14 @@ mod test {
|
|||
use std::num::NonZero;
|
||||
use std::thread;
|
||||
|
||||
// Smoke test for the optional ratelimit configuration: hands an empty list
// to `ratelimits` and passes as long as that call does not panic. It makes
// no assertions about the state that `ratelimits` produces.
#[test]
|
||||
fn make_ratelimits_optional() {
|
||||
let ratelimits_config = Vec::new();
|
||||
|
||||
// Initialize with the empty configuration; this test spawns no other threads.
|
||||
ratelimits(Some(ratelimits_config));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn different_threads_have_same_ratelimit_data_structure() {
|
||||
let ratelimits_config = Some(vec![Ratelimit {
|
||||
|
|
|
|||
|
|
@ -944,7 +944,7 @@ impl StreamContext {
|
|||
) -> Result<(), ratelimit::Error> {
|
||||
if let Some(selector) = self.ratelimit_selector.take() {
|
||||
// Tokenize and Ratelimit.
|
||||
if let Ok(token_count) = tokenizer::token_count(model, &json_string) {
|
||||
if let Ok(token_count) = tokenizer::token_count(model, json_string) {
|
||||
ratelimit::ratelimits(None).read().unwrap().check_limit(
|
||||
model.to_owned(),
|
||||
selector,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue