mirror of
https://github.com/katanemo/plano.git
synced 2026-05-30 14:25:15 +02:00
make tiktoken token counting optional via enable_token_counting override
By default, use a cheap len/4 estimate for input token counting (metrics and ratelimit). When enable_token_counting is set to true in overrides, use tiktoken BPE for exact counts. This eliminates ~80ms of per-request latency from tiktoken in the WASM filter while keeping metrics and ratelimit functional. Made-with: Cursor
This commit is contained in:
parent
406fa92802
commit
e5f3039924
3 changed files with 19 additions and 8 deletions
|
|
@@ -131,6 +131,7 @@ pub struct Overrides {
|
|||
pub use_agent_orchestrator: Option<bool>,
|
||||
pub llm_routing_model: Option<String>,
|
||||
pub agent_orchestration_model: Option<String>,
|
||||
pub enable_token_counting: Option<bool>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue