mirror of
https://github.com/katanemo/plano.git
synced 2026-05-21 13:55:15 +02:00
add pluggable session cache with Redis backend
This commit is contained in:
parent
8dedf0bec1
commit
50670f843d
10 changed files with 353 additions and 75 deletions
|
|
@ -1,4 +1,4 @@
|
|||
use std::{collections::HashMap, sync::Arc, time::Duration, time::Instant};
|
||||
use std::{collections::HashMap, sync::Arc, time::Duration};
|
||||
|
||||
use common::{
|
||||
configuration::TopLevelRoutingPreference,
|
||||
|
|
@ -9,7 +9,6 @@ use super::router_model::{ModelUsagePreference, RoutingPreference};
|
|||
use hermesllm::apis::openai::Message;
|
||||
use hyper::header;
|
||||
use thiserror::Error;
|
||||
use tokio::sync::RwLock;
|
||||
use tracing::{debug, info};
|
||||
|
||||
use super::http::{self, post_and_extract_content};
|
||||
|
|
@ -17,17 +16,11 @@ use super::model_metrics::ModelMetricsService;
|
|||
use super::router_model::RouterModel;
|
||||
|
||||
use crate::router::router_model_v1;
|
||||
use crate::session_cache::SessionCache;
|
||||
|
||||
pub use crate::session_cache::CachedRoute;
|
||||
|
||||
const DEFAULT_SESSION_TTL_SECONDS: u64 = 600;
|
||||
const DEFAULT_SESSION_MAX_ENTRIES: usize = 10_000;
|
||||
const MAX_SESSION_MAX_ENTRIES: usize = 10_000;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CachedRoute {
|
||||
pub model_name: String,
|
||||
pub route_name: Option<String>,
|
||||
pub cached_at: Instant,
|
||||
}
|
||||
|
||||
pub struct RouterService {
|
||||
router_url: String,
|
||||
|
|
@ -36,9 +29,8 @@ pub struct RouterService {
|
|||
routing_provider_name: String,
|
||||
top_level_preferences: HashMap<String, TopLevelRoutingPreference>,
|
||||
metrics_service: Option<Arc<ModelMetricsService>>,
|
||||
session_cache: RwLock<HashMap<String, CachedRoute>>,
|
||||
session_cache: Arc<dyn SessionCache>,
|
||||
session_ttl: Duration,
|
||||
session_max_entries: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
|
|
@ -60,7 +52,7 @@ impl RouterService {
|
|||
routing_model_name: String,
|
||||
routing_provider_name: String,
|
||||
session_ttl_seconds: Option<u64>,
|
||||
session_max_entries: Option<usize>,
|
||||
session_cache: Arc<dyn SessionCache>,
|
||||
) -> Self {
|
||||
let top_level_preferences: HashMap<String, TopLevelRoutingPreference> = top_level_prefs
|
||||
.map_or_else(HashMap::new, |prefs| {
|
||||
|
|
@ -93,9 +85,6 @@ impl RouterService {
|
|||
|
||||
let session_ttl =
|
||||
Duration::from_secs(session_ttl_seconds.unwrap_or(DEFAULT_SESSION_TTL_SECONDS));
|
||||
let session_max_entries = session_max_entries
|
||||
.unwrap_or(DEFAULT_SESSION_MAX_ENTRIES)
|
||||
.min(MAX_SESSION_MAX_ENTRIES);
|
||||
|
||||
RouterService {
|
||||
router_url,
|
||||
|
|
@ -104,65 +93,49 @@ impl RouterService {
|
|||
routing_provider_name,
|
||||
top_level_preferences,
|
||||
metrics_service,
|
||||
session_cache: RwLock::new(HashMap::new()),
|
||||
session_cache,
|
||||
session_ttl,
|
||||
session_max_entries,
|
||||
}
|
||||
}
|
||||
|
||||
/// Look up a cached routing decision by session ID.
|
||||
/// Returns None if not found or expired.
|
||||
pub async fn get_cached_route(&self, session_id: &str) -> Option<CachedRoute> {
|
||||
let cache = self.session_cache.read().await;
|
||||
if let Some(entry) = cache.get(session_id) {
|
||||
if entry.cached_at.elapsed() < self.session_ttl {
|
||||
return Some(entry.clone());
|
||||
}
|
||||
}
|
||||
None
|
||||
self.session_cache.get(session_id).await
|
||||
}
|
||||
|
||||
/// Store a routing decision in the session cache.
|
||||
/// If at max capacity, evicts the oldest entry.
|
||||
pub async fn cache_route(
|
||||
&self,
|
||||
session_id: String,
|
||||
model_name: String,
|
||||
route_name: Option<String>,
|
||||
) {
|
||||
let mut cache = self.session_cache.write().await;
|
||||
if cache.len() >= self.session_max_entries && !cache.contains_key(&session_id) {
|
||||
if let Some(oldest_key) = cache
|
||||
.iter()
|
||||
.min_by_key(|(_, v)| v.cached_at)
|
||||
.map(|(k, _)| k.clone())
|
||||
{
|
||||
cache.remove(&oldest_key);
|
||||
}
|
||||
}
|
||||
cache.insert(
|
||||
session_id,
|
||||
CachedRoute {
|
||||
model_name,
|
||||
route_name,
|
||||
cached_at: Instant::now(),
|
||||
},
|
||||
);
|
||||
self.session_cache
|
||||
.put(
|
||||
&session_id,
|
||||
CachedRoute {
|
||||
model_name,
|
||||
route_name,
|
||||
},
|
||||
self.session_ttl,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
/// Remove all expired entries from the session cache.
|
||||
pub async fn cleanup_expired_sessions(&self) {
|
||||
let mut cache = self.session_cache.write().await;
|
||||
let before = cache.len();
|
||||
cache.retain(|_, entry| entry.cached_at.elapsed() < self.session_ttl);
|
||||
let removed = before - cache.len();
|
||||
if removed > 0 {
|
||||
info!(
|
||||
removed = removed,
|
||||
remaining = cache.len(),
|
||||
"cleaned up expired session cache entries"
|
||||
);
|
||||
}
|
||||
self.session_cache.cleanup_expired().await;
|
||||
}
|
||||
|
||||
/// Log a routing decision, used to surface affinity hits in structured logs.
|
||||
pub fn log_affinity_hit(session_id: &str, model_name: &str, route_name: &Option<String>) {
|
||||
info!(
|
||||
session_id = %session_id,
|
||||
model = %model_name,
|
||||
route = ?route_name,
|
||||
"returning pinned routing decision from cache"
|
||||
);
|
||||
}
|
||||
|
||||
pub async fn determine_route(
|
||||
|
|
@ -283,8 +256,11 @@ impl RouterService {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::session_cache::memory::MemorySessionCache;
|
||||
|
||||
fn make_router_service(ttl_seconds: u64, max_entries: usize) -> RouterService {
|
||||
let ttl = Duration::from_secs(ttl_seconds);
|
||||
let session_cache = Arc::new(MemorySessionCache::new(ttl, max_entries));
|
||||
RouterService::new(
|
||||
None,
|
||||
None,
|
||||
|
|
@ -292,7 +268,7 @@ mod tests {
|
|||
"Arch-Router".to_string(),
|
||||
"arch-router".to_string(),
|
||||
Some(ttl_seconds),
|
||||
Some(max_entries),
|
||||
session_cache,
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -335,8 +311,9 @@ mod tests {
|
|||
|
||||
svc.cleanup_expired_sessions().await;
|
||||
|
||||
let cache = svc.session_cache.read().await;
|
||||
assert!(cache.is_empty());
|
||||
// After cleanup, both expired entries should be gone
|
||||
assert!(svc.get_cached_route("s1").await.is_none());
|
||||
assert!(svc.get_cached_route("s2").await.is_none());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -351,11 +328,10 @@ mod tests {
|
|||
svc.cache_route("s3".to_string(), "model-c".to_string(), None)
|
||||
.await;
|
||||
|
||||
let cache = svc.session_cache.read().await;
|
||||
assert_eq!(cache.len(), 2);
|
||||
assert!(!cache.contains_key("s1"));
|
||||
assert!(cache.contains_key("s2"));
|
||||
assert!(cache.contains_key("s3"));
|
||||
// s1 should be evicted (oldest); s2 and s3 should remain
|
||||
assert!(svc.get_cached_route("s1").await.is_none());
|
||||
assert!(svc.get_cached_route("s2").await.is_some());
|
||||
assert!(svc.get_cached_route("s3").await.is_some());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -373,8 +349,9 @@ mod tests {
|
|||
)
|
||||
.await;
|
||||
|
||||
let cache = svc.session_cache.read().await;
|
||||
assert_eq!(cache.len(), 2);
|
||||
assert_eq!(cache.get("s1").unwrap().model_name, "model-a-updated");
|
||||
// Both sessions should still be present
|
||||
let s1 = svc.get_cached_route("s1").await.unwrap();
|
||||
assert_eq!(s1.model_name, "model-a-updated");
|
||||
assert!(svc.get_cached_route("s2").await.is_some());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue