Redis-backed session cache for cross-replica model affinity (#879)
Some checks failed
CI / pre-commit (push) Has been cancelled
CI / plano-tools-tests (push) Has been cancelled
CI / native-smoke-test (push) Has been cancelled
CI / docker-build (push) Has been cancelled
CI / validate-config (push) Has been cancelled
Publish docker image (latest) / build-arm64 (push) Has been cancelled
Publish docker image (latest) / build-amd64 (push) Has been cancelled
Build and Deploy Documentation / build (push) Has been cancelled
CI / security-scan (push) Has been cancelled
CI / test-prompt-gateway (push) Has been cancelled
CI / test-model-alias-routing (push) Has been cancelled
CI / test-responses-api-with-state (push) Has been cancelled
CI / e2e-plano-tests (3.10) (push) Has been cancelled
CI / e2e-plano-tests (3.11) (push) Has been cancelled
CI / e2e-plano-tests (3.12) (push) Has been cancelled
CI / e2e-plano-tests (3.13) (push) Has been cancelled
CI / e2e-plano-tests (3.14) (push) Has been cancelled
CI / e2e-demo-preference (push) Has been cancelled
CI / e2e-demo-currency (push) Has been cancelled
Publish docker image (latest) / create-manifest (push) Has been cancelled

* add pluggable session cache with Redis backend

* add Redis session affinity demos (Docker Compose and Kubernetes)

* address PR review feedback on session cache

* document Redis session cache backend for model affinity

* sync rendered config reference with session_cache addition

* add tenant-scoped Redis session cache keys and remove dead log_affinity_hit

- Add tenant_header to SessionCacheConfig; when set, cache keys are scoped
  as plano:affinity:{tenant_id}:{session_id} for multi-tenant isolation
- Thread tenant_id through RouterService, routing_service, and llm handlers
- Use Cow<'_, str> in session_key to avoid allocation when no tenant is set
- Remove unused log_affinity_hit (logging was already inlined at call sites)

* remove session_affinity_redis and session_affinity_redis_k8s demos
This commit is contained in:
Musa 2026-04-13 19:30:47 -07:00 committed by GitHub
parent 128059e7c1
commit 980faef6be
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 1538 additions and 729 deletions

View file

@ -8,6 +8,7 @@ use brightstaff::handlers::routing_service::routing_decision;
use brightstaff::router::llm::RouterService;
use brightstaff::router::model_metrics::ModelMetricsService;
use brightstaff::router::orchestrator::OrchestratorService;
use brightstaff::session_cache::init_session_cache;
use brightstaff::state::memory::MemoryConversationalStorage;
use brightstaff::state::postgresql::PostgreSQLConversationStorage;
use brightstaff::state::StateStorage;
@ -175,7 +176,7 @@ async fn init_app_state(
.unwrap_or_else(|| DEFAULT_ROUTING_LLM_PROVIDER.to_string());
let session_ttl_seconds = config.routing.as_ref().and_then(|r| r.session_ttl_seconds);
let session_max_entries = config.routing.as_ref().and_then(|r| r.session_max_entries);
let session_cache = init_session_cache(config).await?;
// Validate that top-level routing_preferences requires v0.4.0+.
let config_version = parse_semver(&config.version);
@ -297,6 +298,12 @@ async fn init_app_state(
}
}
let session_tenant_header = config
.routing
.as_ref()
.and_then(|r| r.session_cache.as_ref())
.and_then(|c| c.tenant_header.clone());
let router_service = Arc::new(RouterService::new(
config.routing_preferences.clone(),
metrics_service,
@ -304,21 +311,10 @@ async fn init_app_state(
routing_model_name,
routing_llm_provider,
session_ttl_seconds,
session_max_entries,
session_cache,
session_tenant_header,
));
// Spawn background task to clean up expired session cache entries every 5 minutes
{
let router_service = Arc::clone(&router_service);
tokio::spawn(async move {
let mut interval = tokio::time::interval(std::time::Duration::from_secs(300));
loop {
interval.tick().await;
router_service.cleanup_expired_sessions().await;
}
});
}
let orchestrator_model_name: String = overrides
.agent_orchestration_model
.as_deref()