mirror of
https://github.com/katanemo/plano.git
synced 2026-04-29 19:06:34 +02:00
Redis-backed session cache for cross-replica model affinity (#879)
Some checks failed
CI / pre-commit (push) Has been cancelled
CI / plano-tools-tests (push) Has been cancelled
CI / native-smoke-test (push) Has been cancelled
CI / docker-build (push) Has been cancelled
CI / validate-config (push) Has been cancelled
Publish docker image (latest) / build-arm64 (push) Has been cancelled
Publish docker image (latest) / build-amd64 (push) Has been cancelled
Build and Deploy Documentation / build (push) Has been cancelled
CI / security-scan (push) Has been cancelled
CI / test-prompt-gateway (push) Has been cancelled
CI / test-model-alias-routing (push) Has been cancelled
CI / test-responses-api-with-state (push) Has been cancelled
CI / e2e-plano-tests (3.10) (push) Has been cancelled
CI / e2e-plano-tests (3.11) (push) Has been cancelled
CI / e2e-plano-tests (3.12) (push) Has been cancelled
CI / e2e-plano-tests (3.13) (push) Has been cancelled
CI / e2e-plano-tests (3.14) (push) Has been cancelled
CI / e2e-demo-preference (push) Has been cancelled
CI / e2e-demo-currency (push) Has been cancelled
Publish docker image (latest) / create-manifest (push) Has been cancelled
Some checks failed
CI / pre-commit (push) Has been cancelled
CI / plano-tools-tests (push) Has been cancelled
CI / native-smoke-test (push) Has been cancelled
CI / docker-build (push) Has been cancelled
CI / validate-config (push) Has been cancelled
Publish docker image (latest) / build-arm64 (push) Has been cancelled
Publish docker image (latest) / build-amd64 (push) Has been cancelled
Build and Deploy Documentation / build (push) Has been cancelled
CI / security-scan (push) Has been cancelled
CI / test-prompt-gateway (push) Has been cancelled
CI / test-model-alias-routing (push) Has been cancelled
CI / test-responses-api-with-state (push) Has been cancelled
CI / e2e-plano-tests (3.10) (push) Has been cancelled
CI / e2e-plano-tests (3.11) (push) Has been cancelled
CI / e2e-plano-tests (3.12) (push) Has been cancelled
CI / e2e-plano-tests (3.13) (push) Has been cancelled
CI / e2e-plano-tests (3.14) (push) Has been cancelled
CI / e2e-demo-preference (push) Has been cancelled
CI / e2e-demo-currency (push) Has been cancelled
Publish docker image (latest) / create-manifest (push) Has been cancelled
* add pluggable session cache with Redis backend
* add Redis session affinity demos (Docker Compose and Kubernetes)
* address PR review feedback on session cache
* document Redis session cache backend for model affinity
* sync rendered config reference with session_cache addition
* add tenant-scoped Redis session cache keys and remove dead log_affinity_hit
- Add tenant_header to SessionCacheConfig; when set, cache keys are scoped
as plano:affinity:{tenant_id}:{session_id} for multi-tenant isolation
- Thread tenant_id through RouterService, routing_service, and llm handlers
- Use Cow<'_, str> in session_key to avoid allocation when no tenant is set
- Remove unused log_affinity_hit (logging was already inlined at call sites)
* remove session_affinity_redis and session_affinity_redis_k8s demos
This commit is contained in:
parent
128059e7c1
commit
980faef6be
15 changed files with 1538 additions and 729 deletions
|
|
@ -7,12 +7,32 @@ use crate::api::open_ai::{
|
|||
ChatCompletionTool, FunctionDefinition, FunctionParameter, FunctionParameters, ParameterType,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum SessionCacheType {
|
||||
#[default]
|
||||
Memory,
|
||||
Redis,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SessionCacheConfig {
|
||||
#[serde(rename = "type", default)]
|
||||
pub cache_type: SessionCacheType,
|
||||
/// Redis URL, e.g. `redis://localhost:6379`. Required when `type` is `redis`.
|
||||
pub url: Option<String>,
|
||||
/// Optional HTTP header name whose value is used as a tenant prefix in the cache key.
|
||||
/// When set, keys are scoped as `plano:affinity:{tenant_id}:{session_id}`.
|
||||
pub tenant_header: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Routing {
|
||||
pub llm_provider: Option<String>,
|
||||
pub model: Option<String>,
|
||||
pub session_ttl_seconds: Option<u64>,
|
||||
pub session_max_entries: Option<usize>,
|
||||
pub session_cache: Option<SessionCacheConfig>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue