mirror of
https://github.com/katanemo/plano.git
synced 2026-05-21 13:55:15 +02:00
rename filter_chain/output_filter_chain to input_filters/output_filters, scope output filters to chat completions only
This commit is contained in:
parent
e458daf162
commit
ca64833686
13 changed files with 69 additions and 60 deletions
|
|
@ -426,13 +426,13 @@ def validate_and_render_schema():
|
||||||
"Please provide model_providers either under listeners or at root level, not both. Currently we don't support multiple listeners with model_providers"
|
"Please provide model_providers either under listeners or at root level, not both. Currently we don't support multiple listeners with model_providers"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Validate filter_chain IDs on listeners reference valid agent/filter IDs
|
# Validate input_filters IDs on listeners reference valid agent/filter IDs
|
||||||
for listener in listeners:
|
for listener in listeners:
|
||||||
listener_filter_chain = listener.get("filter_chain", [])
|
listener_input_filters = listener.get("input_filters", [])
|
||||||
for fc_id in listener_filter_chain:
|
for fc_id in listener_input_filters:
|
||||||
if fc_id not in agent_id_keys:
|
if fc_id not in agent_id_keys:
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"Listener '{listener.get('name', 'unknown')}' references filter_chain id '{fc_id}' "
|
f"Listener '{listener.get('name', 'unknown')}' references input_filters id '{fc_id}' "
|
||||||
f"which is not defined in agents or filters. Available ids: {', '.join(sorted(agent_id_keys))}"
|
f"which is not defined in agents or filters. Available ids: {', '.join(sorted(agent_id_keys))}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -93,11 +93,11 @@ properties:
|
||||||
required:
|
required:
|
||||||
- id
|
- id
|
||||||
- description
|
- description
|
||||||
filter_chain:
|
input_filters:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
output_filter_chain:
|
output_filters:
|
||||||
type: array
|
type: array
|
||||||
items:
|
items:
|
||||||
type: string
|
type: string
|
||||||
|
|
|
||||||
|
|
@ -195,8 +195,8 @@ mod tests {
|
||||||
listener_type: ListenerType::Agent,
|
listener_type: ListenerType::Agent,
|
||||||
name: name.to_string(),
|
name: name.to_string(),
|
||||||
agents: Some(agents),
|
agents: Some(agents),
|
||||||
filter_chain: None,
|
input_filters: None,
|
||||||
output_filter_chain: None,
|
output_filters: None,
|
||||||
port: 8080,
|
port: 8080,
|
||||||
router: None,
|
router: None,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -75,8 +75,8 @@ mod tests {
|
||||||
listener_type: ListenerType::Agent,
|
listener_type: ListenerType::Agent,
|
||||||
name: "test-listener".to_string(),
|
name: "test-listener".to_string(),
|
||||||
agents: Some(vec![agent_pipeline.clone()]),
|
agents: Some(vec![agent_pipeline.clone()]),
|
||||||
filter_chain: None,
|
input_filters: None,
|
||||||
output_filter_chain: None,
|
output_filters: None,
|
||||||
port: 8080,
|
port: 8080,
|
||||||
router: None,
|
router: None,
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -46,9 +46,9 @@ pub async fn llm_chat(
|
||||||
llm_providers: Arc<RwLock<LlmProviders>>,
|
llm_providers: Arc<RwLock<LlmProviders>>,
|
||||||
span_attributes: Arc<Option<SpanAttributes>>,
|
span_attributes: Arc<Option<SpanAttributes>>,
|
||||||
state_storage: Option<Arc<dyn StateStorage>>,
|
state_storage: Option<Arc<dyn StateStorage>>,
|
||||||
filter_chain: Arc<Option<Vec<String>>>,
|
input_filters: Arc<Option<Vec<String>>>,
|
||||||
filter_agents: Arc<HashMap<String, Agent>>,
|
input_filter_agents: Arc<HashMap<String, Agent>>,
|
||||||
output_filter_chain: Arc<Option<Vec<String>>>,
|
output_filters: Arc<Option<Vec<String>>>,
|
||||||
output_filter_agents: Arc<HashMap<String, Agent>>,
|
output_filter_agents: Arc<HashMap<String, Agent>>,
|
||||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||||
let request_path = request.uri().path().to_string();
|
let request_path = request.uri().path().to_string();
|
||||||
|
|
@ -89,9 +89,9 @@ pub async fn llm_chat(
|
||||||
request_id,
|
request_id,
|
||||||
request_path,
|
request_path,
|
||||||
request_headers,
|
request_headers,
|
||||||
filter_chain,
|
input_filters,
|
||||||
filter_agents,
|
input_filter_agents,
|
||||||
output_filter_chain,
|
output_filters,
|
||||||
output_filter_agents,
|
output_filter_agents,
|
||||||
)
|
)
|
||||||
.instrument(request_span)
|
.instrument(request_span)
|
||||||
|
|
@ -110,9 +110,9 @@ async fn llm_chat_inner(
|
||||||
request_id: String,
|
request_id: String,
|
||||||
request_path: String,
|
request_path: String,
|
||||||
mut request_headers: hyper::HeaderMap,
|
mut request_headers: hyper::HeaderMap,
|
||||||
filter_chain: Arc<Option<Vec<String>>>,
|
input_filters: Arc<Option<Vec<String>>>,
|
||||||
filter_agents: Arc<HashMap<String, Agent>>,
|
input_filter_agents: Arc<HashMap<String, Agent>>,
|
||||||
output_filter_chain: Arc<Option<Vec<String>>>,
|
output_filters: Arc<Option<Vec<String>>>,
|
||||||
output_filter_agents: Arc<HashMap<String, Agent>>,
|
output_filter_agents: Arc<HashMap<String, Agent>>,
|
||||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||||
// Set service name for LLM operations
|
// Set service name for LLM operations
|
||||||
|
|
@ -267,11 +267,11 @@ async fn llm_chat_inner(
|
||||||
debug!("removed plano_preference_config from metadata");
|
debug!("removed plano_preference_config from metadata");
|
||||||
}
|
}
|
||||||
|
|
||||||
// === Filter chain processing for model listener ===
|
// === Input filters processing for model listener ===
|
||||||
{
|
{
|
||||||
if let Some(ref fc) = *filter_chain {
|
if let Some(ref fc) = *input_filters {
|
||||||
if !fc.is_empty() {
|
if !fc.is_empty() {
|
||||||
debug!(filter_chain = ?fc, "processing model listener filter chain");
|
debug!(input_filters = ?fc, "processing model listener input filters");
|
||||||
|
|
||||||
// Create a temporary AgentFilterChain to reuse PipelineProcessor
|
// Create a temporary AgentFilterChain to reuse PipelineProcessor
|
||||||
let temp_filter_chain = AgentFilterChain {
|
let temp_filter_chain = AgentFilterChain {
|
||||||
|
|
@ -287,7 +287,7 @@ async fn llm_chat_inner(
|
||||||
.process_filter_chain(
|
.process_filter_chain(
|
||||||
&messages,
|
&messages,
|
||||||
&temp_filter_chain,
|
&temp_filter_chain,
|
||||||
&filter_agents,
|
&input_filter_agents,
|
||||||
&request_headers,
|
&request_headers,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
|
|
@ -508,14 +508,23 @@ async fn llm_chat_inner(
|
||||||
propagator.inject_context(&cx, &mut HeaderInjector(&mut request_headers));
|
propagator.inject_context(&cx, &mut HeaderInjector(&mut request_headers));
|
||||||
});
|
});
|
||||||
|
|
||||||
// Determine if output filter chain is configured
|
// Output filters are only supported for /v1/chat/completions — the SSE content
|
||||||
let has_output_filter = output_filter_chain
|
// extraction logic is specific to that API shape (choices[].delta.content).
|
||||||
|
let output_filters_configured = output_filters
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map(|fc| !fc.is_empty())
|
.map(|fc| !fc.is_empty())
|
||||||
.unwrap_or(false);
|
.unwrap_or(false);
|
||||||
|
let has_output_filter = output_filters_configured
|
||||||
|
&& request_path == common::consts::CHAT_COMPLETIONS_PATH;
|
||||||
|
if output_filters_configured && !has_output_filter {
|
||||||
|
warn!(
|
||||||
|
path = %request_path,
|
||||||
|
"output filters are configured but only supported for /v1/chat/completions, skipping"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// Save request headers for output filter chain (before they're consumed by upstream request)
|
// Save request headers for output filters (before they're consumed by upstream request)
|
||||||
let output_filter_request_headers = if has_output_filter {
|
let output_filter_request_headers = if has_output_filter {
|
||||||
Some(request_headers.clone())
|
Some(request_headers.clone())
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -589,7 +598,7 @@ async fn llm_chat_inner(
|
||||||
request_id,
|
request_id,
|
||||||
);
|
);
|
||||||
if has_output_filter {
|
if has_output_filter {
|
||||||
let ofc = output_filter_chain.as_ref().as_ref().unwrap().clone();
|
let ofc = output_filters.as_ref().as_ref().unwrap().clone();
|
||||||
let ofa = (*output_filter_agents).clone();
|
let ofa = (*output_filter_agents).clone();
|
||||||
create_streaming_response_with_output_filter(
|
create_streaming_response_with_output_filter(
|
||||||
byte_stream,
|
byte_stream,
|
||||||
|
|
@ -603,7 +612,7 @@ async fn llm_chat_inner(
|
||||||
create_streaming_response(byte_stream, state_processor, 16)
|
create_streaming_response(byte_stream, state_processor, 16)
|
||||||
}
|
}
|
||||||
} else if has_output_filter {
|
} else if has_output_filter {
|
||||||
let ofc = output_filter_chain.as_ref().as_ref().unwrap().clone();
|
let ofc = output_filters.as_ref().as_ref().unwrap().clone();
|
||||||
let ofa = (*output_filter_agents).clone();
|
let ofa = (*output_filter_agents).clone();
|
||||||
create_streaming_response_with_output_filter(
|
create_streaming_response_with_output_filter(
|
||||||
byte_stream,
|
byte_stream,
|
||||||
|
|
|
||||||
|
|
@ -457,13 +457,13 @@ pub async fn filter_non_streaming_response(
|
||||||
Bytes::from(serde_json::to_string(&value).unwrap_or_else(|_| response_str.to_string()))
|
Bytes::from(serde_json::to_string(&value).unwrap_or_else(|_| response_str.to_string()))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Creates a streaming response that processes each chunk through an output filter chain.
|
/// Creates a streaming response that processes each chunk through output filters.
|
||||||
/// The output filter is called asynchronously for each SSE chunk's content.
|
/// The output filter is called asynchronously for each SSE chunk's content.
|
||||||
pub fn create_streaming_response_with_output_filter<S, P>(
|
pub fn create_streaming_response_with_output_filter<S, P>(
|
||||||
mut byte_stream: S,
|
mut byte_stream: S,
|
||||||
mut inner_processor: P,
|
mut inner_processor: P,
|
||||||
buffer_size: usize,
|
buffer_size: usize,
|
||||||
output_filter_chain: Vec<String>,
|
output_filters: Vec<String>,
|
||||||
output_filter_agents: HashMap<String, Agent>,
|
output_filter_agents: HashMap<String, Agent>,
|
||||||
request_headers: HeaderMap,
|
request_headers: HeaderMap,
|
||||||
) -> StreamingResponse
|
) -> StreamingResponse
|
||||||
|
|
@ -482,7 +482,7 @@ where
|
||||||
id: "output_filter".to_string(),
|
id: "output_filter".to_string(),
|
||||||
default: None,
|
default: None,
|
||||||
description: None,
|
description: None,
|
||||||
filter_chain: Some(output_filter_chain),
|
filter_chain: Some(output_filters),
|
||||||
};
|
};
|
||||||
|
|
||||||
while let Some(item) = byte_stream.next().await {
|
while let Some(item) = byte_stream.next().await {
|
||||||
|
|
|
||||||
|
|
@ -108,10 +108,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||||
.listeners
|
.listeners
|
||||||
.iter()
|
.iter()
|
||||||
.find(|l| l.listener_type == ListenerType::Model);
|
.find(|l| l.listener_type == ListenerType::Model);
|
||||||
let model_filter_chain: Arc<Option<Vec<String>>> =
|
let model_input_filters: Arc<Option<Vec<String>>> =
|
||||||
Arc::new(model_listener.and_then(|l| l.filter_chain.clone()));
|
Arc::new(model_listener.and_then(|l| l.input_filters.clone()));
|
||||||
let model_filter_agents: Arc<HashMap<String, Agent>> = Arc::new(
|
let model_input_filter_agents: Arc<HashMap<String, Agent>> = Arc::new(
|
||||||
model_filter_chain
|
model_input_filters
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map(|fc| {
|
.map(|fc| {
|
||||||
|
|
@ -121,10 +121,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||||
})
|
})
|
||||||
.unwrap_or_default(),
|
.unwrap_or_default(),
|
||||||
);
|
);
|
||||||
let model_output_filter_chain: Arc<Option<Vec<String>>> =
|
let model_output_filters: Arc<Option<Vec<String>>> =
|
||||||
Arc::new(model_listener.and_then(|l| l.output_filter_chain.clone()));
|
Arc::new(model_listener.and_then(|l| l.output_filters.clone()));
|
||||||
let model_output_filter_agents: Arc<HashMap<String, Agent>> = Arc::new(
|
let model_output_filter_agents: Arc<HashMap<String, Agent>> = Arc::new(
|
||||||
model_output_filter_chain
|
model_output_filters
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map(|fc| {
|
.map(|fc| {
|
||||||
|
|
@ -228,9 +228,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||||
|
|
||||||
let llm_providers = llm_providers.clone();
|
let llm_providers = llm_providers.clone();
|
||||||
let agents_list = combined_agents_filters_list.clone();
|
let agents_list = combined_agents_filters_list.clone();
|
||||||
let model_filter_chain = model_filter_chain.clone();
|
let model_input_filters = model_input_filters.clone();
|
||||||
let model_filter_agents = model_filter_agents.clone();
|
let model_input_filter_agents = model_input_filter_agents.clone();
|
||||||
let model_output_filter_chain = model_output_filter_chain.clone();
|
let model_output_filters = model_output_filters.clone();
|
||||||
let model_output_filter_agents = model_output_filter_agents.clone();
|
let model_output_filter_agents = model_output_filter_agents.clone();
|
||||||
let listeners = listeners.clone();
|
let listeners = listeners.clone();
|
||||||
let span_attributes = span_attributes.clone();
|
let span_attributes = span_attributes.clone();
|
||||||
|
|
@ -243,9 +243,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||||
let llm_providers = llm_providers.clone();
|
let llm_providers = llm_providers.clone();
|
||||||
let model_aliases = Arc::clone(&model_aliases);
|
let model_aliases = Arc::clone(&model_aliases);
|
||||||
let agents_list = agents_list.clone();
|
let agents_list = agents_list.clone();
|
||||||
let model_filter_chain = model_filter_chain.clone();
|
let model_input_filters = model_input_filters.clone();
|
||||||
let model_filter_agents = model_filter_agents.clone();
|
let model_input_filter_agents = model_input_filter_agents.clone();
|
||||||
let model_output_filter_chain = model_output_filter_chain.clone();
|
let model_output_filters = model_output_filters.clone();
|
||||||
let model_output_filter_agents = model_output_filter_agents.clone();
|
let model_output_filter_agents = model_output_filter_agents.clone();
|
||||||
let listeners = listeners.clone();
|
let listeners = listeners.clone();
|
||||||
let span_attributes = span_attributes.clone();
|
let span_attributes = span_attributes.clone();
|
||||||
|
|
@ -305,9 +305,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||||
llm_providers,
|
llm_providers,
|
||||||
span_attributes,
|
span_attributes,
|
||||||
state_storage,
|
state_storage,
|
||||||
model_filter_chain,
|
model_input_filters,
|
||||||
model_filter_agents,
|
model_input_filter_agents,
|
||||||
model_output_filter_chain,
|
model_output_filters,
|
||||||
model_output_filter_agents,
|
model_output_filter_agents,
|
||||||
)
|
)
|
||||||
.with_context(parent_cx)
|
.with_context(parent_cx)
|
||||||
|
|
|
||||||
|
|
@ -51,8 +51,8 @@ pub struct Listener {
|
||||||
pub name: String,
|
pub name: String,
|
||||||
pub router: Option<String>,
|
pub router: Option<String>,
|
||||||
pub agents: Option<Vec<AgentFilterChain>>,
|
pub agents: Option<Vec<AgentFilterChain>>,
|
||||||
pub filter_chain: Option<Vec<String>>,
|
pub input_filters: Option<Vec<String>>,
|
||||||
pub output_filter_chain: Option<Vec<String>>,
|
pub output_filters: Option<Vec<String>>,
|
||||||
pub port: u16,
|
pub port: u16,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
Run content-safety filters on direct LLM requests — no agent layer required.
|
Run content-safety filters on direct LLM requests — no agent layer required.
|
||||||
|
|
||||||
This demo uses the `filter_chain` feature on a **model-type listener** to intercept
|
This demo uses the `input_filters` feature on a **model-type listener** to intercept
|
||||||
`/v1/chat/completions` requests and block unsafe content before they reach the LLM provider.
|
`/v1/chat/completions` requests and block unsafe content before they reach the LLM provider.
|
||||||
|
|
||||||
## Architecture
|
## Architecture
|
||||||
|
|
@ -10,7 +10,7 @@ This demo uses the `filter_chain` feature on a **model-type listener** to interc
|
||||||
```
|
```
|
||||||
Client ──► Plano (model listener :12000)
|
Client ──► Plano (model listener :12000)
|
||||||
│
|
│
|
||||||
├─ filter_chain: content_guard ──► Block / Allow
|
├─ input_filters: content_guard ──► Block / Allow
|
||||||
│
|
│
|
||||||
└─ model_provider: openai/gpt-4o-mini
|
└─ model_provider: openai/gpt-4o-mini
|
||||||
```
|
```
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ listeners:
|
||||||
- type: model
|
- type: model
|
||||||
name: llm_gateway
|
name: llm_gateway
|
||||||
port: 12000
|
port: 12000
|
||||||
filter_chain:
|
input_filters:
|
||||||
- content_guard
|
- content_guard
|
||||||
|
|
||||||
tracing:
|
tracing:
|
||||||
|
|
|
||||||
|
|
@ -2,20 +2,20 @@
|
||||||
|
|
||||||
Automatically redact PII from LLM requests and restore it in responses — inspired by [Uber's GenAI Gateway PII Redactor](https://www.uber.com/blog/genai-gateway/).
|
Automatically redact PII from LLM requests and restore it in responses — inspired by [Uber's GenAI Gateway PII Redactor](https://www.uber.com/blog/genai-gateway/).
|
||||||
|
|
||||||
This demo uses both `filter_chain` (input) and `output_filter_chain` (output) on a **model-type listener** to anonymize PII before it reaches the LLM provider, then de-anonymize the response before returning it to the client.
|
This demo uses both `input_filters` and `output_filters` on a **model-type listener** to anonymize PII before it reaches the LLM provider, then de-anonymize the response before returning it to the client.
|
||||||
|
|
||||||
## Architecture
|
## Architecture
|
||||||
|
|
||||||
```
|
```
|
||||||
Client ──► Plano (model listener :12000)
|
Client ──► Plano (model listener :12000)
|
||||||
│
|
│
|
||||||
├─ filter_chain: pii_anonymizer
|
├─ input_filters: pii_anonymizer
|
||||||
│ └─ Replace PII with [EMAIL_0], [SSN_0], etc.
|
│ └─ Replace PII with [EMAIL_0], [SSN_0], etc.
|
||||||
│
|
│
|
||||||
├─ model_provider: openai/gpt-4o-mini
|
├─ model_provider: openai/gpt-4o-mini
|
||||||
│ └─ LLM only sees anonymized data
|
│ └─ LLM only sees anonymized data
|
||||||
│
|
│
|
||||||
└─ output_filter_chain: pii_deanonymizer
|
└─ output_filters: pii_deanonymizer
|
||||||
└─ Restore [EMAIL_0] → original email (per-chunk for streaming)
|
└─ Restore [EMAIL_0] → original email (per-chunk for streaming)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -82,7 +82,7 @@ Check the PII filter service logs in the terminal running `start_agents.sh`. You
|
||||||
|
|
||||||
## How Streaming De-anonymization Works
|
## How Streaming De-anonymization Works
|
||||||
|
|
||||||
For streaming responses, each SSE chunk is sent through the output filter chain as it arrives from the LLM:
|
For streaming responses, each SSE chunk is sent through the output filters as it arrives from the LLM:
|
||||||
|
|
||||||
1. Plano receives a chunk with content like `"The email [EMAIL_0] belongs to..."`
|
1. Plano receives a chunk with content like `"The email [EMAIL_0] belongs to..."`
|
||||||
2. The chunk content is sent to the `/deanonymize` endpoint
|
2. The chunk content is sent to the `/deanonymize` endpoint
|
||||||
|
|
|
||||||
|
|
@ -17,9 +17,9 @@ listeners:
|
||||||
- type: model
|
- type: model
|
||||||
name: llm_gateway
|
name: llm_gateway
|
||||||
port: 12000
|
port: 12000
|
||||||
filter_chain:
|
input_filters:
|
||||||
- pii_anonymizer
|
- pii_anonymizer
|
||||||
output_filter_chain:
|
output_filters:
|
||||||
- pii_deanonymizer
|
- pii_deanonymizer
|
||||||
|
|
||||||
tracing:
|
tracing:
|
||||||
|
|
|
||||||
|
|
@ -66,8 +66,8 @@ listeners:
|
||||||
name: model_1
|
name: model_1
|
||||||
address: 0.0.0.0
|
address: 0.0.0.0
|
||||||
port: 12000
|
port: 12000
|
||||||
# Optional: attach a filter chain for input guardrails on direct LLM requests
|
# Optional: attach input filters for guardrails on direct LLM requests
|
||||||
# filter_chain:
|
# input_filters:
|
||||||
# - input_guards
|
# - input_guards
|
||||||
|
|
||||||
# Prompt listener for function calling (for prompt_targets)
|
# Prompt listener for function calling (for prompt_targets)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue