rename filter_chain/output_filter_chain to input_filters/output_filters, scope output filters to chat completions only

This commit is contained in:
Adil Hafeez 2026-03-13 17:58:40 -07:00
parent e458daf162
commit ca64833686
No known key found for this signature in database
GPG key ID: 9B18EF7691369645
13 changed files with 69 additions and 60 deletions

View file

@ -426,13 +426,13 @@ def validate_and_render_schema():
"Please provide model_providers either under listeners or at root level, not both. Currently we don't support multiple listeners with model_providers" "Please provide model_providers either under listeners or at root level, not both. Currently we don't support multiple listeners with model_providers"
) )
# Validate filter_chain IDs on listeners reference valid agent/filter IDs # Validate input_filters IDs on listeners reference valid agent/filter IDs
for listener in listeners: for listener in listeners:
listener_filter_chain = listener.get("filter_chain", []) listener_input_filters = listener.get("input_filters", [])
for fc_id in listener_filter_chain: for fc_id in listener_input_filters:
if fc_id not in agent_id_keys: if fc_id not in agent_id_keys:
raise Exception( raise Exception(
f"Listener '{listener.get('name', 'unknown')}' references filter_chain id '{fc_id}' " f"Listener '{listener.get('name', 'unknown')}' references input_filters id '{fc_id}' "
f"which is not defined in agents or filters. Available ids: {', '.join(sorted(agent_id_keys))}" f"which is not defined in agents or filters. Available ids: {', '.join(sorted(agent_id_keys))}"
) )

View file

@ -93,11 +93,11 @@ properties:
required: required:
- id - id
- description - description
filter_chain: input_filters:
type: array type: array
items: items:
type: string type: string
output_filter_chain: output_filters:
type: array type: array
items: items:
type: string type: string

View file

@ -195,8 +195,8 @@ mod tests {
listener_type: ListenerType::Agent, listener_type: ListenerType::Agent,
name: name.to_string(), name: name.to_string(),
agents: Some(agents), agents: Some(agents),
filter_chain: None, input_filters: None,
output_filter_chain: None, output_filters: None,
port: 8080, port: 8080,
router: None, router: None,
} }

View file

@ -75,8 +75,8 @@ mod tests {
listener_type: ListenerType::Agent, listener_type: ListenerType::Agent,
name: "test-listener".to_string(), name: "test-listener".to_string(),
agents: Some(vec![agent_pipeline.clone()]), agents: Some(vec![agent_pipeline.clone()]),
filter_chain: None, input_filters: None,
output_filter_chain: None, output_filters: None,
port: 8080, port: 8080,
router: None, router: None,
}; };

View file

@ -46,9 +46,9 @@ pub async fn llm_chat(
llm_providers: Arc<RwLock<LlmProviders>>, llm_providers: Arc<RwLock<LlmProviders>>,
span_attributes: Arc<Option<SpanAttributes>>, span_attributes: Arc<Option<SpanAttributes>>,
state_storage: Option<Arc<dyn StateStorage>>, state_storage: Option<Arc<dyn StateStorage>>,
filter_chain: Arc<Option<Vec<String>>>, input_filters: Arc<Option<Vec<String>>>,
filter_agents: Arc<HashMap<String, Agent>>, input_filter_agents: Arc<HashMap<String, Agent>>,
output_filter_chain: Arc<Option<Vec<String>>>, output_filters: Arc<Option<Vec<String>>>,
output_filter_agents: Arc<HashMap<String, Agent>>, output_filter_agents: Arc<HashMap<String, Agent>>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> { ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
let request_path = request.uri().path().to_string(); let request_path = request.uri().path().to_string();
@ -89,9 +89,9 @@ pub async fn llm_chat(
request_id, request_id,
request_path, request_path,
request_headers, request_headers,
filter_chain, input_filters,
filter_agents, input_filter_agents,
output_filter_chain, output_filters,
output_filter_agents, output_filter_agents,
) )
.instrument(request_span) .instrument(request_span)
@ -110,9 +110,9 @@ async fn llm_chat_inner(
request_id: String, request_id: String,
request_path: String, request_path: String,
mut request_headers: hyper::HeaderMap, mut request_headers: hyper::HeaderMap,
filter_chain: Arc<Option<Vec<String>>>, input_filters: Arc<Option<Vec<String>>>,
filter_agents: Arc<HashMap<String, Agent>>, input_filter_agents: Arc<HashMap<String, Agent>>,
output_filter_chain: Arc<Option<Vec<String>>>, output_filters: Arc<Option<Vec<String>>>,
output_filter_agents: Arc<HashMap<String, Agent>>, output_filter_agents: Arc<HashMap<String, Agent>>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> { ) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
// Set service name for LLM operations // Set service name for LLM operations
@ -267,11 +267,11 @@ async fn llm_chat_inner(
debug!("removed plano_preference_config from metadata"); debug!("removed plano_preference_config from metadata");
} }
// === Filter chain processing for model listener === // === Input filters processing for model listener ===
{ {
if let Some(ref fc) = *filter_chain { if let Some(ref fc) = *input_filters {
if !fc.is_empty() { if !fc.is_empty() {
debug!(filter_chain = ?fc, "processing model listener filter chain"); debug!(input_filters = ?fc, "processing model listener input filters");
// Create a temporary AgentFilterChain to reuse PipelineProcessor // Create a temporary AgentFilterChain to reuse PipelineProcessor
let temp_filter_chain = AgentFilterChain { let temp_filter_chain = AgentFilterChain {
@ -287,7 +287,7 @@ async fn llm_chat_inner(
.process_filter_chain( .process_filter_chain(
&messages, &messages,
&temp_filter_chain, &temp_filter_chain,
&filter_agents, &input_filter_agents,
&request_headers, &request_headers,
) )
.await .await
@ -508,14 +508,23 @@ async fn llm_chat_inner(
propagator.inject_context(&cx, &mut HeaderInjector(&mut request_headers)); propagator.inject_context(&cx, &mut HeaderInjector(&mut request_headers));
}); });
// Determine if output filter chain is configured // Output filters are only supported for /v1/chat/completions — the SSE content
let has_output_filter = output_filter_chain // extraction logic is specific to that API shape (choices[].delta.content).
let output_filters_configured = output_filters
.as_ref() .as_ref()
.as_ref() .as_ref()
.map(|fc| !fc.is_empty()) .map(|fc| !fc.is_empty())
.unwrap_or(false); .unwrap_or(false);
let has_output_filter = output_filters_configured
&& request_path == common::consts::CHAT_COMPLETIONS_PATH;
if output_filters_configured && !has_output_filter {
warn!(
path = %request_path,
"output filters are configured but only supported for /v1/chat/completions, skipping"
);
}
// Save request headers for output filter chain (before they're consumed by upstream request) // Save request headers for output filters (before they're consumed by upstream request)
let output_filter_request_headers = if has_output_filter { let output_filter_request_headers = if has_output_filter {
Some(request_headers.clone()) Some(request_headers.clone())
} else { } else {
@ -589,7 +598,7 @@ async fn llm_chat_inner(
request_id, request_id,
); );
if has_output_filter { if has_output_filter {
let ofc = output_filter_chain.as_ref().as_ref().unwrap().clone(); let ofc = output_filters.as_ref().as_ref().unwrap().clone();
let ofa = (*output_filter_agents).clone(); let ofa = (*output_filter_agents).clone();
create_streaming_response_with_output_filter( create_streaming_response_with_output_filter(
byte_stream, byte_stream,
@ -603,7 +612,7 @@ async fn llm_chat_inner(
create_streaming_response(byte_stream, state_processor, 16) create_streaming_response(byte_stream, state_processor, 16)
} }
} else if has_output_filter { } else if has_output_filter {
let ofc = output_filter_chain.as_ref().as_ref().unwrap().clone(); let ofc = output_filters.as_ref().as_ref().unwrap().clone();
let ofa = (*output_filter_agents).clone(); let ofa = (*output_filter_agents).clone();
create_streaming_response_with_output_filter( create_streaming_response_with_output_filter(
byte_stream, byte_stream,

View file

@ -457,13 +457,13 @@ pub async fn filter_non_streaming_response(
Bytes::from(serde_json::to_string(&value).unwrap_or_else(|_| response_str.to_string())) Bytes::from(serde_json::to_string(&value).unwrap_or_else(|_| response_str.to_string()))
} }
/// Creates a streaming response that processes each chunk through an output filter chain. /// Creates a streaming response that processes each chunk through output filters.
/// The output filter is called asynchronously for each SSE chunk's content. /// The output filter is called asynchronously for each SSE chunk's content.
pub fn create_streaming_response_with_output_filter<S, P>( pub fn create_streaming_response_with_output_filter<S, P>(
mut byte_stream: S, mut byte_stream: S,
mut inner_processor: P, mut inner_processor: P,
buffer_size: usize, buffer_size: usize,
output_filter_chain: Vec<String>, output_filters: Vec<String>,
output_filter_agents: HashMap<String, Agent>, output_filter_agents: HashMap<String, Agent>,
request_headers: HeaderMap, request_headers: HeaderMap,
) -> StreamingResponse ) -> StreamingResponse
@ -482,7 +482,7 @@ where
id: "output_filter".to_string(), id: "output_filter".to_string(),
default: None, default: None,
description: None, description: None,
filter_chain: Some(output_filter_chain), filter_chain: Some(output_filters),
}; };
while let Some(item) = byte_stream.next().await { while let Some(item) = byte_stream.next().await {

View file

@ -108,10 +108,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
.listeners .listeners
.iter() .iter()
.find(|l| l.listener_type == ListenerType::Model); .find(|l| l.listener_type == ListenerType::Model);
let model_filter_chain: Arc<Option<Vec<String>>> = let model_input_filters: Arc<Option<Vec<String>>> =
Arc::new(model_listener.and_then(|l| l.filter_chain.clone())); Arc::new(model_listener.and_then(|l| l.input_filters.clone()));
let model_filter_agents: Arc<HashMap<String, Agent>> = Arc::new( let model_input_filter_agents: Arc<HashMap<String, Agent>> = Arc::new(
model_filter_chain model_input_filters
.as_ref() .as_ref()
.as_ref() .as_ref()
.map(|fc| { .map(|fc| {
@ -121,10 +121,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
}) })
.unwrap_or_default(), .unwrap_or_default(),
); );
let model_output_filter_chain: Arc<Option<Vec<String>>> = let model_output_filters: Arc<Option<Vec<String>>> =
Arc::new(model_listener.and_then(|l| l.output_filter_chain.clone())); Arc::new(model_listener.and_then(|l| l.output_filters.clone()));
let model_output_filter_agents: Arc<HashMap<String, Agent>> = Arc::new( let model_output_filter_agents: Arc<HashMap<String, Agent>> = Arc::new(
model_output_filter_chain model_output_filters
.as_ref() .as_ref()
.as_ref() .as_ref()
.map(|fc| { .map(|fc| {
@ -228,9 +228,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let llm_providers = llm_providers.clone(); let llm_providers = llm_providers.clone();
let agents_list = combined_agents_filters_list.clone(); let agents_list = combined_agents_filters_list.clone();
let model_filter_chain = model_filter_chain.clone(); let model_input_filters = model_input_filters.clone();
let model_filter_agents = model_filter_agents.clone(); let model_input_filter_agents = model_input_filter_agents.clone();
let model_output_filter_chain = model_output_filter_chain.clone(); let model_output_filters = model_output_filters.clone();
let model_output_filter_agents = model_output_filter_agents.clone(); let model_output_filter_agents = model_output_filter_agents.clone();
let listeners = listeners.clone(); let listeners = listeners.clone();
let span_attributes = span_attributes.clone(); let span_attributes = span_attributes.clone();
@ -243,9 +243,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let llm_providers = llm_providers.clone(); let llm_providers = llm_providers.clone();
let model_aliases = Arc::clone(&model_aliases); let model_aliases = Arc::clone(&model_aliases);
let agents_list = agents_list.clone(); let agents_list = agents_list.clone();
let model_filter_chain = model_filter_chain.clone(); let model_input_filters = model_input_filters.clone();
let model_filter_agents = model_filter_agents.clone(); let model_input_filter_agents = model_input_filter_agents.clone();
let model_output_filter_chain = model_output_filter_chain.clone(); let model_output_filters = model_output_filters.clone();
let model_output_filter_agents = model_output_filter_agents.clone(); let model_output_filter_agents = model_output_filter_agents.clone();
let listeners = listeners.clone(); let listeners = listeners.clone();
let span_attributes = span_attributes.clone(); let span_attributes = span_attributes.clone();
@ -305,9 +305,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
llm_providers, llm_providers,
span_attributes, span_attributes,
state_storage, state_storage,
model_filter_chain, model_input_filters,
model_filter_agents, model_input_filter_agents,
model_output_filter_chain, model_output_filters,
model_output_filter_agents, model_output_filter_agents,
) )
.with_context(parent_cx) .with_context(parent_cx)

View file

@ -51,8 +51,8 @@ pub struct Listener {
pub name: String, pub name: String,
pub router: Option<String>, pub router: Option<String>,
pub agents: Option<Vec<AgentFilterChain>>, pub agents: Option<Vec<AgentFilterChain>>,
pub filter_chain: Option<Vec<String>>, pub input_filters: Option<Vec<String>>,
pub output_filter_chain: Option<Vec<String>>, pub output_filters: Option<Vec<String>>,
pub port: u16, pub port: u16,
} }

View file

@ -2,7 +2,7 @@
Run content-safety filters on direct LLM requests — no agent layer required. Run content-safety filters on direct LLM requests — no agent layer required.
This demo uses the `filter_chain` feature on a **model-type listener** to intercept This demo uses the `input_filters` feature on a **model-type listener** to intercept
`/v1/chat/completions` requests and block unsafe content before they reach the LLM provider. `/v1/chat/completions` requests and block unsafe content before they reach the LLM provider.
## Architecture ## Architecture
@ -10,7 +10,7 @@ This demo uses the `filter_chain` feature on a **model-type listener** to interc
``` ```
Client ──► Plano (model listener :12000) Client ──► Plano (model listener :12000)
├─ filter_chain: content_guard ──► Block / Allow ├─ input_filters: content_guard ──► Block / Allow
└─ model_provider: openai/gpt-4o-mini └─ model_provider: openai/gpt-4o-mini
``` ```

View file

@ -14,7 +14,7 @@ listeners:
- type: model - type: model
name: llm_gateway name: llm_gateway
port: 12000 port: 12000
filter_chain: input_filters:
- content_guard - content_guard
tracing: tracing:

View file

@ -2,20 +2,20 @@
Automatically redact PII from LLM requests and restore it in responses — inspired by [Uber's GenAI Gateway PII Redactor](https://www.uber.com/blog/genai-gateway/). Automatically redact PII from LLM requests and restore it in responses — inspired by [Uber's GenAI Gateway PII Redactor](https://www.uber.com/blog/genai-gateway/).
This demo uses both `filter_chain` (input) and `output_filter_chain` (output) on a **model-type listener** to anonymize PII before it reaches the LLM provider, then de-anonymize the response before returning it to the client. This demo uses both `input_filters` and `output_filters` on a **model-type listener** to anonymize PII before it reaches the LLM provider, then de-anonymize the response before returning it to the client.
## Architecture ## Architecture
``` ```
Client ──► Plano (model listener :12000) Client ──► Plano (model listener :12000)
├─ filter_chain: pii_anonymizer ├─ input_filters: pii_anonymizer
│ └─ Replace PII with [EMAIL_0], [SSN_0], etc. │ └─ Replace PII with [EMAIL_0], [SSN_0], etc.
├─ model_provider: openai/gpt-4o-mini ├─ model_provider: openai/gpt-4o-mini
│ └─ LLM only sees anonymized data │ └─ LLM only sees anonymized data
└─ output_filter_chain: pii_deanonymizer └─ output_filters: pii_deanonymizer
└─ Restore [EMAIL_0] → original email (per-chunk for streaming) └─ Restore [EMAIL_0] → original email (per-chunk for streaming)
``` ```
@ -82,7 +82,7 @@ Check the PII filter service logs in the terminal running `start_agents.sh`. You
## How Streaming De-anonymization Works ## How Streaming De-anonymization Works
For streaming responses, each SSE chunk is sent through the output filter chain as it arrives from the LLM: For streaming responses, each SSE chunk is sent through the output filters as it arrives from the LLM:
1. Plano receives a chunk with content like `"The email [EMAIL_0] belongs to..."` 1. Plano receives a chunk with content like `"The email [EMAIL_0] belongs to..."`
2. The chunk content is sent to the `/deanonymize` endpoint 2. The chunk content is sent to the `/deanonymize` endpoint

View file

@ -17,9 +17,9 @@ listeners:
- type: model - type: model
name: llm_gateway name: llm_gateway
port: 12000 port: 12000
filter_chain: input_filters:
- pii_anonymizer - pii_anonymizer
output_filter_chain: output_filters:
- pii_deanonymizer - pii_deanonymizer
tracing: tracing:

View file

@ -66,8 +66,8 @@ listeners:
name: model_1 name: model_1
address: 0.0.0.0 address: 0.0.0.0
port: 12000 port: 12000
# Optional: attach a filter chain for input guardrails on direct LLM requests # Optional: attach input filters for guardrails on direct LLM requests
# filter_chain: # input_filters:
# - input_guards # - input_guards
# Prompt listener for function calling (for prompt_targets) # Prompt listener for function calling (for prompt_targets)