merge origin/main into musa/custom-trace-attributes

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Musa 2026-02-23 13:43:57 -08:00
commit e30f93b1cd
No known key found for this signature in database
24 changed files with 268 additions and 45 deletions

View file

@ -79,13 +79,13 @@ jobs:
load: true
tags: |
${{ env.PLANO_DOCKER_IMAGE }}
${{ env.DOCKER_IMAGE }}:0.4.7
${{ env.DOCKER_IMAGE }}:0.4.8
${{ env.DOCKER_IMAGE }}:latest
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Save image as artifact
run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.7 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar
run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.8 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar
- name: Upload image artifact
uses: actions/upload-artifact@v4

View file

@ -137,6 +137,12 @@ To prepare a release (e.g., bumping from `0.4.6` to `0.4.7`), update the version
Commit message format: `release X.Y.Z`
## Workflow Preferences
- **Git commits:** Do NOT add `Co-Authored-By` lines. Keep commit messages short and concise (one line, no verbose descriptions). NEVER commit and push directly to `main`—always use a feature branch and PR.
- **Git branches:** Use the format `<github_username>/<feature_name>` when creating branches for PRs. Determine the username from `gh api user --jq .login`.
- **GitHub issues:** When a GitHub issue URL is pasted, fetch all requirements and context from the issue first. The end goal is always a PR with all tests passing.
## Key Conventions
- Rust edition 2021, formatted with `cargo fmt`, linted with `cargo clippy -D warnings`

View file

@ -24,7 +24,7 @@ export function Hero() {
>
<div className="inline-flex flex-wrap items-center gap-1.5 sm:gap-2 px-3 sm:px-4 py-1 rounded-full bg-[rgba(185,191,255,0.4)] border border-[var(--secondary)] shadow backdrop-blur hover:bg-[rgba(185,191,255,0.6)] transition-colors cursor-pointer">
<span className="text-xs sm:text-sm font-medium text-black/65">
v0.4.7
v0.4.8
</span>
<span className="text-xs sm:text-sm font-medium text-black ">

View file

@ -1 +1 @@
docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.7
docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.8

View file

@ -1,3 +1,3 @@
"""Plano CLI - Intelligent Prompt Gateway."""
__version__ = "0.4.7"
__version__ = "0.4.8"

View file

@ -460,6 +460,12 @@ def validate_and_render_schema():
print("agent_orchestrator: ", agent_orchestrator)
overrides = config_yaml.get("overrides", {})
upstream_connect_timeout = overrides.get("upstream_connect_timeout", "5s")
upstream_tls_ca_path = overrides.get(
"upstream_tls_ca_path", "/etc/ssl/certs/ca-certificates.crt"
)
data = {
"prompt_gateway_listener": prompt_gateway,
"llm_gateway_listener": llm_gateway,
@ -471,6 +477,8 @@ def validate_and_render_schema():
"local_llms": llms_with_endpoint,
"agent_orchestrator": agent_orchestrator,
"listeners": listeners,
"upstream_connect_timeout": upstream_connect_timeout,
"upstream_tls_ca_path": upstream_tls_ca_path,
}
rendered = template.render(data)

View file

@ -5,5 +5,5 @@ PLANO_COLOR = "#969FF4"
SERVICE_NAME_ARCHGW = "plano"
PLANO_DOCKER_NAME = "plano"
PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.7")
PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.8")
DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://host.docker.internal:4317"

View file

@ -1,6 +1,6 @@
[project]
name = "planoai"
version = "0.4.7"
version = "0.4.8"
description = "Python-based CLI tool to manage Plano."
authors = [{name = "Katanemo Labs, Inc."}]
readme = "README.md"

2
cli/uv.lock generated
View file

@ -337,7 +337,7 @@ wheels = [
[[package]]
name = "planoai"
version = "0.4.6"
version = "0.4.7"
source = { editable = "." }
dependencies = [
{ name = "click" },

View file

@ -595,7 +595,7 @@ static_resources:
clusters:
- name: arch
connect_timeout: 5s
connect_timeout: {{ upstream_connect_timeout | default('5s') }}
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -618,9 +618,12 @@ static_resources:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
validation_context:
trusted_ca:
filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
- name: anthropic
connect_timeout: 0.5s
connect_timeout: {{ upstream_connect_timeout | default('5s') }}
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -643,9 +646,12 @@ static_resources:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
validation_context:
trusted_ca:
filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
- name: deepseek
connect_timeout: 0.5s
connect_timeout: {{ upstream_connect_timeout | default('5s') }}
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -668,9 +674,12 @@ static_resources:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
validation_context:
trusted_ca:
filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
- name: xai
connect_timeout: 0.5s
connect_timeout: {{ upstream_connect_timeout | default('5s') }}
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -693,9 +702,12 @@ static_resources:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
validation_context:
trusted_ca:
filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
- name: moonshotai
connect_timeout: 0.5s
connect_timeout: {{ upstream_connect_timeout | default('5s') }}
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -718,9 +730,12 @@ static_resources:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
validation_context:
trusted_ca:
filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
- name: zhipu
connect_timeout: 0.5s
connect_timeout: {{ upstream_connect_timeout | default('5s') }}
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -743,9 +758,12 @@ static_resources:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
validation_context:
trusted_ca:
filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
- name: together_ai
connect_timeout: 0.5s
connect_timeout: {{ upstream_connect_timeout | default('5s') }}
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -768,9 +786,12 @@ static_resources:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
validation_context:
trusted_ca:
filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
- name: gemini
connect_timeout: 0.5s
connect_timeout: {{ upstream_connect_timeout | default('5s') }}
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -793,9 +814,12 @@ static_resources:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
validation_context:
trusted_ca:
filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
- name: groq
connect_timeout: 0.5s
connect_timeout: {{ upstream_connect_timeout | default('5s') }}
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -818,9 +842,12 @@ static_resources:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
validation_context:
trusted_ca:
filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
- name: mistral
connect_timeout: 0.5s
connect_timeout: {{ upstream_connect_timeout | default('5s') }}
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -839,9 +866,16 @@ static_resources:
typed_config:
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
sni: api.mistral.ai
common_tls_context:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
validation_context:
trusted_ca:
filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
- name: openai
connect_timeout: 0.5s
connect_timeout: {{ upstream_connect_timeout | default('5s') }}
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -864,6 +898,9 @@ static_resources:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
validation_context:
trusted_ca:
filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
- name: mistral_7b_instruct
connect_timeout: 0.5s
type: STRICT_DNS
@ -884,7 +921,7 @@ static_resources:
{% if cluster.connect_timeout -%}
connect_timeout: {{ cluster.connect_timeout }}
{% else -%}
connect_timeout: 0.5s
connect_timeout: {{ upstream_connect_timeout | default('5s') }}
{% endif -%}
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
@ -913,12 +950,15 @@ static_resources:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
validation_context:
trusted_ca:
filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
{% endif %}
{% endfor %}
{% for local_llm_provider in local_llms %}
- name: {{ local_llm_provider.cluster_name }}
connect_timeout: 0.5s
connect_timeout: {{ upstream_connect_timeout | default('5s') }}
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -946,6 +986,9 @@ static_resources:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
validation_context:
trusted_ca:
filename: {{ upstream_tls_ca_path | default('/etc/ssl/certs/ca-certificates.crt') }}
{% endif %}
{% endfor %}

View file

@ -265,6 +265,12 @@ properties:
type: boolean
use_agent_orchestrator:
type: boolean
upstream_connect_timeout:
type: string
description: "Connect timeout for upstream provider clusters (e.g., '5s', '10s'). Default is '5s'."
upstream_tls_ca_path:
type: string
description: "Path to the trusted CA bundle for upstream TLS verification. Default is '/etc/ssl/certs/ca-certificates.crt'."
system_prompt:
type: string
prompt_targets:

View file

@ -5,7 +5,7 @@ failed_files=()
for file in $(find . -name config.yaml -o -name plano_config_full_reference.yaml); do
echo "Validating ${file}..."
touch $(pwd)/${file}_rendered
if ! docker run --rm -v "$(pwd)/${file}:/app/plano_config.yaml:ro" -v "$(pwd)/${file}_rendered:/app/plano_config_rendered.yaml:rw" --entrypoint /bin/sh ${PLANO_DOCKER_IMAGE:-katanemo/plano:0.4.7} -c "python -m planoai.config_generator" 2>&1 > /dev/null ; then
if ! docker run --rm -v "$(pwd)/${file}:/app/plano_config.yaml:ro" -v "$(pwd)/${file}_rendered:/app/plano_config_rendered.yaml:rw" --entrypoint /bin/sh ${PLANO_DOCKER_IMAGE:-katanemo/plano:0.4.8} -c "python -m planoai.config_generator" 2>&1 > /dev/null ; then
echo "Validation failed for $file"
failed_files+=("$file")
fi

View file

@ -3,15 +3,17 @@ use std::time::Instant;
use bytes::Bytes;
use common::configuration::SpanAttributes;
use common::llm_providers::LlmProviders;
use hermesllm::apis::OpenAIMessage;
use hermesllm::clients::SupportedAPIsFromClient;
use hermesllm::providers::request::ProviderRequest;
use hermesllm::ProviderRequestType;
use http_body_util::combinators::BoxBody;
use http_body_util::BodyExt;
use hyper::{Request, Response};
use hyper::{Request, Response, StatusCode};
use opentelemetry::trace::get_active_span;
use serde::ser::Error as SerError;
use tokio::sync::RwLock;
use tracing::{debug, info, info_span, warn, Instrument};
use super::agent_selector::{AgentSelectionError, AgentSelector};
@ -42,6 +44,7 @@ pub async fn agent_chat(
agents_list: Arc<tokio::sync::RwLock<Option<Vec<common::configuration::Agent>>>>,
listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
span_attributes: Arc<Option<SpanAttributes>>,
llm_providers: Arc<RwLock<LlmProviders>>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
let custom_attrs =
collect_custom_trace_attributes(request.headers(), span_attributes.as_ref().as_ref());
@ -75,6 +78,7 @@ pub async fn agent_chat(
orchestrator_service,
agents_list,
listeners,
llm_providers,
request_id,
custom_attrs,
)
@ -160,6 +164,7 @@ async fn handle_agent_chat_inner(
orchestrator_service: Arc<OrchestratorService>,
agents_list: Arc<tokio::sync::RwLock<Option<Vec<common::configuration::Agent>>>>,
listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
llm_providers: Arc<RwLock<LlmProviders>>,
request_id: String,
custom_attrs: std::collections::HashMap<String, String>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, AgentFilterChainError> {
@ -230,16 +235,36 @@ async fn handle_agent_chat_inner(
AgentFilterChainError::RequestParsing(serde_json::Error::custom(err_msg))
})?;
let client_request = match ProviderRequestType::try_from((&chat_request_bytes[..], &api_type)) {
Ok(request) => request,
Err(err) => {
warn!("failed to parse request as ProviderRequestType: {}", err);
let err_msg = format!("Failed to parse request: {}", err);
return Err(AgentFilterChainError::RequestParsing(
serde_json::Error::custom(err_msg),
));
let mut client_request =
match ProviderRequestType::try_from((&chat_request_bytes[..], &api_type)) {
Ok(request) => request,
Err(err) => {
warn!("failed to parse request as ProviderRequestType: {}", err);
let err_msg = format!("Failed to parse request: {}", err);
return Err(AgentFilterChainError::RequestParsing(
serde_json::Error::custom(err_msg),
));
}
};
// If model is not specified in the request, resolve from default provider
if client_request.model().is_empty() {
match llm_providers.read().await.default() {
Some(default_provider) => {
let default_model = default_provider.name.clone();
info!(default_model = %default_model, "no model specified in request, using default provider");
client_request.set_model(default_model);
}
None => {
let err_msg = "No model specified in request and no default provider configured";
warn!("{}", err_msg);
let mut bad_request =
Response::new(ResponseHandler::create_full_body(err_msg.to_string()));
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
return Ok(bad_request);
}
}
};
}
let message: Vec<OpenAIMessage> = client_request.get_messages();

View file

@ -162,9 +162,30 @@ async fn llm_chat_inner(
Some(SupportedAPIsFromClient::OpenAIResponsesAPI(_))
);
// If model is not specified in the request, resolve from default provider
let model_from_request = client_request.model().to_string();
let model_from_request = if model_from_request.is_empty() {
match llm_providers.read().await.default() {
Some(default_provider) => {
let default_model = default_provider.name.clone();
info!(default_model = %default_model, "no model specified in request, using default provider");
client_request.set_model(default_model.clone());
default_model
}
None => {
let err_msg = "No model specified in request and no default provider configured";
warn!("{}", err_msg);
let mut bad_request = Response::new(full(err_msg.to_string()));
*bad_request.status_mut() = StatusCode::BAD_REQUEST;
return Ok(bad_request);
}
}
} else {
model_from_request
};
// Model alias resolution: update model field in client_request immediately
// This ensures all downstream objects use the resolved model
let model_from_request = client_request.model().to_string();
let temperature = client_request.get_temperature();
let is_streaming_request = client_request.is_streaming();
let alias_resolved_model = resolve_model_alias(&model_from_request, &model_aliases);

View file

@ -211,6 +211,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
agents_list,
listeners,
span_attributes,
llm_providers,
)
.with_context(parent_cx)
.await;

View file

@ -102,6 +102,7 @@ pub struct McpServer {
#[skip_serializing_none]
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct MessagesRequest {
#[serde(default)]
pub model: String,
pub messages: Vec<MessagesMessage>,
pub max_tokens: u32,

View file

@ -74,6 +74,7 @@ impl ApiDefinition for OpenAIApi {
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct ChatCompletionsRequest {
pub messages: Vec<Message>,
#[serde(default)]
pub model: String,
// pub audio: Option<Audio> // GOOD FIRST ISSUE: future support for audio input
pub frequency_penalty: Option<f32>,

View file

@ -29,6 +29,7 @@ impl TryFrom<&[u8]> for ResponsesAPIResponse {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ResponsesAPIRequest {
/// The model to use for generating the response
#[serde(default)]
pub model: String,
/// Text, image, or file inputs to the model

View file

@ -12,7 +12,6 @@ listeners:
timeout: 30s
llm_providers:
# Kimi K2.5 — Moonshot AI's open model (1T MoE, 32B active params)
# Great for general conversation, agentic tasks, and multimodal work
# OpenAI-compatible API at $0.60/M input, $2.50/M output tokens
@ -21,13 +20,13 @@ llm_providers:
base_url: https://api.moonshot.ai/v1
default: true
routing_preferences:
- name: code generation
description: generating code, writing scripts, implementing functions, and building tool integrations
- name: general conversation
description: general chat, greetings, casual conversation, Q&A, and everyday questions
# Claude — Anthropic's most capable model
# Best for complex reasoning, code, tool use, and evaluation
- model: anthropic/claude-sonnet-4-5
access_key: $ANTHROPIC_API_KEY
routing_preferences:
- name: general conversation
description: general chat, greetings, casual conversation, Q&A, and everyday questions
- name: code generation
description: generating code, writing scripts, implementing functions, and building tool integrations

View file

@ -15,9 +15,9 @@ Make sure your machine is up to date with [latest version of plano]([url](https:
```bash
(venv) $ planoai up --service plano --foreground
# Or if installed with uv: uvx planoai up --service plano --foreground
2025-05-30 18:00:09,953 - planoai.main - INFO - Starting plano cli version: 0.4.7
2025-05-30 18:00:09,953 - planoai.main - INFO - Starting plano cli version: 0.4.8
2025-05-30 18:00:09,953 - planoai.main - INFO - Validating /Users/adilhafeez/src/intelligent-prompt-gateway/demos/llm_routing/preference_based_routing/config.yaml
2025-05-30 18:00:10,422 - cli.core - INFO - Starting plano gateway, image name: plano, tag: katanemo/plano:0.4.7
2025-05-30 18:00:10,422 - cli.core - INFO - Starting plano gateway, image name: plano, tag: katanemo/plano:0.4.8
2025-05-30 18:00:10,662 - cli.core - INFO - plano status: running, health status: starting
2025-05-30 18:00:11,712 - cli.core - INFO - plano status: running, health status: starting
2025-05-30 18:00:12,761 - cli.core - INFO - plano is running and is healthy!

View file

@ -17,7 +17,7 @@ from sphinxawesome_theme.postprocess import Icons
project = "Plano Docs"
copyright = "2025, Katanemo Labs, Inc"
author = "Katanemo Labs, Inc"
release = " v0.4.7"
release = " v0.4.8"
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

View file

@ -37,7 +37,7 @@ Plano's CLI allows you to manage and interact with the Plano efficiently. To ins
.. code-block:: console
$ uv tool install planoai==0.4.7
$ uv tool install planoai==0.4.8
**Option 2: Install with pip (Traditional)**
@ -45,7 +45,7 @@ Plano's CLI allows you to manage and interact with the Plano efficiently. To ins
$ python -m venv venv
$ source venv/bin/activate # On Windows, use: venv\Scripts\activate
$ pip install planoai==0.4.7
$ pip install planoai==0.4.8
.. _llm_routing_quickstart:
@ -90,7 +90,7 @@ Start Plano:
$ planoai up plano_config.yaml
# Or if installed with uv tool: uvx planoai up plano_config.yaml
2024-12-05 11:24:51,288 - planoai.main - INFO - Starting plano cli version: 0.4.7
2024-12-05 11:24:51,288 - planoai.main - INFO - Starting plano cli version: 0.4.8
2024-12-05 11:24:51,825 - planoai.utils - INFO - Schema validation successful!
2024-12-05 11:24:51,825 - planoai.main - INFO - Starting plano
...

View file

@ -25,7 +25,7 @@ Create a ``docker-compose.yml`` file with the following configuration:
# docker-compose.yml
services:
plano:
image: katanemo/plano:0.4.7
image: katanemo/plano:0.4.8
container_name: plano
ports:
- "10000:10000" # ingress (client -> plano)

View file

@ -46,6 +46,117 @@ Also, Plano utilizes `Envoy event-based thread model <https://blog.envoyproxy.io
Worker threads rarely share state and operate in a trivially parallel fashion. This threading model
enables scaling to very high core count CPUs.
.. code-block:: text
┌─────────────────────────────────────────────────────────────────────────────────────┐
│ P L A N O │
│ AI-native proxy and data plane for agentic applications │
│ │
│ ┌─────────────────────┐ │
│ │ YOUR CLIENTS │ │
│ │ (apps· agents · UI) │ │
│ └──────────┬──────────┘ │
│ │ │
│ ┌──────────────────────────────┼──────────────────────────┐ │
│ │ │ │ │
│ ┌──────▼──────────┐ ┌─────────▼────────┐ ┌────────▼─────────┐ │
│ │ Agent Port(s) │ │ Model Port │ │ Function-Call │ │
│ │ :8001+ │ │ :12000 │ │ Port :10000 │ │
│ │ │ │ │ │ │ │
│ │ route your │ │ direct LLM │ │ prompt-target / │ │
│ │ prompts to │ │ calls with │ │ tool dispatch │ │
│ │ the right │ │ model-alias │ │ with parameter │ │
│ │ agent │ │ translation │ │ extraction │ │
│ └──────┬──────────┘ └─────────┬────────┘ └────────┬─────────┘ │
│ └──────────────────────────────┼─────────────────────────┘ │
│ │ │
│ ╔══════════════════════════════════════▼══════════════════════════════════════╗ │
│ ║ BRIGHTSTAFF (SUBSYSTEM) — Agentic Control Plane ║ │
│ ║ Async · non-blocking · parallel per-request Tokio tasks ║ │
│ ║ ║ │
│ ║ ┌─────────────────────────────────────────────────────────────────────┐ ║ │
│ ║ │ Agentic ROUTER │ ║ │
│ ║ │ Reads listener config · maps incoming request to execution path │ ║ │
│ ║ │ │ ║ │
│ ║ │ /agents/* ──────────────────────► AGENT PATH │ ║ │
│ ║ │ /v1/chat|messages|responses ──────► LLM PATH │ ║ │
│ ║ └─────────────────────────────────────────────────────────────────────┘ ║ │
│ ║ ║ │
│ ║ ─────────────────────── AGENT PATH ──────────────────────────────────── ║ │
│ ║ ║ │
│ ║ ┌──────────────────────────────────────────────────────────────────────┐ ║ │
│ ║ │ FILTER CHAIN (pipeline_processor.rs) │ ║ │
│ ║ │ │ ║ │
│ ║ │ prompt ──► [input_guards] ──► [query_rewrite] ──► [context_builder] │ ║ │
│ ║ │ guardrails prompt mutation RAG / enrichment │ ║ │
│ ║ │ │ ║ │
│ ║ │ Each filter: HTTP or MCP · can mutate, enrich, or short-circuit │ ║ │
│ ║ └──────────────────────────────────┬───────────────────────────────────┘ ║ │
│ ║ │ ║ │
│ ║ ┌──────────────────────────────────▼───────────────────────────────────┐ ║ │
│ ║ │ AGENT ORCHESTRATOR (agent_chat_completions.rs) │ ║ │
│ ║ │ Select agent · forward enriched request · manage conversation state │ ║ │
│ ║ │ Stream response back · multi-turn aware │ ║ │
│ ║ └──────────────────────────────────────────────────────────────────────┘ ║ │
│ ║ ║ │
│ ║ ─────────────────────── LLM PATH ────────────────────────────────────── ║ │
│ ║ ║ │
│ ║ ┌──────────────────────────────────────────────────────────────────────┐ ║ │
│ ║ │ MODEL ROUTER (llm_router.rs + router_chat.rs) │ ║ │
│ ║ │ Model alias resolution · preference-based provider selection │ ║ │
│ ║ │ "fast-llm" → gpt-4o-mini · "smart-llm" → gpt-4o │ ║ │
│ ║ └──────────────────────────────────────────────────────────────────────┘ ║ │
│ ║ ║ │
│ ║ ─────────────────── ALWAYS ON (every request) ───────────────────────── ║ │
│ ║ ║ │
│ ║ ┌────────────────────┐ ┌─────────────────────┐ ┌──────────────────┐ ║ │
│ ║ │ SIGNALS ANALYZER │ │ STATE STORAGE │ │ OTEL TRACING │ ║ │
│ ║ │ loop detection │ │ memory / postgres │ │ traceparent │ ║ │
│ ║ │ repetition score │ │ /v1/responses │ │ span injection │ ║ │
│ ║ │ quality indicators│ │ stateful API │ │ trace export │ ║ │
│ ║ └────────────────────┘ └─────────────────────┘ └──────────────────┘ ║ │
│ ╚═════════════════════════════════════╤═══════════════════════════════════════╝ │
│ │ │
│ ┌─────────────────────────────────────▼──────────────────────────────────────┐ │
│ │ LLM GATEWAY (llm_gateway.wasm — embedded in Envoy egress filter chain) │ │
│ │ │ │
│ │ Rate limiting · Provider format translation · TTFT metrics │ │
│ │ OpenAI → Anthropic · Gemini · Mistral · Groq · DeepSeek · xAI · Bedrock │ │
│ │ │ │
│ │ Envoy handles beneath this: TLS origination · SNI · retry + backoff │ │
│ │ connection pooling · LOGICAL_DNS · structured access logs │ │
│ └─────────────────────────────────────┬──────────────────────────────────────┘ │
│ │ │
└─────────────────────────────────────────┼───────────────────────────────────────────┘
┌───────────────────────────┼────────────────────────────┐
│ │ │
┌─────────▼──────────┐ ┌────────────▼──────────┐ ┌────────────▼──────────┐
│ LLM PROVIDERS │ │ EXTERNAL AGENTS │ │ TOOL / API BACKENDS │
│ OpenAI · Anthropic│ │ (filter chain svc) │ │ (endpoint clusters) │
│ Gemini · Mistral │ │ HTTP / MCP :10500+ │ │ user-defined hosts │
│ Groq · DeepSeek │ │ input_guards │ │ │
│ xAI · Together.ai │ │ query_rewriter │ │ │
└────────────────────┘ │ context_builder │ └───────────────────────┘
└───────────────────────┘
HOW PLANO IS DIFFERENT
─────────────────────────────────────────────────────────────────────────────────
Brightstaff is the entire agentic brain — one async Rust binary that handles
agent selection, filter chain orchestration, model routing, state, and signals
without blocking a thread per request.
Filter chains are programmable data plane steps — reusable HTTP/MCP services
you wire into any agent, executing in-path before the agent ever sees the prompt.
The LLM gateway is a zero-overhead WASM plugin inside Envoy — format translation
and rate limiting happen in-process with the proxy, not as a separate service hop.
Envoy provides the transport substrate (TLS, HTTP codecs, retries, connection
pools, access logs) so Plano never reimplements solved infrastructure problems.
Request Flow (Ingress)
----------------------