Use mcp tools for filter chain (#621)

* agents framework demo

* more changes

* add more changes

* pending changes

* fix tests

* fix more

* rebase with main and better handle error from mcp

* add trace for filters

* add test for client error, server error and for mcp error

* update schema validate code and rename kind => type in agent_filter

* fix agent description and pre-commit

* fix tests

* add provider specific request parsing in agents chat

* fix precommit and tests

* cleanup demo

* update readme

* fix pre-commit

* refactor tracing

* fix fmt

* fix: handle MessageContent enum in responses API conversion

- Update request.rs to handle new MessageContent enum structure from main
- MessageContent can now be Text(String) or Items(Vec<InputContent>)
- Handle new InputItem variants (ItemReference, FunctionCallOutput)
- Fixes compilation error after merging latest main (#632)

* address pr feedback

* fix span

* fix build

* update openai version
This commit is contained in:
Adil Hafeez 2025-12-17 17:30:14 -08:00 committed by GitHub
parent cb82a83c7b
commit 2f9121407b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
40 changed files with 4886 additions and 190 deletions

View file

@ -14,6 +14,38 @@ properties:
type: array
items:
type: object
properties:
id:
type: string
url:
type: string
additionalProperties: false
required:
- id
- url
filters:
type: array
items:
type: object
properties:
id:
type: string
url:
type: string
type:
type: string
enum:
- mcp
transport:
type: string
enum:
- streamable-http
tool:
type: string
additionalProperties: false
required:
- id
- url
listeners:
oneOf:
- type: array

View file

@ -214,21 +214,21 @@ static_resources:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
generate_request_id: true
tracing:
provider:
name: envoy.tracers.opentelemetry
typed_config:
"@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
grpc_service:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: tools
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}
# {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
# generate_request_id: true
# tracing:
# provider:
# name: envoy.tracers.opentelemetry
# typed_config:
# "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
# grpc_service:
# envoy_grpc:
# cluster_name: opentelemetry_collector
# timeout: 0.250s
# service_name: tools
# random_sampling:
# value: {{ arch_tracing.random_sampling }}
# {% endif %}
stat_prefix: outbound_api_traffic
codec_type: AUTO
scheme_header_transformation:
@ -299,7 +299,7 @@ static_resources:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: arch_gateway
service_name: plano(inbound)
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}

View file

@ -101,8 +101,17 @@ def validate_and_render_schema():
# Process agents section and convert to endpoints
agents = config_yaml.get("agents", [])
for agent in agents:
filters = config_yaml.get("filters", [])
agents_combined = agents + filters
agent_id_keys = set()
for agent in agents_combined:
agent_id = agent.get("id")
if agent_id in agent_id_keys:
raise Exception(
f"Duplicate agent id {agent_id}, please provide unique id for each agent"
)
agent_id_keys.add(agent_id)
agent_endpoint = agent.get("url")
if agent_id and agent_endpoint:

View file

@ -57,6 +57,10 @@ def convert_legacy_listeners(
"timeout": "30s",
}
# Handle None case
if listeners is None:
return [llm_gateway_listener], llm_gateway_listener, prompt_gateway_listener
if isinstance(listeners, dict):
# legacy listeners
# check if type is array or object

View file

@ -94,21 +94,16 @@ def test_validate_and_render_happy_path_agent_config(monkeypatch):
version: v0.3.0
agents:
- name: query_rewriter
kind: openai
endpoint: http://localhost:10500
- name: context_builder
kind: openai
endpoint: http://localhost:10501
- name: response_generator
kind: openai
endpoint: http://localhost:10502
- name: research_agent
kind: openai
endpoint: http://localhost:10500
- name: input_guard_rails
kind: openai
endpoint: http://localhost:10503
- id: query_rewriter
url: http://localhost:10500
- id: context_builder
url: http://localhost:10501
- id: response_generator
url: http://localhost:10502
- id: research_agent
url: http://localhost:10500
- id: input_guard_rails
url: http://localhost:10503
listeners:
- name: tmobile
@ -156,7 +151,7 @@ listeners:
mock.mock_open().return_value, # ARCH_CONFIG_FILE_RENDERED (write)
]
with mock.patch("builtins.open", m_open):
with mock.patch("config_generator.Environment"):
with mock.patch("cli.config_generator.Environment"):
validate_and_render_schema()

View file

@ -1,16 +1,24 @@
use std::sync::Arc;
use std::time::{Instant, SystemTime};
use bytes::Bytes;
use hermesllm::apis::openai::ChatCompletionsRequest;
use common::consts::TRACE_PARENT_HEADER;
use common::traces::{SpanBuilder, SpanKind, parse_traceparent, generate_random_span_id};
use hermesllm::apis::OpenAIMessage;
use hermesllm::clients::SupportedAPIsFromClient;
use hermesllm::providers::request::ProviderRequest;
use hermesllm::ProviderRequestType;
use http_body_util::combinators::BoxBody;
use http_body_util::BodyExt;
use hyper::{Request, Response};
use serde::ser::Error as SerError;
use tracing::{debug, info, warn};
use super::agent_selector::{AgentSelectionError, AgentSelector};
use super::pipeline_processor::{PipelineError, PipelineProcessor};
use super::response_handler::ResponseHandler;
use crate::router::llm_router::RouterService;
use crate::tracing::{OperationNameBuilder, operation_component, http};
/// Main errors for agent chat completions
#[derive(Debug, thiserror::Error)]
@ -33,8 +41,17 @@ pub async fn agent_chat(
_: String,
agents_list: Arc<tokio::sync::RwLock<Option<Vec<common::configuration::Agent>>>>,
listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
trace_collector: Arc<common::traces::TraceCollector>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
match handle_agent_chat(request, router_service, agents_list, listeners).await {
match handle_agent_chat(
request,
router_service,
agents_list,
listeners,
trace_collector,
)
.await
{
Ok(response) => Ok(response),
Err(err) => {
// Check if this is a client error from the pipeline that should be cascaded
@ -109,10 +126,11 @@ async fn handle_agent_chat(
router_service: Arc<RouterService>,
agents_list: Arc<tokio::sync::RwLock<Option<Vec<common::configuration::Agent>>>>,
listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
trace_collector: Arc<common::traces::TraceCollector>,
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, AgentFilterChainError> {
// Initialize services
let agent_selector = AgentSelector::new(router_service);
let pipeline_processor = PipelineProcessor::default();
let mut pipeline_processor = PipelineProcessor::default();
let response_handler = ResponseHandler::new();
// Extract listener name from headers
@ -132,6 +150,13 @@ async fn handle_agent_chat(
info!("Handling request for listener: {}", listener.name);
// Parse request body
let request_path = request
.uri()
.path()
.to_string()
.strip_prefix("/agents")
.unwrap()
.to_string();
let request_headers = request.headers().clone();
let chat_request_bytes = request.collect().await?.to_bytes();
@ -140,61 +165,141 @@ async fn handle_agent_chat(
String::from_utf8_lossy(&chat_request_bytes)
);
let chat_completions_request: ChatCompletionsRequest =
serde_json::from_slice(&chat_request_bytes).map_err(|err| {
warn!(
"Failed to parse request body as ChatCompletionsRequest: {}",
err
);
AgentFilterChainError::RequestParsing(err)
// Determine the API type from the endpoint
let api_type =
SupportedAPIsFromClient::from_endpoint(request_path.as_str()).ok_or_else(|| {
let err_msg = format!("Unsupported endpoint: {}", request_path);
warn!("{}", err_msg);
AgentFilterChainError::RequestParsing(serde_json::Error::custom(err_msg))
})?;
let client_request = match ProviderRequestType::try_from((&chat_request_bytes[..], &api_type)) {
Ok(request) => request,
Err(err) => {
warn!("Failed to parse request as ProviderRequestType: {}", err);
let err_msg = format!("Failed to parse request: {}", err);
return Err(AgentFilterChainError::RequestParsing(
serde_json::Error::custom(err_msg),
));
}
};
let message: Vec<OpenAIMessage> = client_request.get_messages();
// let chat_completions_request: ChatCompletionsRequest =
// serde_json::from_slice(&chat_request_bytes).map_err(|err| {
// warn!(
// "Failed to parse request body as ChatCompletionsRequest: {}",
// err
// );
// AgentFilterChainError::RequestParsing(err)
// })?;
// Extract trace parent for routing
let trace_parent = request_headers
.iter()
.find(|(key, _)| key.as_str() == "traceparent")
.find(|(key, _)| key.as_str() == TRACE_PARENT_HEADER)
.map(|(_, value)| value.to_str().unwrap_or_default().to_string());
// Select appropriate agent using arch router llm model
let selected_agent = agent_selector
.select_agent(&chat_completions_request.messages, &listener, trace_parent)
.await?;
debug!("Processing agent pipeline: {}", selected_agent.id);
// Create agent map for pipeline processing
// Create agent map for pipeline processing and agent selection
let agent_map = {
let agents = agents_list.read().await;
let agents = agents.as_ref().unwrap();
agent_selector.create_agent_map(agents)
};
// Parse trace parent to get trace_id and parent_span_id
let (trace_id, parent_span_id) = if let Some(ref tp) = trace_parent {
parse_traceparent(tp)
} else {
(String::new(), None)
};
// Select appropriate agent using arch router llm model
let selected_agent = agent_selector
.select_agent(&message, &listener, trace_parent.clone())
.await?;
debug!("Processing agent pipeline: {}", selected_agent.id);
// Record the start time for agent span
let agent_start_time = SystemTime::now();
let agent_start_instant = Instant::now();
// let (span_id, trace_id) = trace_collector.start_span(
// trace_parent.clone(),
// operation_component::AGENT,
// &format!("/agents{}", request_path),
// &selected_agent.id,
// );
let span_id = generate_random_span_id();
// Process the filter chain
let processed_messages = pipeline_processor
let chat_history = pipeline_processor
.process_filter_chain(
&chat_completions_request,
&message,
&selected_agent,
&agent_map,
&request_headers,
Some(&trace_collector),
trace_id.clone(),
span_id.clone(),
)
.await?;
// Get terminal agent and send final response
let terminal_agent_name = selected_agent.id;
let terminal_agent_name = selected_agent.id.clone();
let terminal_agent = agent_map.get(&terminal_agent_name).unwrap();
debug!("Processing terminal agent: {}", terminal_agent_name);
debug!("Terminal agent details: {:?}", terminal_agent);
let llm_response = pipeline_processor
.invoke_upstream_agent(
&processed_messages,
&chat_completions_request,
.invoke_agent(
&chat_history,
client_request,
terminal_agent,
&request_headers,
trace_id.clone(),
span_id.clone(),
)
.await?;
// Record agent span after processing is complete
let agent_end_time = SystemTime::now();
let agent_elapsed = agent_start_instant.elapsed();
// Build full path with /agents prefix
let full_path = format!("/agents{}", request_path);
// Build operation name: POST {full_path} {agent_name}
let operation_name = OperationNameBuilder::new()
.with_method("POST")
.with_path(&full_path)
.with_target(&terminal_agent_name)
.build();
let mut span_builder = SpanBuilder::new(&operation_name)
.with_span_id(span_id)
.with_kind(SpanKind::Internal)
.with_start_time(agent_start_time)
.with_end_time(agent_end_time)
.with_attribute(http::METHOD, "POST")
.with_attribute(http::TARGET, full_path)
.with_attribute("agent.name", terminal_agent_name.clone())
.with_attribute("duration_ms", format!("{:.2}", agent_elapsed.as_secs_f64() * 1000.0));
if !trace_id.is_empty() {
span_builder = span_builder.with_trace_id(trace_id);
}
if let Some(parent_id) = parent_span_id {
span_builder = span_builder.with_parent_span_id(parent_id);
}
let span = span_builder.build();
// Use plano(agent) as service name for the agent processing span
trace_collector.record_span(operation_component::AGENT, span);
// Create streaming response
response_handler
.create_streaming_response(llm_response)

View file

@ -20,6 +20,8 @@ pub enum AgentSelectionError {
RoutingError(String),
#[error("Default agent not found for listener: {0}")]
DefaultAgentNotFound(String),
#[error("MCP client error: {0}")]
McpError(String),
}
/// Service for selecting agents based on routing preferences and listener configuration
@ -29,7 +31,9 @@ pub struct AgentSelector {
impl AgentSelector {
pub fn new(router_service: Arc<RouterService>) -> Self {
Self { router_service }
Self {
router_service,
}
}
/// Find listener by name from the request headers
@ -77,7 +81,9 @@ impl AgentSelector {
return Ok(agents[0].clone());
}
let usage_preferences = self.convert_agent_description_to_routing_preferences(agents);
let usage_preferences = self
.convert_agent_description_to_routing_preferences(agents)
.await;
debug!(
"Agents usage preferences for agent routing str: {}",
serde_json::to_string(&usage_preferences).unwrap_or_default()
@ -131,20 +137,23 @@ impl AgentSelector {
}
/// Convert agent descriptions to routing preferences
fn convert_agent_description_to_routing_preferences(
async fn convert_agent_description_to_routing_preferences(
&self,
agents: &[AgentFilterChain],
) -> Vec<ModelUsagePreference> {
agents
.iter()
.map(|agent| ModelUsagePreference {
model: agent.id.clone(),
let mut preferences = Vec::new();
for agent_chain in agents {
preferences.push(ModelUsagePreference {
model: agent_chain.id.clone(),
routing_preferences: vec![RoutingPreference {
name: agent.id.clone(),
description: agent.description.as_ref().unwrap_or(&String::new()).clone(),
name: agent_chain.id.clone(),
description: agent_chain.description.clone().unwrap_or_default(),
}],
})
.collect()
});
}
preferences
}
}
@ -183,8 +192,10 @@ mod tests {
fn create_test_agent_struct(name: &str) -> Agent {
Agent {
id: name.to_string(),
kind: Some("test".to_string()),
agent_type: Some("test".to_string()),
url: "http://localhost:8080".to_string(),
tool: None,
transport: None,
}
}
@ -240,8 +251,8 @@ mod tests {
assert!(agent_map.contains_key("agent2"));
}
#[test]
fn test_convert_agent_description_to_routing_preferences() {
#[tokio::test]
async fn test_convert_agent_description_to_routing_preferences() {
let router_service = create_test_router_service();
let selector = AgentSelector::new(router_service);
@ -250,7 +261,9 @@ mod tests {
create_test_agent("agent2", "Second agent description", false),
];
let preferences = selector.convert_agent_description_to_routing_preferences(&agents);
let preferences = selector
.convert_agent_description_to_routing_preferences(&agents)
.await;
assert_eq!(preferences.len(), 2);
assert_eq!(preferences[0].model, "agent1");

View file

@ -42,19 +42,23 @@ mod integration_tests {
// Setup services
let router_service = create_test_router_service();
let agent_selector = AgentSelector::new(router_service);
let pipeline_processor = PipelineProcessor::default();
let mut pipeline_processor = PipelineProcessor::default();
// Create test data
let agents = vec![
Agent {
id: "filter-agent".to_string(),
kind: Some("filter".to_string()),
agent_type: Some("filter".to_string()),
url: "http://localhost:8081".to_string(),
tool: None,
transport: None,
},
Agent {
id: "terminal-agent".to_string(),
kind: Some("terminal".to_string()),
agent_type: Some("terminal".to_string()),
url: "http://localhost:8082".to_string(),
tool: None,
transport: None,
},
];
@ -107,7 +111,15 @@ mod integration_tests {
let headers = HeaderMap::new();
let result = pipeline_processor
.process_filter_chain(&request, &test_pipeline, &agent_map, &headers)
.process_filter_chain(
&request.messages,
&test_pipeline,
&agent_map,
&headers,
None,
String::new(),
String::new(),
)
.await;
println!("Pipeline processing result: {:?}", result);

View file

@ -0,0 +1,49 @@
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// JSON-RPC protocol version string used on every MCP message.
pub const JSON_RPC_VERSION: &str = "2.0";

/// JSON-RPC method name for invoking an MCP tool.
pub const TOOL_CALL_METHOD: &str = "tools/call";

/// JSON-RPC method name for the MCP initialize handshake.
pub const MCP_INITIALIZE: &str = "initialize";

/// Method name sent after the initialize handshake completes.
/// NOTE(review): the MCP specification names this notification
/// "notifications/initialized"; confirm the servers this gateway talks to
/// accept "initialize/notification" — TODO verify before changing.
pub const MCP_INITIALIZE_NOTIFICATION: &str = "initialize/notification";
/// JSON-RPC message id.
///
/// Serialized untagged, so it accepts and emits either a bare string
/// (e.g. a UUID) or a bare number on the wire.
/// NOTE(review): JSON-RPC 2.0 also permits null and fractional number ids;
/// `u64` covers this codebase's usage — confirm peers never send others.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum JsonRpcId {
    /// String-valued id.
    String(String),
    /// Non-negative integer id.
    Number(u64),
}
/// A JSON-RPC 2.0 request: carries an id and expects a matching response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonRpcRequest {
    /// Protocol version; expected to be "2.0" (see [`JSON_RPC_VERSION`]).
    pub jsonrpc: String,
    /// Correlation id echoed back in the matching response.
    pub id: JsonRpcId,
    /// Method name, e.g. "tools/call".
    pub method: String,
    /// Optional method parameters; omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub params: Option<HashMap<String, serde_json::Value>>,
}
/// A JSON-RPC 2.0 notification: like a request but without an id,
/// so no response is expected.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonRpcNotification {
    /// Protocol version; expected to be "2.0" (see [`JSON_RPC_VERSION`]).
    pub jsonrpc: String,
    /// Method name of the notification.
    pub method: String,
    /// Optional parameters; omitted from the serialized form when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub params: Option<HashMap<String, serde_json::Value>>,
}
/// JSON-RPC 2.0 error object carried inside a failed response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonRpcError {
    /// Numeric error code per the JSON-RPC 2.0 spec.
    pub code: i32,
    /// Short human-readable description of the error.
    pub message: String,
    /// Optional additional error details; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub data: Option<serde_json::Value>,
}
/// A JSON-RPC 2.0 response.
///
/// Per the spec exactly one of `result` / `error` should be present;
/// this type does not enforce that invariant — callers must check both.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonRpcResponse {
    /// Protocol version; expected to be "2.0" (see [`JSON_RPC_VERSION`]).
    pub jsonrpc: String,
    /// Id of the request this response answers.
    pub id: JsonRpcId,
    /// Successful result payload; omitted from the wire when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub result: Option<HashMap<String, serde_json::Value>>,
    /// Error object on failure; omitted from the wire when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<JsonRpcError>,
}

View file

@ -7,6 +7,7 @@ pub mod function_calling;
pub mod pipeline_processor;
pub mod response_handler;
pub mod utils;
pub mod jsonrpc;
#[cfg(test)]
mod integration_tests;

View file

@ -1,10 +1,24 @@
use std::collections::HashMap;
use common::configuration::{Agent, AgentFilterChain};
use common::consts::{ARCH_UPSTREAM_HOST_HEADER, ENVOY_RETRY_HEADER};
use hermesllm::apis::openai::{ChatCompletionsRequest, Message};
use common::consts::{
ARCH_UPSTREAM_HOST_HEADER, BRIGHT_STAFF_SERVICE_NAME, ENVOY_RETRY_HEADER, TRACE_PARENT_HEADER,
};
use common::traces::{SpanBuilder, SpanKind, generate_random_span_id};
use hermesllm::apis::openai::Message;
use hermesllm::{ProviderRequest, ProviderRequestType};
use hyper::header::HeaderMap;
use tracing::{debug, warn};
use std::time::{Instant, SystemTime};
use tracing::{debug, info, warn};
use crate::tracing::operation_component::{self};
use crate::tracing::{http, OperationNameBuilder};
use crate::handlers::jsonrpc::{
JsonRpcId, JsonRpcNotification, JsonRpcRequest, JsonRpcResponse, JSON_RPC_VERSION,
MCP_INITIALIZE, MCP_INITIALIZE_NOTIFICATION, TOOL_CALL_METHOD,
};
use uuid::Uuid;
/// Errors that can occur during pipeline processing
#[derive(Debug, thiserror::Error)]
@ -19,6 +33,12 @@ pub enum PipelineError {
NoChoicesInResponse(String),
#[error("No content in response from agent '{0}'")]
NoContentInResponse(String),
#[error("No result in response from agent '{0}'")]
NoResultInResponse(String),
#[error("No structured content in response from agent '{0}'")]
NoStructuredContentInResponse(String),
#[error("No messages in response from agent '{0}'")]
NoMessagesInResponse(String),
#[error("Client error from agent '{agent}' (HTTP {status}): {body}")]
ClientError {
agent: String,
@ -37,13 +57,17 @@ pub enum PipelineError {
pub struct PipelineProcessor {
client: reqwest::Client,
url: String,
agent_id_session_map: HashMap<String, String>,
}
const ENVOY_API_ROUTER_ADDRESS: &str = "http://localhost:11000";
impl Default for PipelineProcessor {
fn default() -> Self {
Self {
client: reqwest::Client::new(),
url: "http://localhost:11000/v1/chat/completions".to_string(),
url: ENVOY_API_ROUTER_ADDRESS.to_string(),
agent_id_session_map: HashMap::new(),
}
}
}
@ -53,18 +77,128 @@ impl PipelineProcessor {
Self {
client: reqwest::Client::new(),
url,
agent_id_session_map: HashMap::new(),
}
}
/// Record a span covering one filter execution.
///
/// Builds a client-kind span for the given filter invocation and hands it to
/// `collector` under the AGENT_FILTER component. Returns the span id so the
/// caller can use it as the parent for child spans (e.g. MCP tool calls made
/// while the filter ran).
///
/// The operation name uses the generic path "/agents/*" because the concrete
/// request endpoint is not available at this layer.
#[allow(clippy::too_many_arguments)] // mirrors the trace-context plumbing used by callers
fn record_filter_span(
    &self,
    collector: &std::sync::Arc<common::traces::TraceCollector>,
    agent_name: &str,
    tool_name: &str,
    start_time: SystemTime,
    end_time: SystemTime,
    elapsed: std::time::Duration,
    trace_id: String,
    parent_span_id: String,
    span_id: String,
) -> String {
    // Operation name: POST /agents/* {filter_name}
    let operation_name = OperationNameBuilder::new()
        .with_method("POST")
        .with_path("/agents/*")
        .with_target(agent_name)
        .build();

    let mut span_builder = SpanBuilder::new(&operation_name)
        .with_span_id(span_id.clone())
        .with_kind(SpanKind::Client)
        .with_start_time(start_time)
        .with_end_time(end_time)
        .with_attribute(http::METHOD, "POST")
        .with_attribute(http::TARGET, "/agents/*")
        .with_attribute("filter.name", agent_name.to_string())
        .with_attribute("filter.tool_name", tool_name.to_string())
        .with_attribute(
            "duration_ms",
            format!("{:.2}", elapsed.as_secs_f64() * 1000.0),
        );

    // Only attach trace context when the caller actually has one.
    if !trace_id.is_empty() {
        span_builder = span_builder.with_trace_id(trace_id);
    }
    if !parent_span_id.is_empty() {
        span_builder = span_builder.with_parent_span_id(parent_span_id);
    }

    let span = span_builder.build();
    // Filter execution spans are recorded under the AGENT_FILTER component.
    collector.record_span(operation_component::AGENT_FILTER, span);

    // `span_id` was cloned into the builder above, so the original can be
    // returned directly (the previous version did a redundant final clone).
    span_id
}
/// Record a span for MCP protocol interactions
fn record_mcp_span(
&self,
collector: &std::sync::Arc<common::traces::TraceCollector>,
operation: &str,
agent_id: &str,
start_time: SystemTime,
end_time: SystemTime,
elapsed: std::time::Duration,
additional_attrs: Option<HashMap<&str, String>>,
trace_id: String,
parent_span_id: String,
span_id: Option<String>,
) {
// let (trace_id, parent_span_id) = self.extract_trace_context();
// Build operation name: POST /mcp {agent_id}
let operation_name = OperationNameBuilder::new()
.with_method("POST")
.with_path("/mcp")
.with_operation(operation)
.with_target(agent_id)
.build();
let mut span_builder = SpanBuilder::new(&operation_name)
.with_span_id(span_id.unwrap_or_else(|| generate_random_span_id()))
.with_kind(SpanKind::Client)
.with_start_time(start_time)
.with_end_time(end_time)
.with_attribute(http::METHOD, "POST")
.with_attribute(http::TARGET, &format!("/mcp ({})", operation.to_string()))
.with_attribute("mcp.operation", operation.to_string())
.with_attribute("mcp.agent_id", agent_id.to_string())
.with_attribute(
"duration_ms",
format!("{:.2}", elapsed.as_secs_f64() * 1000.0),
);
if let Some(attrs) = additional_attrs {
for (key, value) in attrs {
span_builder = span_builder.with_attribute(key, value);
}
}
if !trace_id.is_empty() {
span_builder = span_builder.with_trace_id(trace_id);
}
if !parent_span_id.is_empty() {
span_builder = span_builder.with_parent_span_id(parent_span_id);
}
let span = span_builder.build();
// MCP spans also use plano(filter) service name as they are part of filter operations
collector.record_span(operation_component::AGENT_FILTER, span);
}
/// Process the filter chain of agents (all except the terminal agent)
pub async fn process_filter_chain(
&self,
initial_request: &ChatCompletionsRequest,
&mut self,
chat_history: &[Message],
agent_filter_chain: &AgentFilterChain,
agent_map: &HashMap<String, Agent>,
request_headers: &HeaderMap,
trace_collector: Option<&std::sync::Arc<common::traces::TraceCollector>>,
trace_id: String,
parent_span_id: String,
) -> Result<Vec<Message>, PipelineError> {
let mut chat_completions_history = initial_request.messages.clone();
let mut chat_history_updated = chat_history.to_vec();
for agent_name in &agent_filter_chain.filter_chain {
debug!("Processing filter agent: {}", agent_name);
@ -73,123 +207,490 @@ impl PipelineProcessor {
.get(agent_name)
.ok_or_else(|| PipelineError::AgentNotFound(agent_name.clone()))?;
debug!("Agent details: {:?}", agent);
let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);
let response_content = self
.send_agent_filter_chain_request(
&chat_completions_history,
initial_request,
info!(
"executing filter: {}/{}, url: {}, conversation length: {}",
agent_name,
tool_name,
agent.url,
chat_history.len()
);
let start_time = SystemTime::now();
let start_instant = Instant::now();
// Generate filter span ID before execution so MCP spans can use it as parent
let filter_span_id = generate_random_span_id();
chat_history_updated = self
.execute_filter(
&chat_history_updated,
agent,
request_headers,
trace_collector,
trace_id.clone(),
filter_span_id.clone(),
)
.await?;
debug!("Received response from filter agent {}", agent_name);
let end_time = SystemTime::now();
let elapsed = start_instant.elapsed();
// Parse the response content as new message history
chat_completions_history =
serde_json::from_str(&response_content).inspect_err(|err| {
warn!(
"Failed to parse response from agent {}, err: {}, response: {}",
agent_name, err, response_content
)
})?;
info!(
"Filter '{}' completed in {:.2}ms, updated conversation length: {}",
agent_name,
elapsed.as_secs_f64() * 1000.0,
chat_history_updated.len()
);
// Record span for this filter execution
if let Some(collector) = trace_collector {
self.record_filter_span(
collector,
agent_name,
tool_name,
start_time,
end_time,
elapsed,
trace_id.clone(),
parent_span_id.clone(),
filter_span_id,
);
}
}
Ok(chat_completions_history)
Ok(chat_history_updated)
}
/// Send request to a specific agent and return the response content
async fn send_agent_filter_chain_request(
/// Build common MCP headers for requests
fn build_mcp_headers(
&self,
messages: &[Message],
original_request: &ChatCompletionsRequest,
agent: &Agent,
request_headers: &HeaderMap,
) -> Result<String, PipelineError> {
let mut request = original_request.clone();
request.messages = messages.to_vec();
agent_id: &str,
session_id: Option<&str>,
trace_id: String,
parent_span_id: String,
) -> Result<HeaderMap, PipelineError> {
let trace_parent = format!("00-{}-{}-01", trace_id, parent_span_id);
let mut headers = request_headers.clone();
headers.remove(hyper::header::CONTENT_LENGTH);
let request_body = serde_json::to_string(&request)?;
debug!("Sending request to agent {}", agent.id);
let mut agent_headers = request_headers.clone();
agent_headers.remove(hyper::header::CONTENT_LENGTH);
agent_headers.insert(
ARCH_UPSTREAM_HOST_HEADER,
hyper::header::HeaderValue::from_str(&agent.id)
.map_err(|_| PipelineError::AgentNotFound(agent.id.clone()))?,
headers.remove(TRACE_PARENT_HEADER);
headers.insert(
TRACE_PARENT_HEADER,
hyper::header::HeaderValue::from_str(&trace_parent).unwrap(),
);
agent_headers.insert(
headers.insert(
ARCH_UPSTREAM_HOST_HEADER,
hyper::header::HeaderValue::from_str(agent_id)
.map_err(|_| PipelineError::AgentNotFound(agent_id.to_string()))?,
);
headers.insert(
ENVOY_RETRY_HEADER,
hyper::header::HeaderValue::from_str("3").unwrap(),
);
headers.insert(
"Accept",
hyper::header::HeaderValue::from_static("application/json, text/event-stream"),
);
headers.insert(
"Content-Type",
hyper::header::HeaderValue::from_static("application/json"),
);
if let Some(sid) = session_id {
headers.insert(
"mcp-session-id",
hyper::header::HeaderValue::from_str(sid).unwrap(),
);
}
Ok(headers)
}
/// Parse an SSE-formatted MCP response body and extract the JSON-RPC payload.
///
/// The expected shape is exactly:
///
/// ```text
/// event: message
/// data: <json>
/// ```
///
/// Returns the raw JSON string from the single `data:` line.
///
/// # Errors
/// Returns `PipelineError::NoContentInResponse` when the body does not match
/// the expected SSE shape: wrong/missing first line, or a number of `data:`
/// lines other than one.
fn parse_sse_response(
    &self,
    response_bytes: &[u8],
    agent_id: &str,
) -> Result<String, PipelineError> {
    let response_str = String::from_utf8_lossy(response_bytes);
    let lines: Vec<&str> = response_str.lines().collect();

    // Validate SSE format: first line must be "event: message".
    // `lines.first()` covers the empty-body case without a separate check.
    if lines.first() != Some(&"event: message") {
        warn!(
            "Invalid SSE response format from agent {}: expected 'event: message' as first line, got: {:?}",
            agent_id,
            lines.first()
        );
        return Err(PipelineError::NoContentInResponse(format!(
            "Invalid SSE response format from agent {}: expected 'event: message' as first line",
            agent_id
        )));
    }

    // Collect the payload of every "data: " line; there must be exactly one.
    // `strip_prefix` both filters and removes the prefix, avoiding the
    // panic-prone manual `[6..]` slice the original used.
    let data_payloads: Vec<&str> = lines
        .iter()
        .filter_map(|line| line.strip_prefix("data: "))
        .collect();

    if data_payloads.len() != 1 {
        warn!(
            "Expected exactly one 'data:' line from agent {}, found {}",
            agent_id,
            data_payloads.len()
        );
        return Err(PipelineError::NoContentInResponse(format!(
            "Expected exactly one 'data:' line from agent {}, found {}",
            agent_id,
            data_payloads.len()
        )));
    }

    Ok(data_payloads[0].to_string())
}
/// Send an MCP request and return the response
async fn send_mcp_request(
&self,
json_rpc_request: &JsonRpcRequest,
headers: HeaderMap,
agent_id: &str,
) -> Result<reqwest::Response, PipelineError> {
let request_body = serde_json::to_string(json_rpc_request)?;
debug!(
"Sending MCP request to agent {}: {}",
agent_id, request_body
);
let response = self
.client
.post(&self.url)
.headers(agent_headers)
.post(format!("{}/mcp", self.url))
.headers(headers)
.body(request_body)
.send()
.await?;
let status = response.status();
Ok(response)
}
/// Build a `tools/call` JSON-RPC request for the given MCP tool.
///
/// The message history is passed as the tool's single "messages" argument:
/// `{ "name": <tool>, "arguments": { "messages": [...] } }`.
/// A fresh UUID is used as the request id.
fn build_tool_call_request(
    &self,
    tool_name: &str,
    messages: &[Message],
) -> Result<JsonRpcRequest, PipelineError> {
    // Inner arguments object: { "messages": [...] }
    let arguments = HashMap::from([(
        "messages".to_string(),
        serde_json::to_value(messages)?,
    )]);

    // Outer params object: { "name": <tool>, "arguments": {...} }
    let params = HashMap::from([
        ("name".to_string(), serde_json::to_value(tool_name)?),
        ("arguments".to_string(), serde_json::to_value(arguments)?),
    ]);

    Ok(JsonRpcRequest {
        jsonrpc: JSON_RPC_VERSION.to_string(),
        id: JsonRpcId::String(Uuid::new_v4().to_string()),
        method: TOOL_CALL_METHOD.to_string(),
        params: Some(params),
    })
}
/// Send request to a specific agent and return the response content
async fn execute_filter(
&mut self,
messages: &[Message],
agent: &Agent,
request_headers: &HeaderMap,
trace_collector: Option<&std::sync::Arc<common::traces::TraceCollector>>,
trace_id: String,
filter_span_id: String,
) -> Result<Vec<Message>, PipelineError> {
// Get or create MCP session
let mcp_session_id = if let Some(session_id) = self.agent_id_session_map.get(&agent.id) {
session_id.clone()
} else {
let session_id = self
.get_new_session_id(
&agent.id,
trace_id.clone(),
filter_span_id.clone(),
)
.await;
self.agent_id_session_map
.insert(agent.id.clone(), session_id.clone());
session_id
};
info!(
"Using MCP session ID {} for agent {}",
mcp_session_id, agent.id
);
// Build JSON-RPC request
let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);
let json_rpc_request = self.build_tool_call_request(tool_name, messages)?;
// Generate span ID for this MCP tool call (child of filter span)
let mcp_span_id = generate_random_span_id();
// Build headers
let agent_headers =
self.build_mcp_headers(request_headers, &agent.id, Some(&mcp_session_id), trace_id.clone(), mcp_span_id.clone())?;
// Send request with tracing
let start_time = SystemTime::now();
let start_instant = Instant::now();
let response = self
.send_mcp_request(
&json_rpc_request,
agent_headers,
&agent.id,
)
.await?;
let http_status = response.status();
let response_bytes = response.bytes().await?;
// Check for HTTP errors and handle them appropriately
if !status.is_success() {
let error_body = String::from_utf8_lossy(&response_bytes).to_string();
let end_time = SystemTime::now();
let elapsed = start_instant.elapsed();
if status.is_client_error() {
// 4xx errors - cascade back to developer
return Err(PipelineError::ClientError {
agent: agent.id.clone(),
status: status.as_u16(),
body: error_body,
});
} else if status.is_server_error() {
// 5xx errors - server/agent error
return Err(PipelineError::ServerError {
agent: agent.id.clone(),
status: status.as_u16(),
body: error_body,
});
}
// Record MCP tool call span
if let Some(collector) = trace_collector {
let mut attrs = HashMap::new();
attrs.insert("mcp.method", "tools/call".to_string());
attrs.insert("mcp.tool_name", tool_name.to_string());
attrs.insert("mcp.session_id", mcp_session_id.clone());
attrs.insert("http.status_code", http_status.as_u16().to_string());
self.record_mcp_span(
collector,
"tool_call",
&agent.id,
start_time,
end_time,
elapsed,
Some(attrs),
trace_id.clone(),
filter_span_id.clone(),
Some(mcp_span_id),
);
}
// Parse the response as JSON to extract the content
let response_json: serde_json::Value = serde_json::from_slice(&response_bytes)?;
// Handle HTTP errors
if !http_status.is_success() {
let error_body = String::from_utf8_lossy(&response_bytes).to_string();
return Err(if http_status.is_client_error() {
PipelineError::ClientError {
agent: agent.id.clone(),
status: http_status.as_u16(),
body: error_body,
}
} else {
PipelineError::ServerError {
agent: agent.id.clone(),
status: http_status.as_u16(),
body: error_body,
}
});
}
let content = response_json
.get("choices")
.and_then(|choices| choices.as_array())
.and_then(|choices| choices.first())
.and_then(|choice| choice.get("message"))
.and_then(|message| message.get("content"))
.and_then(|content| content.as_str())
.ok_or_else(|| PipelineError::NoContentInResponse(agent.id.clone()))?
info!(
"Response from agent {}: {}",
agent.id,
String::from_utf8_lossy(&response_bytes)
);
// Parse SSE response
let data_chunk = self.parse_sse_response(&response_bytes, &agent.id)?;
let response: JsonRpcResponse = serde_json::from_str(&data_chunk)?;
let response_result = response
.result
.ok_or_else(|| PipelineError::NoResultInResponse(agent.id.clone()))?;
// Check if error field is set in response result
if response_result
.get("isError")
.and_then(|v| v.as_bool())
.unwrap_or(false)
{
let error_message = response_result
.get("content")
.and_then(|v| v.as_array())
.and_then(|arr| arr.first())
.and_then(|v| v.get("text"))
.and_then(|v| v.as_str())
.unwrap_or("unknown_error")
.to_string();
return Err(PipelineError::ClientError {
agent: agent.id.clone(),
status: http_status.as_u16(),
body: error_message,
});
}
// Extract structured content and parse messages
let response_json = response_result
.get("structuredContent")
.ok_or_else(|| PipelineError::NoStructuredContentInResponse(agent.id.clone()))?;
let messages: Vec<Message> = response_json
.get("result")
.and_then(|v| v.as_array())
.ok_or_else(|| PipelineError::NoMessagesInResponse(agent.id.clone()))?
.iter()
.map(|msg_value| serde_json::from_value(msg_value.clone()))
.collect::<Result<Vec<Message>, _>>()
.map_err(PipelineError::ParseError)?;
Ok(messages)
}
/// Build an initialize JSON-RPC request
fn build_initialize_request(&self) -> JsonRpcRequest {
JsonRpcRequest {
jsonrpc: JSON_RPC_VERSION.to_string(),
id: JsonRpcId::String(Uuid::new_v4().to_string()),
method: MCP_INITIALIZE.to_string(),
params: Some({
let mut params = HashMap::new();
params.insert(
"protocolVersion".to_string(),
serde_json::Value::String("2024-11-05".to_string()),
);
params.insert("capabilities".to_string(), serde_json::json!({}));
params.insert(
"clientInfo".to_string(),
serde_json::json!({
"name": BRIGHT_STAFF_SERVICE_NAME,
"version": "1.0.0"
}),
);
params
}),
}
}
/// Send initialized notification after session creation
async fn send_initialized_notification(
&self,
agent_id: &str,
session_id: &str,
trace_id: String,
parent_span_id: String,
) -> Result<(), PipelineError> {
let initialized_notification = JsonRpcNotification {
jsonrpc: JSON_RPC_VERSION.to_string(),
method: MCP_INITIALIZE_NOTIFICATION.to_string(),
params: None,
};
let notification_body = serde_json::to_string(&initialized_notification)?;
debug!("Sending initialized notification for agent {}", agent_id);
let headers = self.build_mcp_headers(&HeaderMap::new(), agent_id, Some(session_id), trace_id.clone(), parent_span_id.clone())?;
let response = self
.client
.post(format!("{}/mcp", self.url))
.headers(headers)
.body(notification_body)
.send()
.await?;
info!(
"Initialized notification response status: {}",
response.status()
);
Ok(())
}
async fn get_new_session_id(
&self,
agent_id: &str,
trace_id: String,
parent_span_id: String,
) -> String {
info!("Initializing MCP session for agent {}", agent_id);
let initialize_request = self.build_initialize_request();
let headers = self
.build_mcp_headers(&HeaderMap::new(), agent_id, None, trace_id.clone(), parent_span_id.clone())
.expect("Failed to build headers for initialization");
let response = self
.send_mcp_request(&initialize_request, headers, agent_id)
.await
.expect("Failed to initialize MCP session");
info!("Initialize response status: {}", response.status());
let session_id = response
.headers()
.get("mcp-session-id")
.and_then(|v| v.to_str().ok())
.expect("No mcp-session-id in response")
.to_string();
Ok(content)
info!(
"Created new MCP session for agent {}: {}",
agent_id, session_id
);
// Send initialized notification
self.send_initialized_notification(
agent_id,
&session_id,
trace_id.clone(),
parent_span_id.clone(),
)
.await
.expect("Failed to send initialized notification");
session_id
}
/// Send request to terminal agent and return the raw response for streaming
pub async fn invoke_upstream_agent(
pub async fn invoke_agent(
&self,
messages: &[Message],
original_request: &ChatCompletionsRequest,
mut original_request: ProviderRequestType,
terminal_agent: &Agent,
request_headers: &HeaderMap,
trace_id: String,
agent_span_id: String,
) -> Result<reqwest::Response, PipelineError> {
let mut request = original_request.clone();
request.messages = messages.to_vec();
// let mut request = original_request.clone();
original_request.set_messages(messages);
let request_body = serde_json::to_string(&request)?;
let request_body = ProviderRequestType::to_bytes(&original_request).unwrap();
// let request_body = serde_json::to_string(&request)?;
debug!("Sending request to terminal agent {}", terminal_agent.id);
let mut agent_headers = request_headers.clone();
agent_headers.remove(hyper::header::CONTENT_LENGTH);
// Set traceparent header to make the egress span a child of the agent span
if !trace_id.is_empty() && !agent_span_id.is_empty() {
let trace_parent = format!("00-{}-{}-01", trace_id, agent_span_id);
agent_headers.remove(TRACE_PARENT_HEADER);
agent_headers.insert(
TRACE_PARENT_HEADER,
hyper::header::HeaderValue::from_str(&trace_parent).unwrap(),
);
}
agent_headers.insert(
ARCH_UPSTREAM_HOST_HEADER,
hyper::header::HeaderValue::from_str(&terminal_agent.id)
@ -203,7 +704,7 @@ impl PipelineProcessor {
let response = self
.client
.post(&self.url)
.post(format!("{}/v1/chat/completions", self.url))
.headers(agent_headers)
.body(request_body)
.send()
@ -217,6 +718,7 @@ impl PipelineProcessor {
mod tests {
use super::*;
use hermesllm::apis::openai::{Message, MessageContent, Role};
use mockito::Server;
use std::collections::HashMap;
fn create_test_message(role: Role, content: &str) -> Message {
@ -240,23 +742,149 @@ mod tests {
#[tokio::test]
async fn test_agent_not_found_error() {
let processor = PipelineProcessor::default();
let mut processor = PipelineProcessor::default();
let agent_map = HashMap::new();
let request_headers = HeaderMap::new();
let initial_request = ChatCompletionsRequest {
messages: vec![create_test_message(Role::User, "Hello")],
model: "test-model".to_string(),
..Default::default()
};
let messages = vec![create_test_message(Role::User, "Hello")];
let pipeline = create_test_pipeline(vec!["nonexistent-agent", "terminal-agent"]);
let result = processor
.process_filter_chain(&initial_request, &pipeline, &agent_map, &request_headers)
.process_filter_chain(&messages, &pipeline, &agent_map, &request_headers, None, String::new(), String::new())
.await;
assert!(result.is_err());
matches!(result.unwrap_err(), PipelineError::AgentNotFound(_));
}
#[tokio::test]
async fn test_execute_filter_http_status_error() {
let mut server = Server::new_async().await;
let _m = server
.mock("POST", "/mcp")
.with_status(500)
.with_body("boom")
.create();
let server_url = server.url();
let mut processor = PipelineProcessor::new(server_url.clone());
processor
.agent_id_session_map
.insert("agent-1".to_string(), "session-1".to_string());
let agent = Agent {
id: "agent-1".to_string(),
transport: None,
tool: None,
url: server_url,
agent_type: None,
};
let messages = vec![create_test_message(Role::User, "Hello")];
let request_headers = HeaderMap::new();
let result = processor
.execute_filter(&messages, &agent, &request_headers, None, "trace-123".to_string(), "span-123".to_string())
.await;
match result {
Err(PipelineError::ServerError { status, body, .. }) => {
assert_eq!(status, 500);
assert_eq!(body, "boom");
}
_ => panic!("Expected server error for 500 status"),
}
}
#[tokio::test]
async fn test_execute_filter_http_client_error() {
let mut server = Server::new_async().await;
let _m = server
.mock("POST", "/mcp")
.with_status(400)
.with_body("bad request")
.create();
let server_url = server.url();
let mut processor = PipelineProcessor::new(server_url.clone());
processor
.agent_id_session_map
.insert("agent-3".to_string(), "session-3".to_string());
let agent = Agent {
id: "agent-3".to_string(),
transport: None,
tool: None,
url: server_url,
agent_type: None,
};
let messages = vec![create_test_message(Role::User, "Ping")];
let request_headers = HeaderMap::new();
let result = processor
.execute_filter(&messages, &agent, &request_headers, None, "trace-456".to_string(), "span-456".to_string())
.await;
match result {
Err(PipelineError::ClientError { status, body, .. }) => {
assert_eq!(status, 400);
assert_eq!(body, "bad request");
}
_ => panic!("Expected client error for 400 status"),
}
}
#[tokio::test]
async fn test_execute_filter_mcp_error_flag() {
let rpc_body = serde_json::json!({
"jsonrpc": JSON_RPC_VERSION,
"id": "1",
"result": {
"isError": true,
"content": [
{ "text": "bad tool call" }
]
}
});
let sse_body = format!("event: message\ndata: {}\n\n", rpc_body.to_string());
let mut server = Server::new_async().await;
let _m = server
.mock("POST", "/mcp")
.with_status(200)
.with_body(sse_body)
.create();
let server_url = server.url();
let mut processor = PipelineProcessor::new(server_url.clone());
processor
.agent_id_session_map
.insert("agent-2".to_string(), "session-2".to_string());
let agent = Agent {
id: "agent-2".to_string(),
transport: None,
tool: None,
url: server_url,
agent_type: None,
};
let messages = vec![create_test_message(Role::User, "Hi")];
let request_headers = HeaderMap::new();
let result = processor
.execute_filter(&messages, &agent, &request_headers, None, "trace-789".to_string(), "span-789".to_string())
.await;
match result {
Err(PipelineError::ClientError { status, body, .. }) => {
assert_eq!(status, 200);
assert_eq!(body, "bad tool call");
}
_ => panic!("Expected client error when isError flag is set"),
}
}
}

View file

@ -1,14 +1,14 @@
use brightstaff::handlers::agent_chat_completions::agent_chat;
use brightstaff::handlers::function_calling::function_calling_chat_handler;
use brightstaff::handlers::llm::llm_chat;
use brightstaff::handlers::models::list_models;
use brightstaff::handlers::function_calling::{function_calling_chat_handler};
use brightstaff::router::llm_router::RouterService;
use brightstaff::state::StateStorage;
use brightstaff::state::postgresql::PostgreSQLConversationStorage;
use brightstaff::state::memory::MemoryConversationalStorage;
use brightstaff::utils::tracing::init_tracer;
use bytes::Bytes;
use common::configuration::Configuration;
use common::configuration::{Agent, Configuration};
use common::consts::{CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH};
use common::traces::TraceCollector;
use http_body_util::{combinators::BoxBody, BodyExt, Empty};
@ -63,8 +63,18 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let arch_config = Arc::new(config);
// combine agents and filters into a single list of agents
let all_agents: Vec<Agent> = arch_config
.agents
.as_deref()
.unwrap_or_default()
.iter()
.chain(arch_config.filters.as_deref().unwrap_or_default())
.cloned()
.collect();
let llm_providers = Arc::new(RwLock::new(arch_config.model_providers.clone()));
let agents_list = Arc::new(RwLock::new(arch_config.agents.clone()));
let combined_agents_filters_list = Arc::new(RwLock::new(Some(all_agents)));
let listeners = Arc::new(RwLock::new(arch_config.listeners.clone()));
let llm_provider_url =
env::var("LLM_PROVIDER_ENDPOINT").unwrap_or_else(|_| "http://localhost:12001".to_string());
@ -98,7 +108,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
info!("Tracing configuration found in arch_config.yaml");
Some(true)
} else {
info!("No tracing configuration in arch_config.yaml, will check OTEL_TRACING_ENABLED env var");
info!(
"No tracing configuration in arch_config.yaml, will check OTEL_TRACING_ENABLED env var"
);
None
};
let trace_collector = Arc::new(TraceCollector::new(tracing_enabled));
@ -142,11 +154,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let io = TokioIo::new(stream);
let router_service: Arc<RouterService> = Arc::clone(&router_service);
let model_aliases: Arc<Option<std::collections::HashMap<String, common::configuration::ModelAlias>>> = Arc::clone(&model_aliases);
let model_aliases: Arc<
Option<std::collections::HashMap<String, common::configuration::ModelAlias>>,
> = Arc::clone(&model_aliases);
let llm_provider_url = llm_provider_url.clone();
let llm_providers = llm_providers.clone();
let agents_list = agents_list.clone();
let agents_list = combined_agents_filters_list.clone();
let listeners = listeners.clone();
let trace_collector = trace_collector.clone();
let state_storage = state_storage.clone();
@ -162,28 +176,36 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let state_storage = state_storage.clone();
async move {
match (req.method(), req.uri().path()) {
(&Method::POST, CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH) => {
let fully_qualified_url =
format!("{}{}", llm_provider_url, req.uri().path());
llm_chat(req, router_service, fully_qualified_url, model_aliases, llm_providers, trace_collector, state_storage)
.with_context(parent_cx)
.await
}
(&Method::POST, "/agents/v1/chat/completions") => {
let fully_qualified_url =
format!("{}{}", llm_provider_url, req.uri().path());
agent_chat(
let path = req.uri().path();
// Check if path starts with /agents
if path.starts_with("/agents") {
// Check if it matches one of the agent API paths
let stripped_path = path.strip_prefix("/agents").unwrap();
if matches!(
stripped_path,
CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH
) {
let fully_qualified_url = format!("{}{}", llm_provider_url, stripped_path);
return agent_chat(
req,
router_service,
fully_qualified_url,
agents_list,
listeners,
trace_collector,
)
.with_context(parent_cx)
.await
.await;
}
}
match (req.method(), path) {
(&Method::POST, CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH) => {
let fully_qualified_url =
format!("{}{}", llm_provider_url, path);
llm_chat(req, router_service, fully_qualified_url, model_aliases, llm_providers, trace_collector, state_storage)
.with_context(parent_cx)
.await
}
(&Method::POST, "/function_calling") => {
let fully_qualified_url =
format!("{}{}", llm_provider_url, "/v1/chat/completions");

View file

@ -157,7 +157,7 @@ pub mod operation_component {
pub const HANDOFF: &str = "plano(handoff)";
/// Agent filter execution
pub const AGENT_FILTER: &str = "plano(agent filter)";
pub const AGENT_FILTER: &str = "plano(filter)";
/// Agent execution
pub const AGENT: &str = "plano(agent)";
@ -203,6 +203,7 @@ pub mod operation_component {
pub struct OperationNameBuilder {
method: Option<String>,
path: Option<String>,
operation: Option<String>,
target: Option<String>,
}
@ -212,6 +213,7 @@ impl OperationNameBuilder {
Self {
method: None,
path: None,
operation: None,
target: None,
}
}
@ -234,6 +236,15 @@ impl OperationNameBuilder {
self
}
/// Set the operation type (optional, for MCP operations)
///
/// # Arguments
/// * `operation` - Operation type (e.g., "tool_call", "session_init", "notification")
pub fn with_operation(mut self, operation: impl Into<String>) -> Self {
self.operation = Some(operation.into());
self
}
/// Set the target (model name, agent name, or filter name)
///
/// # Arguments
@ -246,7 +257,8 @@ impl OperationNameBuilder {
/// Build the operation name string
///
/// # Format
/// - With all components: `{method} {path} {target}`
/// - With all components: `{method} {path} ({operation}) {target}`
/// - Without operation: `{method} {path} {target}`
/// - Without target: `{method} {path}`
/// - Without path: `{method}`
/// - Empty: returns empty string
@ -258,7 +270,11 @@ impl OperationNameBuilder {
}
if let Some(path) = self.path {
parts.push(path);
if let Some(operation) = self.operation {
parts.push(format!("{} ({})", path, operation));
} else {
parts.push(path);
}
}
if let Some(target) = self.target {

1
crates/build.sh Normal file
View file

@ -0,0 +1 @@
cargo build --release --target wasm32-wasip1 -p prompt_gateway -p llm_gateway && cargo build --release -p brightstaff

View file

@ -21,8 +21,11 @@ pub struct ModelAlias {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Agent {
pub id: String,
pub kind: Option<String>,
pub transport: Option<String>,
pub tool: Option<String>,
pub url: String,
#[serde(rename = "type")]
pub agent_type: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -71,6 +74,7 @@ pub struct Configuration {
pub mode: Option<GatewayMode>,
pub routing: Option<Routing>,
pub agents: Option<Vec<Agent>>,
pub filters: Option<Vec<Agent>>,
pub listeners: Vec<Listener>,
pub state_storage: Option<StateStorageConfig>,
}

View file

@ -32,3 +32,4 @@ pub const OTEL_COLLECTOR_HTTP: &str = "opentelemetry_collector_http";
pub const OTEL_POST_PATH: &str = "/v1/traces";
pub const LLM_ROUTE_HEADER: &str = "x-arch-llm-route";
pub const ENVOY_RETRY_HEADER: &str = "x-envoy-max-retries";
pub const BRIGHT_STAFF_SERVICE_NAME : &str = "brightstaff";

View file

@ -18,7 +18,7 @@ pub use shapes::{
};
// Re-export new utilities
pub use span_builder::{SpanBuilder, SpanKind};
pub use span_builder::{SpanBuilder, SpanKind, generate_random_span_id};
pub use resource_span_builder::ResourceSpanBuilder;
pub use constants::*;

View file

@ -37,6 +37,7 @@ pub struct SpanBuilder {
end_time: Option<SystemTime>,
kind: SpanKind,
attributes: HashMap<String, String>,
span_id: Option<String>,
}
impl SpanBuilder {
@ -53,6 +54,7 @@ impl SpanBuilder {
end_time: None,
kind: SpanKind::Internal,
attributes: HashMap::new(),
span_id: None,
}
}
@ -62,6 +64,11 @@ impl SpanBuilder {
self
}
pub fn with_span_id(mut self, span_id: impl Into<String>) -> Self {
self.span_id = Some(span_id.into());
self
}
/// Set the parent span ID to link this span to its parent
pub fn with_parent_span_id(mut self, parent_span_id: impl Into<String>) -> Self {
self.parent_span_id = Some(parent_span_id.into());
@ -125,7 +132,7 @@ impl SpanBuilder {
// Build span directly without going through Span::new()
Span {
trace_id,
span_id: generate_random_span_id(),
span_id: self.span_id.unwrap_or_else(|| generate_random_span_id()),
parent_span_id: self.parent_span_id,
name: self.name,
start_time_unix_nano: format!("{}", start_nanos),
@ -145,7 +152,7 @@ fn system_time_to_nanos(time: SystemTime) -> u128 {
}
/// Generate a random span ID (16 hex characters = 8 bytes)
fn generate_random_span_id() -> String {
pub fn generate_random_span_id() -> String {
use rand::RngCore;
let mut rng = rand::thread_rng();
let mut random_bytes = [0u8; 8];

View file

@ -233,6 +233,104 @@ impl ProviderRequest for ConverseRequest {
fn get_temperature(&self) -> Option<f32> {
self.inference_config.as_ref()?.temperature
}
fn get_messages(&self) -> Vec<crate::apis::openai::Message> {
use crate::apis::openai::{Message, MessageContent, Role};
let mut openai_messages = Vec::new();
// Add system messages if present
if let Some(system) = &self.system {
for sys_block in system {
match sys_block {
SystemContentBlock::Text { text } => {
openai_messages.push(Message {
role: Role::System,
content: MessageContent::Text(text.clone()),
name: None,
tool_calls: None,
tool_call_id: None,
});
}
_ => {} // Skip other system content types
}
}
}
// Convert conversation messages
if let Some(messages) = &self.messages {
for msg in messages {
let role = match msg.role {
ConversationRole::User => Role::User,
ConversationRole::Assistant => Role::Assistant,
};
// Extract text from content blocks
let content = msg.content.iter()
.filter_map(|block| {
if let ContentBlock::Text { text } = block {
Some(text.clone())
} else {
None
}
})
.collect::<Vec<_>>()
.join("\n");
openai_messages.push(Message {
role,
content: MessageContent::Text(content),
name: None,
tool_calls: None,
tool_call_id: None,
});
}
}
openai_messages
}
fn set_messages(&mut self, messages: &[crate::apis::openai::Message]) {
// Convert OpenAI messages to Bedrock format
use crate::apis::amazon_bedrock::{ContentBlock, ConversationRole, SystemContentBlock};
let mut system_blocks = Vec::new();
let mut bedrock_messages = Vec::new();
for msg in messages {
match msg.role {
crate::apis::openai::Role::System => {
if let crate::apis::openai::MessageContent::Text(text) = &msg.content {
system_blocks.push(SystemContentBlock::Text { text: text.clone() });
}
}
crate::apis::openai::Role::User | crate::apis::openai::Role::Assistant => {
let role = match msg.role {
crate::apis::openai::Role::User => ConversationRole::User,
crate::apis::openai::Role::Assistant => ConversationRole::Assistant,
_ => continue,
};
let content = if let crate::apis::openai::MessageContent::Text(text) = &msg.content {
vec![ContentBlock::Text { text: text.clone() }]
} else {
vec![]
};
bedrock_messages.push(crate::apis::amazon_bedrock::Message {
role,
content,
});
}
_ => {}
}
}
if !system_blocks.is_empty() {
self.system = Some(system_blocks);
}
self.messages = Some(bedrock_messages);
}
}
// ============================================================================

View file

@ -541,6 +541,65 @@ impl ProviderRequest for MessagesRequest {
fn get_temperature(&self) -> Option<f32> {
self.temperature
}
fn get_messages(&self) -> Vec<crate::apis::openai::Message> {
use crate::apis::openai::Message;
let mut openai_messages = Vec::new();
// Add system prompt as system message if present
if let Some(system) = &self.system {
openai_messages.push(system.clone().into());
}
// Convert each Anthropic message to OpenAI format
for msg in &self.messages {
if let Ok(converted_msgs) = TryInto::<Vec<Message>>::try_into(msg.clone()) {
openai_messages.extend(converted_msgs);
}
}
openai_messages
}
fn set_messages(&mut self, messages: &[crate::apis::openai::Message]) {
// Convert OpenAI messages to Anthropic format
// Separate system messages from regular messages
let mut system_messages = Vec::new();
let mut regular_messages = Vec::new();
for msg in messages {
if msg.role == crate::apis::openai::Role::System {
system_messages.push(msg.clone());
} else {
regular_messages.push(msg.clone());
}
}
// Set system prompt if there are system messages
if !system_messages.is_empty() {
// Combine all system messages into one
let system_text = system_messages.iter()
.filter_map(|msg| {
if let crate::apis::openai::MessageContent::Text(text) = &msg.content {
Some(text.as_str())
} else {
None
}
})
.collect::<Vec<_>>()
.join("\n");
self.system = Some(crate::apis::anthropic::MessagesSystemPrompt::Single(system_text));
}
// Convert regular messages
self.messages = regular_messages.iter()
.filter_map(|msg| {
msg.clone().try_into().ok()
})
.collect();
}
}
impl MessagesResponse {

View file

@ -735,6 +735,14 @@ impl ProviderRequest for ChatCompletionsRequest {
fn get_temperature(&self) -> Option<f32> {
self.temperature
}
fn get_messages(&self) -> Vec<crate::apis::openai::Message> {
self.messages.clone()
}
fn set_messages(&mut self, messages: &[crate::apis::openai::Message]) {
self.messages = messages.to_vec();
}
}
/// Implementation of ProviderResponse for ChatCompletionsResponse

View file

@ -1134,6 +1134,140 @@ impl ProviderRequest for ResponsesAPIRequest {
fn get_temperature(&self) -> Option<f32> {
self.temperature
}
fn get_messages(&self) -> Vec<crate::apis::openai::Message> {
use crate::apis::openai::{Message, MessageContent, Role};
let mut openai_messages = Vec::new();
// Add instructions as system message if present
if let Some(instructions) = &self.instructions {
openai_messages.push(Message {
role: Role::System,
content: MessageContent::Text(instructions.clone()),
name: None,
tool_calls: None,
tool_call_id: None,
});
}
// Convert input to messages
match &self.input {
InputParam::Text(text) => {
openai_messages.push(Message {
role: Role::User,
content: MessageContent::Text(text.clone()),
name: None,
tool_calls: None,
tool_call_id: None,
});
}
InputParam::Items(items) => {
for item in items {
match item {
InputItem::Message(msg) => {
// Convert message role
let role = match msg.role {
MessageRole::User => Role::User,
MessageRole::Assistant => Role::Assistant,
MessageRole::System => Role::System,
MessageRole::Developer => Role::System, // Map developer to system
};
// Extract text from message content
let content = match &msg.content {
crate::apis::openai_responses::MessageContent::Text(text) => text.clone(),
crate::apis::openai_responses::MessageContent::Items(items) => {
items.iter()
.filter_map(|c| {
if let InputContent::InputText { text } = c {
Some(text.clone())
} else {
None
}
})
.collect::<Vec<_>>()
.join("\n")
}
};
openai_messages.push(Message {
role,
content: MessageContent::Text(content),
name: None,
tool_calls: None,
tool_call_id: None,
});
}
// Skip other input item types for now
InputItem::ItemReference { .. } | InputItem::FunctionCallOutput { .. } => {
// These are not yet supported in agent framework
}
}
}
}
}
openai_messages
}
fn set_messages(&mut self, messages: &[crate::apis::openai::Message]) {
// For ResponsesAPI, we need to convert messages back to input format
// Extract system messages as instructions
let system_text = messages.iter()
.filter(|msg| msg.role == crate::apis::openai::Role::System)
.filter_map(|msg| {
if let crate::apis::openai::MessageContent::Text(text) = &msg.content {
Some(text.as_str())
} else {
None
}
})
.collect::<Vec<_>>()
.join("\n");
if !system_text.is_empty() {
self.instructions = Some(system_text);
}
// Convert user/assistant messages to InputParam
// For simplicity, we'll use the last user message as the input
// or combine all non-system messages
let input_messages: Vec<_> = messages.iter()
.filter(|msg| msg.role != crate::apis::openai::Role::System)
.collect();
if !input_messages.is_empty() {
// If there's only one message, use Text format
if input_messages.len() == 1 {
if let crate::apis::openai::MessageContent::Text(text) = &input_messages[0].content {
self.input = crate::apis::openai_responses::InputParam::Text(text.clone());
}
} else {
// Multiple messages - combine them as text for now
// A more sophisticated approach would use InputParam::Items
let combined_text = input_messages.iter()
.filter_map(|msg| {
if let crate::apis::openai::MessageContent::Text(text) = &msg.content {
Some(format!("{}: {}",
match msg.role {
crate::apis::openai::Role::User => "User",
crate::apis::openai::Role::Assistant => "Assistant",
_ => "Unknown",
},
text
))
} else {
None
}
})
.collect::<Vec<_>>()
.join("\n");
self.input = crate::apis::openai_responses::InputParam::Text(combined_text);
}
}
}
}
// ============================================================================

View file

@ -47,6 +47,28 @@ pub trait ProviderRequest: Send + Sync {
fn remove_metadata_key(&mut self, key: &str) -> bool;
fn get_temperature(&self) -> Option<f32>;
/// Get message history as OpenAI Message format
/// This is useful for processing chat history across different provider formats
fn get_messages(&self) -> Vec<crate::apis::openai::Message>;
/// Set message history from OpenAI Message format
/// This converts OpenAI messages to the appropriate format for each provider type
fn set_messages(&mut self, messages: &[crate::apis::openai::Message]);
}
impl ProviderRequestType {
/// Set message history from OpenAI Message format
/// This converts OpenAI messages to the appropriate format for each provider type
pub fn set_messages(&mut self, messages: &[crate::apis::openai::Message]) {
match self {
Self::ChatCompletionsRequest(r) => r.set_messages(messages),
Self::MessagesRequest(r) => r.set_messages(messages),
Self::BedrockConverse(r) => r.set_messages(messages),
Self::BedrockConverseStream(r) => r.set_messages(messages),
Self::ResponsesAPIRequest(r) => r.set_messages(messages),
}
}
}
impl ProviderRequest for ProviderRequestType {
@ -149,6 +171,26 @@ impl ProviderRequest for ProviderRequestType {
Self::ResponsesAPIRequest(r) => r.get_temperature(),
}
}
fn get_messages(&self) -> Vec<crate::apis::openai::Message> {
match self {
Self::ChatCompletionsRequest(r) => r.get_messages(),
Self::MessagesRequest(r) => r.get_messages(),
Self::BedrockConverse(r) => r.get_messages(),
Self::BedrockConverseStream(r) => r.get_messages(),
Self::ResponsesAPIRequest(r) => r.get_messages(),
}
}
fn set_messages(&mut self, messages: &[crate::apis::openai::Message]) {
match self {
Self::ChatCompletionsRequest(r) => r.set_messages(messages),
Self::MessagesRequest(r) => r.set_messages(messages),
Self::BedrockConverse(r) => r.set_messages(messages),
Self::BedrockConverseStream(r) => r.set_messages(messages),
Self::ResponsesAPIRequest(r) => r.set_messages(messages),
}
}
}
/// Parse the client API from a byte slice.
@ -934,4 +976,131 @@ mod tests {
.message
.contains("OpenAI ChatCompletions, Anthropic Messages, and OpenAI Responses"));
}
#[test]
fn test_get_message_history_chat_completions() {
use crate::apis::openai::{Message, MessageContent, Role};
let chat_req = ChatCompletionsRequest {
model: "gpt-4".to_string(),
messages: vec![
Message {
role: Role::System,
content: MessageContent::Text("You are helpful".to_string()),
name: None,
tool_calls: None,
tool_call_id: None,
},
Message {
role: Role::User,
content: MessageContent::Text("Hello!".to_string()),
name: None,
tool_calls: None,
tool_call_id: None,
},
],
..Default::default()
};
let provider_req = ProviderRequestType::ChatCompletionsRequest(chat_req);
let messages = provider_req.get_messages();
assert_eq!(messages.len(), 2);
assert_eq!(messages[0].role, Role::System);
assert_eq!(messages[1].role, Role::User);
}
#[test]
fn test_get_message_history_anthropic_messages() {
use crate::apis::anthropic::{
MessagesMessage, MessagesMessageContent, MessagesRequest, MessagesRole,
MessagesSystemPrompt,
};
let anthropic_req = MessagesRequest {
model: "claude-3-sonnet".to_string(),
messages: vec![MessagesMessage {
role: MessagesRole::User,
content: MessagesMessageContent::Single("Hello!".to_string()),
}],
system: Some(MessagesSystemPrompt::Single(
"You are helpful".to_string(),
)),
max_tokens: 100,
container: None,
mcp_servers: None,
metadata: None,
service_tier: None,
thinking: None,
temperature: None,
top_p: None,
top_k: None,
stream: None,
stop_sequences: None,
tools: None,
tool_choice: None,
};
let provider_req = ProviderRequestType::MessagesRequest(anthropic_req);
let messages = provider_req.get_messages();
// Should have system message + user message
assert_eq!(messages.len(), 2);
assert_eq!(
messages[0].role,
crate::apis::openai::Role::System
);
assert_eq!(
messages[1].role,
crate::apis::openai::Role::User
);
}
#[test]
fn test_get_message_history_responses_api() {
    use crate::apis::openai_responses::{InputParam, ResponsesAPIRequest};
    // Minimal Responses API request: plain-text input plus instructions;
    // every optional field left unset.
    let responses_req = ResponsesAPIRequest {
        model: "gpt-4o".to_string(),
        input: InputParam::Text("Hello, world!".to_string()),
        instructions: Some("Be helpful".to_string()),
        temperature: None,
        max_output_tokens: None,
        stream: None,
        metadata: None,
        tools: None,
        tool_choice: None,
        parallel_tool_calls: None,
        modalities: None,
        user: None,
        store: None,
        reasoning_effort: None,
        include: None,
        audio: None,
        text: None,
        service_tier: None,
        top_p: None,
        top_logprobs: None,
        stream_options: None,
        truncation: None,
        conversation: None,
        previous_response_id: None,
        max_tool_calls: None,
        background: None,
    };
    let provider_req = ProviderRequestType::ResponsesAPIRequest(responses_req);
    let messages = provider_req.get_messages();
    // Instructions become a leading system message and the text input a
    // user message. Use the in-scope `Role` for consistency with the
    // sibling tests.
    assert_eq!(messages.len(), 2);
    assert_eq!(messages[0].role, Role::System);
    assert_eq!(messages[1].role, Role::User);
}
}

View file

@ -0,0 +1,106 @@
# RAG Agent Demo
A multi-agent RAG system demonstrating archgw's agent filter chain with MCP protocol.
## Architecture
This demo consists of three components:
1. **Query Rewriter** (MCP filter) - Rewrites user queries for better retrieval
2. **Context Builder** (MCP filter) - Retrieves relevant context from knowledge base
3. **RAG Agent** (REST) - Generates final responses based on augmented context
## Components
### Query Rewriter Filter (MCP)
- **Port**: 10501
- **Tool**: `query_rewriter`
- Improves queries using LLM before retrieval
### Context Builder Filter (MCP)
- **Port**: 10502
- **Tool**: `context_builder`
- Augments queries with relevant passages from knowledge base
### RAG Agent (REST/OpenAI)
- **Port**: 10505
- **Endpoint**: `/v1/chat/completions`
- Generates responses using OpenAI-compatible API
## Quick Start
### 1. Start all agents
```bash
./start_agents.sh
```
This starts:
- Query Rewriter MCP server on port 10501
- Context Builder MCP server on port 10502
- RAG Agent REST server on port 10505
### 2. Start archgw
```bash
archgw up --foreground
```
### 3. Test the system
```bash
curl -X POST http://localhost:8001/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-4o",
"messages": [{"role": "user", "content": "What is the guaranteed uptime for TechCorp?"}]
}'
```
## Configuration
The `arch_config.yaml` defines how agents are connected:
```yaml
filters:
  - id: query_rewriter
    url: http://host.docker.internal:10501
    tool: query_rewriter # MCP tool name (defaults to the filter id)
  - id: context_builder
    url: http://host.docker.internal:10502
    tool: context_builder
```
## How It Works
1. User sends request to archgw listener on port 8001
2. Request passes through MCP filter chain:
- **Query Rewriter** rewrites the query for better retrieval
- **Context Builder** augments query with relevant knowledge base passages
3. Augmented request is forwarded to **RAG Agent** REST endpoint
4. RAG Agent generates final response using LLM
## Filter Defaults
See `arch_config.yaml` for the complete filter chain setup. The MCP filters use default settings:
- `type: mcp` (default)
- `transport: streamable-http` (default)
- Tool name defaults to the filter ID
See `sample_queries.md` for example queries to test the RAG system.
Example request:
```bash
curl -X POST http://localhost:8001/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "What is the guaranteed uptime for TechCorp?"
}
]
}'
```
## Environment Variables
- `LLM_GATEWAY_ENDPOINT` - archgw endpoint (default: `http://localhost:12000/v1`)
- `OPENAI_API_KEY` - OpenAI API key for model providers
## Additional Resources
- See `sample_queries.md` for more example queries
- See `arch_config.yaml` for complete configuration details

View file

@ -0,0 +1,41 @@
version: v0.3.0
# Downstream agents archgw can route to.
agents:
  - id: rag_agent
    url: http://host.docker.internal:10505
# Request filters applied before the agent is invoked (MCP tools).
filters:
  - id: query_rewriter
    url: http://host.docker.internal:10501
    # type: mcp # default is mcp
    # transport: streamable-http # default is streamable-http
    # tool: query_rewriter # default name is the filter id
  - id: context_builder
    url: http://host.docker.internal:10502
# Upstream LLM providers used by the gateway.
model_providers:
  - model: openai/gpt-4o-mini
    access_key: $OPENAI_API_KEY
    default: true
  - model: openai/gpt-4o
    access_key: $OPENAI_API_KEY
# Friendly aliases mapped to concrete provider models.
model_aliases:
  fast-llm:
    target: gpt-4o-mini
  smart-llm:
    target: gpt-4o
listeners:
  - type: agent
    name: agent_1
    port: 8001
    router: arch_agent_router
    agents:
      - id: rag_agent
        description: virtual assistant for retrieval augmented generation tasks
        # Filters run in order before the agent receives the request.
        filter_chain:
          - query_rewriter
          - context_builder
tracing:
  # Percentage of requests to sample for tracing (100 = trace everything).
  random_sampling: 100

View file

@ -0,0 +1,17 @@
services:
jaeger:
build:
context: ../../shared/jaeger
ports:
- "16686:16686"
- "4317:4317"
- "4318:4318"
open-web-ui:
image: dyrnq/open-webui:main
restart: always
ports:
- "8080:8080"
environment:
- DEFAULT_MODEL=gpt-4o-mini
- ENABLE_OPENAI_API=true
- OPENAI_API_BASE_URL=http://host.docker.internal:8001/v1

View file

@ -0,0 +1,86 @@
### Initialize MCP Session (SSE)
POST http://localhost:10501/mcp
Content-Type: application/json
Accept: application/json, text/event-stream
{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"capabilities":{},"protocolVersion":"2024-11-05","clientInfo":{"name":"test","version":"1.0.0"}}}
### Send Initialized Notification
POST http://localhost:10501/mcp
Content-Type: application/json
Accept: application/json, text/event-stream
mcp-session-id: 35d455dc07b8400887f86668590f12bb
{
"jsonrpc": "2.0",
"method": "notifications/initialized"
}
### List Tools
POST http://localhost:10501/mcp
Content-Type: application/json
Accept: application/json, text/event-stream
mcp-session-id: eb10a691b36e4547b6c93c5dc5b47e11
{
"jsonrpc": "2.0",
"id": "list-tools-1",
"method": "tools/list"
}
### Call Query Rewriter Tool
POST http://localhost:10501/mcp
Content-Type: application/json
Accept: application/json, text/event-stream
mcp-session-id: 6b95ff75825a402b90eb3ea07e23fbce
{
"jsonrpc": "2.0",
"id": "3d3b886a-6216-4a26-a422-7a972529c0e7",
"method": "tools/call",
"params": {
"arguments": {
"messages": [
{
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?",
"role": "user"
}
]
},
"name": "query_rewriter"
}
}
### another test
# Content-Type: application/json
# Accept: application/json, text/event-stream
# mcp-session-id: ed7a81a1d39549ecaadb867a6b2daf1e
POST http://localhost:10501/mcp
content-type: application/json
mcp-session-id: e4ec1ae904e14e06b7d194da10e5f74c
accept: application/json, text/event-stream
{"jsonrpc":"2.0","id":"4bb1043a-2953-4bcd-b801-f270b0ae8c39","method":"tools/call","params":{"arguments":{"messages":[{"content":"What is the guaranteed uptime percentage for TechCorp's cloud services?","role":"user"}]},"name":"query_rewriter"}}
### stream test
POST http://localhost:10501/mcp
content-type: application/json
mcp-session-id: 35d455dc07b8400887f86668590f12bb
accept: application/json, text/event-stream
{
"jsonrpc": "2.0",
"id": 1,
"method": "tools/call",
"params": {
"name": "long_job",
"arguments": {
"n": 3
}
}
}

View file

@ -0,0 +1,22 @@
[project]
name = "rag_agent"
version = "0.1.0"
description = "RAG Agent"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"click>=8.2.1",
"mcp>=1.13.1",
"fastmcp>=2.14",
"pydantic>=2.11.7",
"fastapi>=0.104.1",
"uvicorn>=0.24.0",
"openai==2.13.0",
]
[project.scripts]
rag_agent = "rag_agent:main"
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

View file

@ -0,0 +1,64 @@
# Sample Queries for Knowledge Base RAG Agent
## Service Level Agreement Queries
- What is the guaranteed uptime percentage for TechCorp's cloud services?
- What remedies are available if the API response time exceeds the agreed threshold?
- How quickly must TechCorp respond to critical support issues?
- What monitoring and reporting requirements are specified in the SLA?
- When was the TechCorp service agreement signed and by whom?
## Privacy Policy Queries
- What encryption methods does DataSecure use to protect data?
- How long does DataSecure retain personal data after account deletion?
- What rights do users have regarding their personal information?
- Can DataSecure sell user data to third parties for marketing?
- Who should be contacted for privacy-related concerns at DataSecure?
## Supply Chain Agreement Queries
- What types of automotive components does PrecisionParts supply?
- What are the payment terms and volume discount structure?
- What quality standards must the supplied components meet?
- What are the penalties for late delivery?
- What insurance coverage requirements apply to the supplier?
## Student Data Management Queries
- What federal laws must EduTech comply with regarding student data?
- What security measures are in place to protect student information?
- How long are student records retained after graduation?
- What consent is required for students under 13 years old?
- Who can access student educational records?
## Investment Advisory Queries
- What is FinanceFirst's management fee structure?
- What types of investments are included in the advisory services?
- What regulatory body oversees FinanceFirst Advisors?
- How often are portfolio reviews conducted?
- What are the client's responsibilities under this agreement?
## Healthcare Standards Queries
- What is the target response time for emergency code teams?
- What hand hygiene compliance rate is required?
- How quickly must medical records be completed after patient encounters?
- What continuing education requirements apply to nursing staff?
- What patient safety protocols are mandatory upon admission?
## Cross-Document Queries
- Which agreements include confidentiality or data protection provisions?
- What are the common termination notice periods across different contract types?
- Which documents specify insurance or liability coverage requirements?
- What compliance and regulatory requirements are mentioned across agreements?
- Which contracts include performance metrics or service level commitments?
## Complex Analysis Queries
- Compare the data retention policies across the privacy policy and student data management documents.
- What are the different approaches to risk management across the supply chain and investment advisory agreements?
- How do the security measures in the healthcare standards compare to those in the privacy policy?
- Which agreements provide the most detailed compliance and regulatory frameworks?
- What common themes exist in the quality assurance requirements across different industries?
## Document-Specific Detail Queries
- List all the specific percentages, timeframes, and numerical requirements mentioned in the SLA.
- What are all the contact persons and their roles mentioned across the documents?
- Identify all the compliance standards and certifications referenced in the supply chain agreement.
- What are the specific consequences or penalties mentioned for non-compliance across agreements?
- List all the third-party systems, tools, or services mentioned in the documents.

View file

@ -0,0 +1,98 @@
import click
from fastmcp import FastMCP
mcp = None
@click.command()
@click.option(
    "--transport",
    "transport",
    default="streamable-http",
    # Help text previously said "stdio or sse" which contradicted the default.
    help="Transport type: streamable-http, sse, or stdio",
)
@click.option("--host", "host", default="localhost", help="Host to bind MCP server to")
@click.option("--port", "port", type=int, default=10500, help="Port for MCP server")
@click.option(
    "--agent",
    "agent",
    required=True,
    help="Agent name: query_rewriter, context_builder, or response_generator",
)
@click.option(
    "--name",
    "agent_name",
    default=None,
    help="Custom MCP server name (defaults to agent type)",
)
@click.option(
    "--rest-server",
    "rest_server",
    is_flag=True,
    help="Start REST server instead of MCP server",
)
@click.option("--rest-port", "rest_port", default=8000, help="Port for REST server")
def main(host, port, agent, transport, agent_name, rest_server, rest_port):
    """Start a RAG agent as an MCP server or REST server."""
    # Map friendly names to (module path, default display name).
    agent_map = {
        "query_rewriter": ("rag_agent.query_rewriter", "Query Rewriter Agent"),
        "context_builder": ("rag_agent.context_builder", "Context Builder Agent"),
        "response_generator": (
            "rag_agent.rag_agent",
            "Response Generator Agent",
        ),
    }
    if agent not in agent_map:
        print(f"Error: Unknown agent '{agent}'")
        print(f"Available agents: {', '.join(agent_map.keys())}")
        return
    module_name, default_name = agent_map[agent]
    mcp_name = agent_name or default_name
    if rest_server:
        # Only response_generator supports REST server mode
        if agent != "response_generator":
            print(f"Error: Agent '{agent}' does not support REST server mode.")
            print("REST server is only supported for: response_generator")
            print(f"Remove --rest-server flag to start {agent} as an MCP server.")
            return
        print(f"Starting REST server on {host}:{rest_port} for agent: {agent}")
        from rag_agent.rag_agent import start_server
        start_server(host=host, port=rest_port)
        return
    else:
        # Only query_rewriter and context_builder support MCP
        if agent not in ["query_rewriter", "context_builder"]:
            print(f"Error: Agent '{agent}' does not support MCP mode.")
            print("MCP is only supported for: query_rewriter, context_builder")
            print(f"Use --rest-server flag to start {agent} as a REST server.")
            return
        # The FastMCP instance must exist before the agent module is
        # imported: agent modules register their tools against the module
        # global `mcp` at import time.
        global mcp
        mcp = FastMCP(mcp_name, host=host, port=port)
        print(f"Starting MCP server: {mcp_name}")
        print(f" Agent: {agent}")
        print(f" Transport: {transport}")
        print(f" Host: {host}")
        print(f" Port: {port}")
        # Import the agent module to register its tools
        import importlib
        importlib.import_module(module_name)
        print(f"Agent '{agent}' loaded successfully")
        print(f"MCP server ready on {transport}://{host}:{port}")
        mcp.run(transport=transport)
if __name__ == "__main__":
    main()

View file

@ -0,0 +1,4 @@
from . import main
if __name__ == "__main__":
main()

View file

@ -0,0 +1,36 @@
from pydantic import BaseModel
from typing import List, Optional, Dict, Any
class ChatMessage(BaseModel):
    # A single chat turn. `role` is the speaker ("system"/"user"/"assistant"
    # per OpenAI convention); `content` is the plain-text message body.
    role: str
    content: str
class ChatCompletionRequest(BaseModel):
    # OpenAI-compatible chat completion request body.
    model: str
    messages: List[ChatMessage]
    temperature: Optional[float] = 1.0
    max_tokens: Optional[int] = None
    top_p: Optional[float] = 1.0
    frequency_penalty: Optional[float] = 0.0
    presence_penalty: Optional[float] = 0.0
    # When True the server responds with SSE chunks instead of a single body.
    stream: Optional[bool] = False
    stop: Optional[List[str]] = None
class ChatCompletionResponse(BaseModel):
    # OpenAI-compatible non-streaming completion envelope.
    id: str
    object: str = "chat.completion"
    created: int  # Unix timestamp (seconds)
    model: str
    choices: List[Dict[str, Any]]
    # Keys: prompt_tokens, completion_tokens, total_tokens.
    usage: Dict[str, int]
class ChatCompletionStreamResponse(BaseModel):
    # OpenAI-compatible streaming chunk envelope (one SSE event).
    id: str
    object: str = "chat.completion.chunk"
    created: int  # Unix timestamp (seconds)
    model: str
    choices: List[Dict[str, Any]]

View file

@ -0,0 +1,205 @@
import json
from typing import List, Optional, Dict, Any
from openai import AsyncOpenAI
import os
import logging
import csv
from pathlib import Path
from .api import ChatMessage
from . import mcp
from fastmcp.server.dependencies import get_http_headers
# Set up logging
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - [CONTEXT_BUILDER] - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
# Configuration for archgw LLM gateway
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1")
RAG_MODEL = "gpt-4o-mini"
# Initialize OpenAI client for archgw
archgw_client = AsyncOpenAI(
base_url=LLM_GATEWAY_ENDPOINT,
api_key="EMPTY", # archgw doesn't require a real API key
)
# Global variable to store the knowledge base
knowledge_base = []
def load_knowledge_base():
    """Load the sample_knowledge_base.csv file into memory on startup.

    Populates the module-level ``knowledge_base`` list with
    ``{"path": ..., "content": ...}`` dicts; resets it to empty on any
    failure so callers can rely on it always being a list.
    """
    global knowledge_base
    # Resolve the CSV relative to this module so the loader works
    # regardless of the process working directory.
    current_dir = Path(__file__).parent
    csv_path = current_dir / "sample_knowledge_base.csv"
    # Use the module logger (the original mixed a bare print() in here,
    # inconsistent with every other message in this module).
    logger.info(f"Loading knowledge base from {csv_path}")
    try:
        knowledge_base = []
        # utf-8-sig strips a BOM if the CSV was exported from a spreadsheet.
        with open(csv_path, "r", encoding="utf-8-sig") as file:
            csv_reader = csv.DictReader(file)
            for row in csv_reader:
                knowledge_base.append({"path": row["path"], "content": row["content"]})
        logger.info(f"Loaded {len(knowledge_base)} documents from knowledge base")
    except Exception as e:
        logger.error(f"Error loading knowledge base: {e}")
        knowledge_base = []
async def find_relevant_passages(
    query: str, traceparent: Optional[str] = None, top_k: int = 3
) -> List[Dict[str, str]]:
    """Use the LLM to find the most relevant passages from the knowledge base.

    Builds a selection prompt listing every document (content truncated to
    500 chars), asks archgw to return comma-separated passage indices, and
    maps valid indices back to knowledge-base entries. Returns [] when the
    knowledge base is empty, the model answers "NONE", or any error occurs.
    """
    if not knowledge_base:
        logger.warning("Knowledge base is empty")
        return []
    # Create a system prompt for passage selection
    system_prompt = f"""You are a retrieval assistant that selects the most relevant document passages for a given query.
Given a user query and a list of document passages, identify the {top_k} most relevant passages that would help answer the query.
Query: {query}
Available passages:
"""
    # Add all passages with indices
    for i, doc in enumerate(knowledge_base):
        system_prompt += (
            f"\n[{i}] Path: {doc['path']}\nContent: {doc['content'][:500]}...\n"
        )
    system_prompt += f"""
Please respond with ONLY the indices of the {top_k} most relevant passages, separated by commas (e.g., "0,3,7").
If fewer than {top_k} passages are relevant, return only the relevant ones.
If no passages are relevant, return "NONE"."""
    try:
        # Call archgw to select relevant passages
        logger.info(f"Calling archgw to find relevant passages for query: '{query}'")
        # Prepare extra headers if traceparent is provided
        extra_headers = {"x-envoy-max-retries": "3"}
        if traceparent:
            extra_headers["traceparent"] = traceparent
        # Low temperature and tight max_tokens: we only expect a short list
        # of indices back, not free-form text.
        response = await archgw_client.chat.completions.create(
            model=RAG_MODEL,
            messages=[{"role": "system", "content": system_prompt}],
            temperature=0.1,
            max_tokens=50,
            extra_headers=extra_headers,
        )
        result = response.choices[0].message.content.strip()
        logger.info(f"LLM selected passages: {result}")
        # Parse the indices
        if result.upper() == "NONE":
            return []
        selected_passages = []
        # Tolerate malformed output: keep only numeric tokens.
        indices = [
            int(idx.strip()) for idx in result.split(",") if idx.strip().isdigit()
        ]
        for idx in indices:
            # Silently drop out-of-range indices the model may hallucinate.
            if 0 <= idx < len(knowledge_base):
                selected_passages.append(knowledge_base[idx])
        logger.info(f"Selected {len(selected_passages)} relevant passages")
        return selected_passages
    except Exception as e:
        logger.error(f"Error finding relevant passages: {e}")
        return []
async def augment_query_with_context(
    messages: List[ChatMessage], traceparent: Optional[str] = None
) -> List[ChatMessage]:
    """Locate the most recent user turn, retrieve supporting passages, and
    rewrite that turn so it carries the retrieved context inline.

    Returns the original list unchanged when there is no user message or
    when retrieval yields nothing.
    """
    # Walk backwards to find the newest user message and its position.
    last_user_message = None
    last_user_index = -1
    for idx in reversed(range(len(messages))):
        if messages[idx].role == "user":
            last_user_message = messages[idx].content
            last_user_index = idx
            break
    if not last_user_message:
        logger.warning("No user message found in conversation")
        return messages
    logger.info(f"Processing user query: '{last_user_message}'")
    # Ask the retrieval helper for supporting passages.
    relevant_passages = await find_relevant_passages(last_user_message, traceparent)
    if not relevant_passages:
        logger.info("No relevant passages found, returning original messages")
        return messages
    # Render each passage as "Document N (path): content" and join them.
    context = "\n\n".join(
        f"Document {i+1} ({passage['path']}):\n{passage['content']}"
        for i, passage in enumerate(relevant_passages)
    )
    # Splice the retrieved context after the original query text.
    augmented_content = f"""{last_user_message} RELEVANT CONTEXT:
{context}"""
    # Shallow-copy the list so the caller's original is untouched, then
    # swap in the augmented turn.
    updated_messages = messages.copy()
    updated_messages[last_user_index] = ChatMessage(
        role="user", content=augmented_content
    )
    logger.info(f"Augmented user query with {len(relevant_passages)} relevant passages")
    return updated_messages
# Load knowledge base on module import
load_knowledge_base()
@mcp.tool()
async def context_builder(messages: List[ChatMessage]) -> List[ChatMessage]:
    """MCP tool that augments user queries with relevant context from the knowledge base.

    Rewrites the last user turn in `messages` to carry retrieved
    knowledge-base passages inline; all other turns are returned untouched.
    """
    logger.info(f"Received chat completion request with {len(messages)} messages")
    # Get traceparent header from MCP request (propagated downstream for
    # distributed tracing of the archgw call)
    headers = get_http_headers()
    traceparent_header = headers.get("traceparent")
    if traceparent_header:
        logger.info(f"Received traceparent header: {traceparent_header}")
    else:
        logger.info("No traceparent header found")
    # Augment the user query with relevant context
    updated_messages = await augment_query_with_context(messages, traceparent_header)
    # Return as dict to minimize text serialization
    # NOTE(review): annotated return type is List[ChatMessage] but plain dicts
    # are returned — confirm FastMCP serializes both shapes identically.
    return [{"role": msg.role, "content": msg.content} for msg in updated_messages]

View file

@ -0,0 +1,119 @@
import asyncio
import json
from typing import List, Optional, Dict, Any
from openai import AsyncOpenAI
import os
import logging
from .api import ChatMessage
from . import mcp
from fastmcp.server.dependencies import get_http_headers
# Set up logging
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - [QUERY_REWRITER] - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
# Configuration for archgw LLM gateway
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1")
QUERY_REWRITE_MODEL = "gpt-4o-mini"
# Initialize OpenAI client for archgw
archgw_client = AsyncOpenAI(
base_url=LLM_GATEWAY_ENDPOINT,
api_key="EMPTY", # archgw doesn't require a real API key
)
async def rewrite_query_with_archgw(
    messages: List[ChatMessage], traceparent_header: str
) -> str:
    """Rewrite the user query using LLM for better retrieval.

    Sends the full conversation, prefixed with a rewriting system prompt,
    to archgw. On any failure falls back to the most recent user message,
    or "" when the conversation contains none.
    """
    system_prompt = """You are a query rewriter that improves user queries for better retrieval.
Given a conversation history, rewrite the last user message to be more specific and context-aware.
The rewritten query should:
1. Include relevant context from previous messages
2. Be clear and specific for information retrieval
3. Maintain the user's intent
4. Be concise but comprehensive
Return only the rewritten query, nothing else."""
    # Prepare messages for the query rewriter - just add system prompt to existing messages
    rewrite_messages = [{"role": "system", "content": system_prompt}]
    # Add conversation history
    for msg in messages:
        rewrite_messages.append({"role": msg.role, "content": msg.content})
    try:
        # Call archgw using OpenAI client
        extra_headers = {"x-envoy-max-retries": "3"}
        if traceparent_header:
            extra_headers["traceparent"] = traceparent_header
        logger.info(f"Calling archgw at {LLM_GATEWAY_ENDPOINT} to rewrite query")
        # Low temperature keeps the rewrite close to the user's intent.
        response = await archgw_client.chat.completions.create(
            model=QUERY_REWRITE_MODEL,
            messages=rewrite_messages,
            temperature=0.3,
            max_tokens=200,
            extra_headers=extra_headers,
        )
        rewritten_query = response.choices[0].message.content.strip()
        logger.info(f"Query rewritten successfully: '{rewritten_query}'")
        return rewritten_query
    except Exception as e:
        logger.error(f"Error rewriting query: {e}")
        # If rewriting fails, return the original last user message
        logger.info("Falling back to original user message")
        for message in reversed(messages):
            if message.role == "user":
                return message.content
        return ""
@mcp.tool()
async def query_rewriter(messages: List[ChatMessage]) -> List[ChatMessage]:
    """MCP tool that rewrites the last user query using archgw.

    Returns the updated message list as plain role/content dicts; only the
    most recent user turn is replaced, all other turns pass through.
    (The previous docstring incorrectly claimed a dict with a 'messages'
    key was returned.)
    """
    logger.info(f"Received chat completion request with {len(messages)} messages")
    # Get traceparent header from HTTP request using FastMCP's dependency function
    headers = get_http_headers()
    traceparent_header = headers.get("traceparent")
    if traceparent_header:
        logger.info(f"Received traceparent header: {traceparent_header}")
    else:
        logger.info("No traceparent header found")
    # Call archgw to rewrite the last user query
    rewritten_query = await rewrite_query_with_archgw(messages, traceparent_header)
    # Create updated messages with the rewritten query
    updated_messages = messages.copy()
    # Find and update the last user message with the rewritten query
    for i in range(len(updated_messages) - 1, -1, -1):
        if updated_messages[i].role == "user":
            original_query = updated_messages[i].content
            updated_messages[i] = ChatMessage(role="user", content=rewritten_query)
            logger.info(
                f"Updated user query from '{original_query}' to '{rewritten_query}'"
            )
            break
    # Return as dict to minimize text serialization
    # NOTE(review): annotated return type is List[ChatMessage] but plain dicts
    # are returned — confirm FastMCP serializes both shapes identically.
    return [{"role": msg.role, "content": msg.content} for msg in updated_messages]

View file

@ -0,0 +1,303 @@
import json
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse
from openai import AsyncOpenAI
import os
import logging
import time
import uuid
import uvicorn
import asyncio
from .api import (
ChatCompletionRequest,
ChatCompletionResponse,
ChatCompletionStreamResponse,
)
# Set up logging
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - [RESPONSE_GENERATOR] - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)
# Configuration for archgw LLM gateway
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1")
RESPONSE_MODEL = "gpt-4o"
# System prompt for response generation
SYSTEM_PROMPT = """You are a helpful assistant that generates coherent, contextual responses.
Given a conversation history, generate a helpful and relevant response based on all the context available in the messages.
Your response should:
1. Be contextually aware of the entire conversation
2. Address the user's needs appropriately
3. Be helpful and informative
4. Maintain a natural conversational tone
Generate a complete response to assist the user."""
# Initialize OpenAI client for archgw
archgw_client = AsyncOpenAI(
base_url=LLM_GATEWAY_ENDPOINT,
api_key="EMPTY", # archgw doesn't require a real API key
)
# FastAPI app for REST server
app = FastAPI(title="RAG Agent Response Generator", version="1.0.0")
def prepare_response_messages(request_body: ChatCompletionRequest):
    """Build the outbound message list for response generation: the system
    prompt first, then the caller's conversation as role/content dicts."""
    history = [
        {"role": msg.role, "content": msg.content} for msg in request_body.messages
    ]
    return [{"role": "system", "content": SYSTEM_PROMPT}, *history]
@app.post("/v1/chat/completions")
async def chat_completion_http(request: Request, request_body: ChatCompletionRequest):
    """HTTP endpoint for chat completions with streaming support.

    Dispatches to the streaming or non-streaming handler based on
    `request_body.stream`, forwarding any incoming W3C traceparent header
    so the downstream archgw call joins the same trace.
    """
    logger.info(
        f"Received chat completion request with {len(request_body.messages)} messages"
    )
    # Get traceparent header from HTTP request
    traceparent_header = request.headers.get("traceparent")
    if traceparent_header:
        logger.info(f"Received traceparent header: {traceparent_header}")
    else:
        logger.info("No traceparent header found")
    # Check if streaming is requested
    if request_body.stream:
        # SSE stream; the explicit content-type header below overrides the
        # media_type for clients that inspect headers.
        return StreamingResponse(
            stream_chat_completions(request_body, traceparent_header),
            media_type="text/plain",
            headers={
                "content-type": "text/event-stream",
            },
        )
    else:
        return await non_streaming_chat_completions(request_body, traceparent_header)
async def stream_chat_completions(
    request_body: ChatCompletionRequest, traceparent_header: str = None
):
    """Generate streaming chat completions.

    Async generator yielding SSE-formatted events ("data: ...\\n\\n"): one
    chunk per upstream content delta, then a final chunk whose non-standard
    "message" field carries the JSON-encoded full assistant turn, then
    "data: [DONE]". On upstream failure it yields a single apology chunk
    followed by [DONE] instead of raising.
    """
    # Prepare messages for response generation
    response_messages = prepare_response_messages(request_body)
    try:
        # Call archgw using OpenAI client for streaming
        logger.info(
            f"Calling archgw at {LLM_GATEWAY_ENDPOINT} to generate streaming response"
        )
        # Prepare extra headers if traceparent is provided
        extra_headers = {"x-envoy-max-retries": "3"}
        if traceparent_header:
            extra_headers["traceparent"] = traceparent_header
        response_stream = await archgw_client.chat.completions.create(
            model=RESPONSE_MODEL,
            messages=response_messages,
            temperature=request_body.temperature or 0.7,
            max_tokens=request_body.max_tokens or 1000,
            stream=True,
            extra_headers=extra_headers,
        )
        # One completion id / timestamp is shared by every chunk of this stream.
        completion_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"
        created_time = int(time.time())
        collected_content = []
        async for chunk in response_stream:
            if chunk.choices and chunk.choices[0].delta.content:
                content = chunk.choices[0].delta.content
                # Accumulate deltas so the final chunk can carry the full text.
                collected_content.append(content)
                # Create streaming response chunk
                stream_chunk = ChatCompletionStreamResponse(
                    id=completion_id,
                    created=created_time,
                    model=request_body.model,
                    choices=[
                        {
                            "index": 0,
                            "delta": {"content": content},
                            "finish_reason": None,
                        }
                    ],
                )
                yield f"data: {stream_chunk.model_dump_json()}\n\n"
        # Send final chunk with complete response in expected format
        full_response = "".join(collected_content)
        updated_history = [{"role": "assistant", "content": full_response}]
        final_chunk = ChatCompletionStreamResponse(
            id=completion_id,
            created=created_time,
            model=request_body.model,
            choices=[
                {
                    "index": 0,
                    "delta": {},
                    "finish_reason": "stop",
                    # Non-standard OpenAI field: presumably read by the archgw
                    # filter chain to recover the full turn — TODO confirm.
                    "message": {
                        "role": "assistant",
                        "content": json.dumps(updated_history),
                    },
                }
            ],
        )
        yield f"data: {final_chunk.model_dump_json()}\n\n"
        yield "data: [DONE]\n\n"
    except Exception as e:
        logger.error(f"Error generating streaming response: {e}")
        # Send error as streaming response
        error_chunk = ChatCompletionStreamResponse(
            id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
            created=int(time.time()),
            model=request_body.model,
            choices=[
                {
                    "index": 0,
                    "delta": {
                        "content": "I apologize, but I'm having trouble generating a response right now. Please try again."
                    },
                    "finish_reason": "stop",
                }
            ],
        )
        yield f"data: {error_chunk.model_dump_json()}\n\n"
        yield "data: [DONE]\n\n"
async def non_streaming_chat_completions(
    request_body: ChatCompletionRequest, traceparent_header: str = None
):
    """Generate non-streaming chat completions.

    Args:
        request_body: Incoming OpenAI-style chat completion request.
        traceparent_header: Optional W3C traceparent to propagate for tracing.

    Returns:
        ChatCompletionResponse containing the generated message, or a canned
        fallback message if the upstream call fails. Token usage is
        approximated by whitespace splitting.
    """
    # Prepare messages for response generation
    response_messages = prepare_response_messages(request_body)
    # Prompt-side token estimate, computed once and shared by both the
    # success and fallback paths (the original recomputed it twice per path).
    prompt_tokens = sum(len(msg.content.split()) for msg in request_body.messages)

    def _build_response(content: str) -> ChatCompletionResponse:
        # Assemble an OpenAI-compatible response envelope around `content`.
        completion_tokens = len(content.split())
        return ChatCompletionResponse(
            id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
            created=int(time.time()),
            model=request_body.model,
            choices=[
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": content},
                    "finish_reason": "stop",
                }
            ],
            usage={
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            },
        )

    try:
        # Call archgw using OpenAI client
        logger.info(f"Calling archgw at {LLM_GATEWAY_ENDPOINT} to generate response")
        # Prepare extra headers if traceparent is provided
        extra_headers = {"x-envoy-max-retries": "3"}
        if traceparent_header:
            extra_headers["traceparent"] = traceparent_header
        response = await archgw_client.chat.completions.create(
            model=RESPONSE_MODEL,
            messages=response_messages,
            temperature=request_body.temperature or 0.7,
            max_tokens=request_body.max_tokens or 1000,
            extra_headers=extra_headers,
        )
        generated_response = response.choices[0].message.content.strip()
        logger.info("Response generated successfully")
        return _build_response(generated_response)
    except Exception as e:
        logger.error(f"Error generating response: {e}")
        # Degrade gracefully: return a canned apology instead of surfacing
        # the gateway failure to the caller.
        fallback_message = "I apologize, but I'm having trouble generating a response right now. Please try again."
        return _build_response(fallback_message)
@app.get("/health")
async def health_check():
    """Health check endpoint.

    Returns a static payload; used by orchestrators to probe liveness.
    """
    return {"status": "healthy"}
def start_server(host: str = "localhost", port: int = 8000):
    """Start the REST server.

    Blocks the calling thread running uvicorn; the custom log config makes
    uvicorn's output match this module's [RESPONSE_GENERATOR] log format.
    """
    uvicorn.run(
        app,
        host=host,
        port=port,
        # dictConfig-style logging setup routed to stdout.
        log_config={
            "version": 1,
            "disable_existing_loggers": False,
            "formatters": {
                "default": {
                    "format": "%(asctime)s - [RESPONSE_GENERATOR] - %(levelname)s - %(message)s",
                },
            },
            "handlers": {
                "default": {
                    "formatter": "default",
                    "class": "logging.StreamHandler",
                    "stream": "ext://sys.stdout",
                },
            },
            "root": {
                "level": "INFO",
                "handlers": ["default"],
            },
        },
    )

View file

@ -0,0 +1,257 @@
path,content
TechCorp_CloudServices_SLA_Agreement_2024,"SERVICE LEVEL AGREEMENT
This Service Level Agreement (""SLA"") is entered into on March 15, 2024, between TechCorp Solutions Inc., a Delaware corporation (""Provider""), and CloudFirst Enterprises LLC (""Customer"").
DEFINITIONS
Service Availability: The percentage of time during which the cloud services are operational and accessible.
Downtime: Any period when the services are unavailable or inaccessible to Customer.
Response Time: The time between service request submission and initial response from Provider.
SERVICE COMMITMENTS
Provider guarantees 99.9% uptime for all cloud infrastructure services during any calendar month.
Average response time for API calls shall not exceed 200 milliseconds under normal operating conditions.
Customer support response times: Critical issues within 1 hour, Standard issues within 4 hours.
REMEDIES
For each full percentage point below 99.9% availability, Customer receives 10% credit on monthly fees.
If response times exceed 500ms for more than 5 minutes in any hour, Customer receives 5% monthly credit.
MONITORING AND REPORTING
Provider will maintain real-time monitoring systems and provide monthly performance reports.
All metrics will be measured from Provider's monitoring systems located in primary data centers.
This SLA remains in effect for the duration of the underlying service agreement.
Executed by:
TechCorp Solutions Inc.
Sarah Mitchell, VP Operations
Date: March 15, 2024
CloudFirst Enterprises LLC
Robert Chen, CTO
Date: March 16, 2024"
DataSecure_Privacy_Policy_v3.2,"PRIVACY POLICY
DataSecure Analytics, Inc. (""Company"") Privacy Policy
Effective Date: January 1, 2024
Last Updated: February 28, 2024
INFORMATION COLLECTION
We collect information you provide directly, such as account details, usage preferences, and communication records.
Automatically collected data includes IP addresses, browser types, device information, and service interaction logs.
Third-party integrations may provide additional user behavior and demographic information with consent.
DATA USAGE
Personal information is used to provide services, improve user experience, and communicate service updates.
Aggregated, non-identifiable data may be used for analytics, research, and service enhancement.
We do not sell personal information to third parties for marketing purposes.
DATA PROTECTION
All data is encrypted in transit using TLS 1.3 and at rest using AES-256 encryption.
Access controls limit data access to authorized personnel only on a need-to-know basis.
Regular security audits and penetration testing ensure ongoing protection measures.
DATA RETENTION
Personal data is retained for the duration of active service plus 24 months.
Logs and analytics data are retained for 12 months unless legally required otherwise.
Upon account deletion, personal data is permanently removed within 30 days.
USER RIGHTS
Users may request access to, correction of, or deletion of their personal information.
Data portability requests will be fulfilled in standard formats within 30 days.
Marketing communications can be opted out of at any time.
CONTACT
For privacy concerns, contact: privacy@datasecure.com
Data Protection Officer: Jennifer Walsh, jwalsh@datasecure.com"
GlobalManufacturing_SupplyChain_Contract_Q2_2024,"SUPPLY CHAIN AGREEMENT
This Supply Chain Agreement is entered into between GlobalManufacturing Corp (""Buyer"") and PrecisionParts Ltd (""Supplier"") effective April 1, 2024.
SCOPE OF SERVICES
Supplier will provide automotive components including brake assemblies, suspension parts, and electrical harnesses.
All products must meet ISO 9001 quality standards and automotive industry specifications.
Delivery schedule: Weekly shipments every Tuesday, with 48-hour advance shipping notifications.
PRICING AND PAYMENT
Component pricing is fixed for initial 6-month term with quarterly price review thereafter.
Payment terms: Net 45 days from invoice date via electronic transfer.
Volume discounts apply: 5% for orders exceeding 10,000 units per month, 8% for orders exceeding 25,000 units.
QUALITY REQUIREMENTS
All components must pass incoming inspection with less than 0.1% defect rate.
Supplier maintains quality certifications including IATF 16949 and environmental compliance.
Batch tracking and traceability required for all delivered components.
LOGISTICS AND DELIVERY
Supplier responsible for packaging, labeling, and delivery to Buyer's distribution centers.
Delivery windows: 8 AM - 4 PM, Monday through Friday, with advance appointment scheduling.
Late delivery penalties: 2% of shipment value for each day beyond scheduled delivery.
RISK MANAGEMENT
Supplier maintains business continuity plans and alternative sourcing strategies.
Force majeure events must be reported within 24 hours with mitigation plans.
Insurance requirements: $5M general liability, $2M product liability coverage.
INTELLECTUAL PROPERTY
All custom tooling and specifications remain property of Buyer.
Supplier grants license to use necessary patents for component manufacturing.
This agreement shall remain in effect for 24 months with automatic renewal unless terminated.
GlobalManufacturing Corp
Michael Rodriguez, Supply Chain Director
Date: April 1, 2024
PrecisionParts Ltd
Amanda Foster, VP Sales
Date: April 2, 2024"
EduTech_StudentData_Management_Policy_2024,"STUDENT DATA MANAGEMENT POLICY
EduTech Learning Platform - Data Management and Protection Policy
Document Version: 2.1
Effective Date: August 15, 2024
SCOPE AND PURPOSE
This policy governs the collection, use, storage, and protection of student educational records and personal information.
Applies to all employees, contractors, and third-party service providers accessing student data.
Compliance with FERPA, COPPA, and state student privacy laws is mandatory.
DATA CLASSIFICATION
Educational Records: Grades, attendance, assignments, and academic progress information.
Personal Information: Names, addresses, contact details, and demographic information.
Behavioral Data: Learning patterns, platform usage, and engagement metrics.
COLLECTION PRINCIPLES
Data collection is limited to educational purposes and service improvement only.
Parental consent required for students under 13 years of age.
Students and parents have right to review and request corrections to educational records.
ACCESS CONTROLS
Role-based access ensures personnel see only data necessary for their functions.
Multi-factor authentication required for all system access.
Access logs maintained and reviewed monthly for unauthorized activity.
DATA SHARING
Educational records shared only with authorized school personnel and parents/students.
No data sharing with third parties for commercial purposes without explicit consent.
Research data must be de-identified and aggregated before external sharing.
SECURITY MEASURES
Data encrypted using industry-standard protocols during transmission and storage.
Regular security assessments and vulnerability testing conducted quarterly.
Incident response plan includes notification procedures for data breaches.
RETENTION AND DISPOSAL
Student records retained according to school district policies, typically 5-7 years post-graduation.
Inactive accounts and associated data purged after 2 years of non-use.
Secure data destruction protocols ensure complete removal of sensitive information.
COMPLIANCE MONITORING
Annual privacy training required for all staff handling student data.
Regular audits ensure ongoing compliance with applicable privacy regulations.
Privacy impact assessments conducted for new features or data uses.
Contact: Dr. Lisa Thompson, Chief Privacy Officer
Email: privacy@edutech-learning.com
Phone: (555) 123-4567"
FinanceFirst_Investment_Advisory_Agreement_2024,"INVESTMENT ADVISORY AGREEMENT
This Investment Advisory Agreement is entered into between FinanceFirst Advisors LLC (""Advisor"") and Madison Investment Group (""Client"") on May 20, 2024.
ADVISORY SERVICES
Advisor will provide comprehensive investment management and financial planning services.
Services include portfolio construction, asset allocation, risk assessment, and performance monitoring.
Regular portfolio reviews conducted quarterly with detailed performance reporting.
INVESTMENT AUTHORITY
Client grants Advisor discretionary authority to make investment decisions within agreed parameters.
Investment universe includes stocks, bonds, ETFs, mutual funds, and alternative investments as appropriate.
All trades executed through qualified broker-dealers with best execution practices.
FEE STRUCTURE
Management fee: 1.25% annually on assets under management, calculated and billed quarterly.
Performance fee: 15% of returns exceeding S&P 500 benchmark, calculated annually.
Additional fees may apply for specialized services such as tax planning or estate planning.
CLIENT RESPONSIBILITIES
Client must provide accurate financial information and promptly communicate changes in circumstances.
Investment objectives and risk tolerance should be reviewed and updated annually.
Client responsible for reviewing and approving investment policy statement.
RISK DISCLOSURE
All investments carry risk of loss, and past performance does not guarantee future results.
Diversification does not ensure profit or protect against loss in declining markets.
Alternative investments may have limited liquidity and higher volatility.
REGULATORY COMPLIANCE
Advisor is registered with the Securities and Exchange Commission as an investment advisor.
All activities conducted in accordance with Investment Advisers Act of 1940 and applicable regulations.
Form ADV Part 2 brochure provided annually with material updates.
CONFIDENTIALITY
All client information treated as confidential and shared only as necessary for service provision.
Third-party service providers bound by confidentiality agreements.
Client data protected through secure systems and access controls.
TERMINATION
Either party may terminate agreement with 30 days written notice.
Upon termination, Advisor will assist with orderly transfer of assets to new custodian or advisor.
Final fee calculation prorated to date of termination.
FinanceFirst Advisors LLC
Thomas Anderson, Managing Partner
Date: May 20, 2024
Madison Investment Group
Rebecca Martinez, Chief Investment Officer
Date: May 21, 2024"
HealthSystem_PatientCare_Standards_2024,"PATIENT CARE STANDARDS AND PROTOCOLS
Metropolitan Health System - Clinical Care Standards
Document ID: MHS-PCS-2024-001
Effective Date: June 1, 2024
PATIENT SAFETY PROTOCOLS
All patients must have proper identification verification using two unique identifiers.
Medication administration requires independent double-check for high-risk medications.
Fall risk assessments completed within 4 hours of admission with appropriate interventions.
CLINICAL DOCUMENTATION
Medical records must be completed within 24 hours of patient encounter.
All entries require electronic signature with timestamp and provider identification.
Critical values and abnormal results must be communicated and documented immediately.
INFECTION CONTROL
Hand hygiene compliance monitored with target rate of 95% or higher.
Personal protective equipment used according to transmission-based precautions.
Isolation procedures implemented within 2 hours of identification of infectious conditions.
EMERGENCY RESPONSE
Code team response time target: 3 minutes from activation to arrival.
Crash cart and emergency equipment checks performed daily and documented.
All staff required to maintain current CPR and emergency response certifications.
PATIENT COMMUNICATION
Patient rights and responsibilities communicated upon admission.
Informed consent obtained and documented prior to procedures and treatments.
Family involvement encouraged with respect for patient privacy preferences.
QUALITY MEASURES
Patient satisfaction scores monitored monthly with target of 4.5/5.0 or higher.
Medication error rates tracked with goal of less than 1 per 1000 patient days.
Hospital-acquired infection rates measured and benchmarked against national standards.
STAFF COMPETENCY
Annual competency assessments required for all clinical staff.
Continuing education requirements: 24 hours annually for nurses, 40 hours for physicians.
Specialty certifications maintained according to department and role requirements.
TECHNOLOGY STANDARDS
Electronic health record system used for all patient documentation.
Telemedicine capabilities available for remote consultations and monitoring.
Clinical decision support tools integrated to assist with diagnosis and treatment decisions.
Contact: Dr. Patricia Williams, Chief Medical Officer
Email: pwilliams@metrohealthsystem.org
Phone: (555) 987-6543"
1 path content
2 TechCorp_CloudServices_SLA_Agreement_2024 SERVICE LEVEL AGREEMENT This Service Level Agreement ("SLA") is entered into on March 15, 2024, between TechCorp Solutions Inc., a Delaware corporation ("Provider"), and CloudFirst Enterprises LLC ("Customer"). DEFINITIONS Service Availability: The percentage of time during which the cloud services are operational and accessible. Downtime: Any period when the services are unavailable or inaccessible to Customer. Response Time: The time between service request submission and initial response from Provider. SERVICE COMMITMENTS Provider guarantees 99.9% uptime for all cloud infrastructure services during any calendar month. Average response time for API calls shall not exceed 200 milliseconds under normal operating conditions. Customer support response times: Critical issues within 1 hour, Standard issues within 4 hours. REMEDIES For each full percentage point below 99.9% availability, Customer receives 10% credit on monthly fees. If response times exceed 500ms for more than 5 minutes in any hour, Customer receives 5% monthly credit. MONITORING AND REPORTING Provider will maintain real-time monitoring systems and provide monthly performance reports. All metrics will be measured from Provider's monitoring systems located in primary data centers. This SLA remains in effect for the duration of the underlying service agreement. Executed by: TechCorp Solutions Inc. Sarah Mitchell, VP Operations Date: March 15, 2024 CloudFirst Enterprises LLC Robert Chen, CTO Date: March 16, 2024
3 DataSecure_Privacy_Policy_v3.2 PRIVACY POLICY DataSecure Analytics, Inc. ("Company") Privacy Policy Effective Date: January 1, 2024 Last Updated: February 28, 2024 INFORMATION COLLECTION We collect information you provide directly, such as account details, usage preferences, and communication records. Automatically collected data includes IP addresses, browser types, device information, and service interaction logs. Third-party integrations may provide additional user behavior and demographic information with consent. DATA USAGE Personal information is used to provide services, improve user experience, and communicate service updates. Aggregated, non-identifiable data may be used for analytics, research, and service enhancement. We do not sell personal information to third parties for marketing purposes. DATA PROTECTION All data is encrypted in transit using TLS 1.3 and at rest using AES-256 encryption. Access controls limit data access to authorized personnel only on a need-to-know basis. Regular security audits and penetration testing ensure ongoing protection measures. DATA RETENTION Personal data is retained for the duration of active service plus 24 months. Logs and analytics data are retained for 12 months unless legally required otherwise. Upon account deletion, personal data is permanently removed within 30 days. USER RIGHTS Users may request access to, correction of, or deletion of their personal information. Data portability requests will be fulfilled in standard formats within 30 days. Marketing communications can be opted out of at any time. CONTACT For privacy concerns, contact: privacy@datasecure.com Data Protection Officer: Jennifer Walsh, jwalsh@datasecure.com
4 GlobalManufacturing_SupplyChain_Contract_Q2_2024 SUPPLY CHAIN AGREEMENT This Supply Chain Agreement is entered into between GlobalManufacturing Corp ("Buyer") and PrecisionParts Ltd ("Supplier") effective April 1, 2024. SCOPE OF SERVICES Supplier will provide automotive components including brake assemblies, suspension parts, and electrical harnesses. All products must meet ISO 9001 quality standards and automotive industry specifications. Delivery schedule: Weekly shipments every Tuesday, with 48-hour advance shipping notifications. PRICING AND PAYMENT Component pricing is fixed for initial 6-month term with quarterly price review thereafter. Payment terms: Net 45 days from invoice date via electronic transfer. Volume discounts apply: 5% for orders exceeding 10,000 units per month, 8% for orders exceeding 25,000 units. QUALITY REQUIREMENTS All components must pass incoming inspection with less than 0.1% defect rate. Supplier maintains quality certifications including IATF 16949 and environmental compliance. Batch tracking and traceability required for all delivered components. LOGISTICS AND DELIVERY Supplier responsible for packaging, labeling, and delivery to Buyer's distribution centers. Delivery windows: 8 AM - 4 PM, Monday through Friday, with advance appointment scheduling. Late delivery penalties: 2% of shipment value for each day beyond scheduled delivery. RISK MANAGEMENT Supplier maintains business continuity plans and alternative sourcing strategies. Force majeure events must be reported within 24 hours with mitigation plans. Insurance requirements: $5M general liability, $2M product liability coverage. INTELLECTUAL PROPERTY All custom tooling and specifications remain property of Buyer. Supplier grants license to use necessary patents for component manufacturing. This agreement shall remain in effect for 24 months with automatic renewal unless terminated. 
GlobalManufacturing Corp Michael Rodriguez, Supply Chain Director Date: April 1, 2024 PrecisionParts Ltd Amanda Foster, VP Sales Date: April 2, 2024
5 EduTech_StudentData_Management_Policy_2024 STUDENT DATA MANAGEMENT POLICY EduTech Learning Platform - Data Management and Protection Policy Document Version: 2.1 Effective Date: August 15, 2024 SCOPE AND PURPOSE This policy governs the collection, use, storage, and protection of student educational records and personal information. Applies to all employees, contractors, and third-party service providers accessing student data. Compliance with FERPA, COPPA, and state student privacy laws is mandatory. DATA CLASSIFICATION Educational Records: Grades, attendance, assignments, and academic progress information. Personal Information: Names, addresses, contact details, and demographic information. Behavioral Data: Learning patterns, platform usage, and engagement metrics. COLLECTION PRINCIPLES Data collection is limited to educational purposes and service improvement only. Parental consent required for students under 13 years of age. Students and parents have right to review and request corrections to educational records. ACCESS CONTROLS Role-based access ensures personnel see only data necessary for their functions. Multi-factor authentication required for all system access. Access logs maintained and reviewed monthly for unauthorized activity. DATA SHARING Educational records shared only with authorized school personnel and parents/students. No data sharing with third parties for commercial purposes without explicit consent. Research data must be de-identified and aggregated before external sharing. SECURITY MEASURES Data encrypted using industry-standard protocols during transmission and storage. Regular security assessments and vulnerability testing conducted quarterly. Incident response plan includes notification procedures for data breaches. RETENTION AND DISPOSAL Student records retained according to school district policies, typically 5-7 years post-graduation. Inactive accounts and associated data purged after 2 years of non-use. 
Secure data destruction protocols ensure complete removal of sensitive information. COMPLIANCE MONITORING Annual privacy training required for all staff handling student data. Regular audits ensure ongoing compliance with applicable privacy regulations. Privacy impact assessments conducted for new features or data uses. Contact: Dr. Lisa Thompson, Chief Privacy Officer Email: privacy@edutech-learning.com Phone: (555) 123-4567
6 FinanceFirst_Investment_Advisory_Agreement_2024 INVESTMENT ADVISORY AGREEMENT This Investment Advisory Agreement is entered into between FinanceFirst Advisors LLC ("Advisor") and Madison Investment Group ("Client") on May 20, 2024. ADVISORY SERVICES Advisor will provide comprehensive investment management and financial planning services. Services include portfolio construction, asset allocation, risk assessment, and performance monitoring. Regular portfolio reviews conducted quarterly with detailed performance reporting. INVESTMENT AUTHORITY Client grants Advisor discretionary authority to make investment decisions within agreed parameters. Investment universe includes stocks, bonds, ETFs, mutual funds, and alternative investments as appropriate. All trades executed through qualified broker-dealers with best execution practices. FEE STRUCTURE Management fee: 1.25% annually on assets under management, calculated and billed quarterly. Performance fee: 15% of returns exceeding S&P 500 benchmark, calculated annually. Additional fees may apply for specialized services such as tax planning or estate planning. CLIENT RESPONSIBILITIES Client must provide accurate financial information and promptly communicate changes in circumstances. Investment objectives and risk tolerance should be reviewed and updated annually. Client responsible for reviewing and approving investment policy statement. RISK DISCLOSURE All investments carry risk of loss, and past performance does not guarantee future results. Diversification does not ensure profit or protect against loss in declining markets. Alternative investments may have limited liquidity and higher volatility. REGULATORY COMPLIANCE Advisor is registered with the Securities and Exchange Commission as an investment advisor. All activities conducted in accordance with Investment Advisers Act of 1940 and applicable regulations. Form ADV Part 2 brochure provided annually with material updates. 
CONFIDENTIALITY All client information treated as confidential and shared only as necessary for service provision. Third-party service providers bound by confidentiality agreements. Client data protected through secure systems and access controls. TERMINATION Either party may terminate agreement with 30 days written notice. Upon termination, Advisor will assist with orderly transfer of assets to new custodian or advisor. Final fee calculation prorated to date of termination. FinanceFirst Advisors LLC Thomas Anderson, Managing Partner Date: May 20, 2024 Madison Investment Group Rebecca Martinez, Chief Investment Officer Date: May 21, 2024
7 HealthSystem_PatientCare_Standards_2024 PATIENT CARE STANDARDS AND PROTOCOLS Metropolitan Health System - Clinical Care Standards Document ID: MHS-PCS-2024-001 Effective Date: June 1, 2024 PATIENT SAFETY PROTOCOLS All patients must have proper identification verification using two unique identifiers. Medication administration requires independent double-check for high-risk medications. Fall risk assessments completed within 4 hours of admission with appropriate interventions. CLINICAL DOCUMENTATION Medical records must be completed within 24 hours of patient encounter. All entries require electronic signature with timestamp and provider identification. Critical values and abnormal results must be communicated and documented immediately. INFECTION CONTROL Hand hygiene compliance monitored with target rate of 95% or higher. Personal protective equipment used according to transmission-based precautions. Isolation procedures implemented within 2 hours of identification of infectious conditions. EMERGENCY RESPONSE Code team response time target: 3 minutes from activation to arrival. Crash cart and emergency equipment checks performed daily and documented. All staff required to maintain current CPR and emergency response certifications. PATIENT COMMUNICATION Patient rights and responsibilities communicated upon admission. Informed consent obtained and documented prior to procedures and treatments. Family involvement encouraged with respect for patient privacy preferences. QUALITY MEASURES Patient satisfaction scores monitored monthly with target of 4.5/5.0 or higher. Medication error rates tracked with goal of less than 1 per 1000 patient days. Hospital-acquired infection rates measured and benchmarked against national standards. STAFF COMPETENCY Annual competency assessments required for all clinical staff. Continuing education requirements: 24 hours annually for nurses, 40 hours for physicians. 
Specialty certifications maintained according to department and role requirements. TECHNOLOGY STANDARDS Electronic health record system used for all patient documentation. Telemedicine capabilities available for remote consultations and monitoring. Clinical decision support tools integrated to assist with diagnosis and treatment decisions. Contact: Dr. Patricia Williams, Chief Medical Officer Email: pwilliams@metrohealthsystem.org Phone: (555) 987-6543

View file

@ -0,0 +1,47 @@
#!/bin/bash
# Launch the RAG demo agents and block until they exit.
# Each agent runs in the background; its PID is tracked so a SIGINT/SIGTERM
# tears down every child process before the script exits.
set -e
WAIT_FOR_PIDS=()
# log MESSAGE... — print a millisecond-precision timestamped line to stdout.
log() {
    timestamp=$(python3 -c 'from datetime import datetime; print(datetime.now().strftime("%Y-%m-%d %H:%M:%S,%f")[:23])')
    message="$*"
    echo "$timestamp - $message"
}
# cleanup — kill every tracked child process, then exit non-zero.
cleanup() {
    log "Caught signal, terminating all user processes ..."
    for PID in "${WAIT_FOR_PIDS[@]}"; do
        if kill "$PID" 2> /dev/null; then
            log "killed process: $PID"
        fi
    done
    exit 1
}
# Trap real signals only. Trapping EXIT would run the handler (and force
# exit status 1 with a misleading "Caught signal" message) even when every
# agent terminates normally.
trap cleanup INT TERM
# log "Starting input guards filter on port 10500..."
# uv run python -m rag_agent --host 0.0.0.0 --port 10500 --agent input_guards &
# WAIT_FOR_PIDS+=($!)
log "Starting query_parser agent on port 10501..."
uv run python -m rag_agent --host 0.0.0.0 --port 10501 --agent query_rewriter &
WAIT_FOR_PIDS+=($!)
log "Starting context_builder agent on port 10502..."
uv run python -m rag_agent --host 0.0.0.0 --port 10502 --agent context_builder &
WAIT_FOR_PIDS+=($!)
# log "Starting response_generator agent on port 10400..."
# uv run python -m rag_agent --host 0.0.0.0 --port 10400 --agent response_generator &
# WAIT_FOR_PIDS+=($!)
log "Starting response_generator agent on port 10505..."
uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10505 --agent response_generator &
WAIT_FOR_PIDS+=($!)
# Block on every child; with set -e the script aborts if any agent fails.
for PID in "${WAIT_FOR_PIDS[@]}"; do
    wait "$PID"
done

View file

@ -0,0 +1,95 @@
@baseUrl = http://0.0.0.0:10502
@model = gpt-4o
# Health Check
GET {{baseUrl}}/health
###
# Test 1: Simple Non-Streaming Chat Completion
POST {{baseUrl}}/v1/chat/completions
Content-Type: application/json
{
"model": "{{model}}",
"messages": [
{
"role": "user",
"content": "Hello! Can you help me understand what machine learning is?"
}
]
}
###
# Test 2: Simple Streaming Chat Completion
POST {{baseUrl}}/v1/chat/completions
Content-Type: application/json
{
"model": "{{model}}",
"messages": [
{
"role": "user",
"content": "Explain the concept of artificial intelligence in simple terms."
}
],
"stream": true
}
### Test 3: Streaming RAG query through the gateway on port 8001
POST http://localhost:8001/v1/chat/completions
Content-Type: application/json
{
"model": "{{model}}",
"messages": [
{
"role": "user",
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
}
],
"stream": true
}
### send request to the query_rewriter agent (port 10501; context_builder runs on 10502)
POST http://localhost:10501/v1/chat/completions
Content-Type: application/json
{
"model": "gpt-4o-mini",
"messages": [
{
"role": "user",
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
}
]
}
### test fast-llm
POST http://localhost:12000/v1/chat/completions
Content-Type: application/json
{
"model": "fast-llm",
"messages": [
{
"role": "user",
"content": "hello"
}
]
}
### test smart-llm
POST http://localhost:12000/v1/chat/completions
Content-Type: application/json
{
"model": "smart-llm",
"messages": [
{
"role": "user",
"content": "hello"
}
]
}

1830
demos/use_cases/mcp_filter/uv.lock generated Normal file

File diff suppressed because it is too large Load diff

View file

@ -10,7 +10,6 @@ services:
volumes:
- ../../demos/samples_python/weather_forecast/arch_config.yaml:/app/arch_config.yaml
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
- ~/archgw_logs:/var/log/
extra_hosts:
- "host.docker.internal:host-gateway"
environment: