mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
Use mcp tools for filter chain (#621)
* agents framework demo * more changes * add more changes * pending changes * fix tests * fix more * rebase with main and better handle error from mcp * add trace for filters * add test for client error, server error and for mcp error * update schema validate code and rename kind => type in agent_filter * fix agent description and pre-commit * fix tests * add provider specific request parsing in agents chat * fix precommit and tests * cleanup demo * update readme * fix pre-commit * refactor tracing * fix fmt * fix: handle MessageContent enum in responses API conversion - Update request.rs to handle new MessageContent enum structure from main - MessageContent can now be Text(String) or Items(Vec<InputContent>) - Handle new InputItem variants (ItemReference, FunctionCallOutput) - Fixes compilation error after merging latest main (#632) * address pr feedback * fix span * fix build * update openai version
This commit is contained in:
parent
cb82a83c7b
commit
2f9121407b
40 changed files with 4886 additions and 190 deletions
|
|
@ -14,6 +14,38 @@ properties:
|
|||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
url:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- url
|
||||
filters:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
url:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
enum:
|
||||
- mcp
|
||||
transport:
|
||||
type: string
|
||||
enum:
|
||||
- streamable-http
|
||||
tool:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- url
|
||||
listeners:
|
||||
oneOf:
|
||||
- type: array
|
||||
|
|
|
|||
|
|
@ -214,21 +214,21 @@ static_resources:
|
|||
- name: envoy.filters.network.http_connection_manager
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
|
||||
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
|
||||
generate_request_id: true
|
||||
tracing:
|
||||
provider:
|
||||
name: envoy.tracers.opentelemetry
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
|
||||
grpc_service:
|
||||
envoy_grpc:
|
||||
cluster_name: opentelemetry_collector
|
||||
timeout: 0.250s
|
||||
service_name: tools
|
||||
random_sampling:
|
||||
value: {{ arch_tracing.random_sampling }}
|
||||
{% endif %}
|
||||
# {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
|
||||
# generate_request_id: true
|
||||
# tracing:
|
||||
# provider:
|
||||
# name: envoy.tracers.opentelemetry
|
||||
# typed_config:
|
||||
# "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
|
||||
# grpc_service:
|
||||
# envoy_grpc:
|
||||
# cluster_name: opentelemetry_collector
|
||||
# timeout: 0.250s
|
||||
# service_name: tools
|
||||
# random_sampling:
|
||||
# value: {{ arch_tracing.random_sampling }}
|
||||
# {% endif %}
|
||||
stat_prefix: outbound_api_traffic
|
||||
codec_type: AUTO
|
||||
scheme_header_transformation:
|
||||
|
|
@ -299,7 +299,7 @@ static_resources:
|
|||
envoy_grpc:
|
||||
cluster_name: opentelemetry_collector
|
||||
timeout: 0.250s
|
||||
service_name: arch_gateway
|
||||
service_name: plano(inbound)
|
||||
random_sampling:
|
||||
value: {{ arch_tracing.random_sampling }}
|
||||
{% endif %}
|
||||
|
|
|
|||
|
|
@ -101,8 +101,17 @@ def validate_and_render_schema():
|
|||
|
||||
# Process agents section and convert to endpoints
|
||||
agents = config_yaml.get("agents", [])
|
||||
for agent in agents:
|
||||
filters = config_yaml.get("filters", [])
|
||||
agents_combined = agents + filters
|
||||
agent_id_keys = set()
|
||||
|
||||
for agent in agents_combined:
|
||||
agent_id = agent.get("id")
|
||||
if agent_id in agent_id_keys:
|
||||
raise Exception(
|
||||
f"Duplicate agent id {agent_id}, please provide unique id for each agent"
|
||||
)
|
||||
agent_id_keys.add(agent_id)
|
||||
agent_endpoint = agent.get("url")
|
||||
|
||||
if agent_id and agent_endpoint:
|
||||
|
|
|
|||
|
|
@ -57,6 +57,10 @@ def convert_legacy_listeners(
|
|||
"timeout": "30s",
|
||||
}
|
||||
|
||||
# Handle None case
|
||||
if listeners is None:
|
||||
return [llm_gateway_listener], llm_gateway_listener, prompt_gateway_listener
|
||||
|
||||
if isinstance(listeners, dict):
|
||||
# legacy listeners
|
||||
# check if type is array or object
|
||||
|
|
|
|||
|
|
@ -94,21 +94,16 @@ def test_validate_and_render_happy_path_agent_config(monkeypatch):
|
|||
version: v0.3.0
|
||||
|
||||
agents:
|
||||
- name: query_rewriter
|
||||
kind: openai
|
||||
endpoint: http://localhost:10500
|
||||
- name: context_builder
|
||||
kind: openai
|
||||
endpoint: http://localhost:10501
|
||||
- name: response_generator
|
||||
kind: openai
|
||||
endpoint: http://localhost:10502
|
||||
- name: research_agent
|
||||
kind: openai
|
||||
endpoint: http://localhost:10500
|
||||
- name: input_guard_rails
|
||||
kind: openai
|
||||
endpoint: http://localhost:10503
|
||||
- id: query_rewriter
|
||||
url: http://localhost:10500
|
||||
- id: context_builder
|
||||
url: http://localhost:10501
|
||||
- id: response_generator
|
||||
url: http://localhost:10502
|
||||
- id: research_agent
|
||||
url: http://localhost:10500
|
||||
- id: input_guard_rails
|
||||
url: http://localhost:10503
|
||||
|
||||
listeners:
|
||||
- name: tmobile
|
||||
|
|
@ -156,7 +151,7 @@ listeners:
|
|||
mock.mock_open().return_value, # ARCH_CONFIG_FILE_RENDERED (write)
|
||||
]
|
||||
with mock.patch("builtins.open", m_open):
|
||||
with mock.patch("config_generator.Environment"):
|
||||
with mock.patch("cli.config_generator.Environment"):
|
||||
validate_and_render_schema()
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,16 +1,24 @@
|
|||
use std::sync::Arc;
|
||||
use std::time::{Instant, SystemTime};
|
||||
|
||||
use bytes::Bytes;
|
||||
use hermesllm::apis::openai::ChatCompletionsRequest;
|
||||
use common::consts::TRACE_PARENT_HEADER;
|
||||
use common::traces::{SpanBuilder, SpanKind, parse_traceparent, generate_random_span_id};
|
||||
use hermesllm::apis::OpenAIMessage;
|
||||
use hermesllm::clients::SupportedAPIsFromClient;
|
||||
use hermesllm::providers::request::ProviderRequest;
|
||||
use hermesllm::ProviderRequestType;
|
||||
use http_body_util::combinators::BoxBody;
|
||||
use http_body_util::BodyExt;
|
||||
use hyper::{Request, Response};
|
||||
use serde::ser::Error as SerError;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use super::agent_selector::{AgentSelectionError, AgentSelector};
|
||||
use super::pipeline_processor::{PipelineError, PipelineProcessor};
|
||||
use super::response_handler::ResponseHandler;
|
||||
use crate::router::llm_router::RouterService;
|
||||
use crate::tracing::{OperationNameBuilder, operation_component, http};
|
||||
|
||||
/// Main errors for agent chat completions
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
|
|
@ -33,8 +41,17 @@ pub async fn agent_chat(
|
|||
_: String,
|
||||
agents_list: Arc<tokio::sync::RwLock<Option<Vec<common::configuration::Agent>>>>,
|
||||
listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
|
||||
trace_collector: Arc<common::traces::TraceCollector>,
|
||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||
match handle_agent_chat(request, router_service, agents_list, listeners).await {
|
||||
match handle_agent_chat(
|
||||
request,
|
||||
router_service,
|
||||
agents_list,
|
||||
listeners,
|
||||
trace_collector,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(response) => Ok(response),
|
||||
Err(err) => {
|
||||
// Check if this is a client error from the pipeline that should be cascaded
|
||||
|
|
@ -109,10 +126,11 @@ async fn handle_agent_chat(
|
|||
router_service: Arc<RouterService>,
|
||||
agents_list: Arc<tokio::sync::RwLock<Option<Vec<common::configuration::Agent>>>>,
|
||||
listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
|
||||
trace_collector: Arc<common::traces::TraceCollector>,
|
||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, AgentFilterChainError> {
|
||||
// Initialize services
|
||||
let agent_selector = AgentSelector::new(router_service);
|
||||
let pipeline_processor = PipelineProcessor::default();
|
||||
let mut pipeline_processor = PipelineProcessor::default();
|
||||
let response_handler = ResponseHandler::new();
|
||||
|
||||
// Extract listener name from headers
|
||||
|
|
@ -132,6 +150,13 @@ async fn handle_agent_chat(
|
|||
info!("Handling request for listener: {}", listener.name);
|
||||
|
||||
// Parse request body
|
||||
let request_path = request
|
||||
.uri()
|
||||
.path()
|
||||
.to_string()
|
||||
.strip_prefix("/agents")
|
||||
.unwrap()
|
||||
.to_string();
|
||||
let request_headers = request.headers().clone();
|
||||
let chat_request_bytes = request.collect().await?.to_bytes();
|
||||
|
||||
|
|
@ -140,61 +165,141 @@ async fn handle_agent_chat(
|
|||
String::from_utf8_lossy(&chat_request_bytes)
|
||||
);
|
||||
|
||||
let chat_completions_request: ChatCompletionsRequest =
|
||||
serde_json::from_slice(&chat_request_bytes).map_err(|err| {
|
||||
warn!(
|
||||
"Failed to parse request body as ChatCompletionsRequest: {}",
|
||||
err
|
||||
);
|
||||
AgentFilterChainError::RequestParsing(err)
|
||||
// Determine the API type from the endpoint
|
||||
let api_type =
|
||||
SupportedAPIsFromClient::from_endpoint(request_path.as_str()).ok_or_else(|| {
|
||||
let err_msg = format!("Unsupported endpoint: {}", request_path);
|
||||
warn!("{}", err_msg);
|
||||
AgentFilterChainError::RequestParsing(serde_json::Error::custom(err_msg))
|
||||
})?;
|
||||
|
||||
let client_request = match ProviderRequestType::try_from((&chat_request_bytes[..], &api_type)) {
|
||||
Ok(request) => request,
|
||||
Err(err) => {
|
||||
warn!("Failed to parse request as ProviderRequestType: {}", err);
|
||||
let err_msg = format!("Failed to parse request: {}", err);
|
||||
return Err(AgentFilterChainError::RequestParsing(
|
||||
serde_json::Error::custom(err_msg),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
let message: Vec<OpenAIMessage> = client_request.get_messages();
|
||||
|
||||
// let chat_completions_request: ChatCompletionsRequest =
|
||||
// serde_json::from_slice(&chat_request_bytes).map_err(|err| {
|
||||
// warn!(
|
||||
// "Failed to parse request body as ChatCompletionsRequest: {}",
|
||||
// err
|
||||
// );
|
||||
// AgentFilterChainError::RequestParsing(err)
|
||||
// })?;
|
||||
|
||||
// Extract trace parent for routing
|
||||
let trace_parent = request_headers
|
||||
.iter()
|
||||
.find(|(key, _)| key.as_str() == "traceparent")
|
||||
.find(|(key, _)| key.as_str() == TRACE_PARENT_HEADER)
|
||||
.map(|(_, value)| value.to_str().unwrap_or_default().to_string());
|
||||
|
||||
// Select appropriate agent using arch router llm model
|
||||
let selected_agent = agent_selector
|
||||
.select_agent(&chat_completions_request.messages, &listener, trace_parent)
|
||||
.await?;
|
||||
|
||||
debug!("Processing agent pipeline: {}", selected_agent.id);
|
||||
|
||||
// Create agent map for pipeline processing
|
||||
// Create agent map for pipeline processing and agent selection
|
||||
let agent_map = {
|
||||
let agents = agents_list.read().await;
|
||||
let agents = agents.as_ref().unwrap();
|
||||
agent_selector.create_agent_map(agents)
|
||||
};
|
||||
|
||||
// Parse trace parent to get trace_id and parent_span_id
|
||||
let (trace_id, parent_span_id) = if let Some(ref tp) = trace_parent {
|
||||
parse_traceparent(tp)
|
||||
} else {
|
||||
(String::new(), None)
|
||||
};
|
||||
|
||||
// Select appropriate agent using arch router llm model
|
||||
let selected_agent = agent_selector
|
||||
.select_agent(&message, &listener, trace_parent.clone())
|
||||
.await?;
|
||||
|
||||
debug!("Processing agent pipeline: {}", selected_agent.id);
|
||||
|
||||
// Record the start time for agent span
|
||||
let agent_start_time = SystemTime::now();
|
||||
let agent_start_instant = Instant::now();
|
||||
// let (span_id, trace_id) = trace_collector.start_span(
|
||||
// trace_parent.clone(),
|
||||
// operation_component::AGENT,
|
||||
// &format!("/agents{}", request_path),
|
||||
// &selected_agent.id,
|
||||
// );
|
||||
|
||||
let span_id = generate_random_span_id();
|
||||
|
||||
// Process the filter chain
|
||||
let processed_messages = pipeline_processor
|
||||
let chat_history = pipeline_processor
|
||||
.process_filter_chain(
|
||||
&chat_completions_request,
|
||||
&message,
|
||||
&selected_agent,
|
||||
&agent_map,
|
||||
&request_headers,
|
||||
Some(&trace_collector),
|
||||
trace_id.clone(),
|
||||
span_id.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Get terminal agent and send final response
|
||||
let terminal_agent_name = selected_agent.id;
|
||||
let terminal_agent_name = selected_agent.id.clone();
|
||||
let terminal_agent = agent_map.get(&terminal_agent_name).unwrap();
|
||||
|
||||
debug!("Processing terminal agent: {}", terminal_agent_name);
|
||||
debug!("Terminal agent details: {:?}", terminal_agent);
|
||||
|
||||
let llm_response = pipeline_processor
|
||||
.invoke_upstream_agent(
|
||||
&processed_messages,
|
||||
&chat_completions_request,
|
||||
.invoke_agent(
|
||||
&chat_history,
|
||||
client_request,
|
||||
terminal_agent,
|
||||
&request_headers,
|
||||
trace_id.clone(),
|
||||
span_id.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Record agent span after processing is complete
|
||||
let agent_end_time = SystemTime::now();
|
||||
let agent_elapsed = agent_start_instant.elapsed();
|
||||
|
||||
// Build full path with /agents prefix
|
||||
let full_path = format!("/agents{}", request_path);
|
||||
|
||||
// Build operation name: POST {full_path} {agent_name}
|
||||
let operation_name = OperationNameBuilder::new()
|
||||
.with_method("POST")
|
||||
.with_path(&full_path)
|
||||
.with_target(&terminal_agent_name)
|
||||
.build();
|
||||
|
||||
let mut span_builder = SpanBuilder::new(&operation_name)
|
||||
.with_span_id(span_id)
|
||||
.with_kind(SpanKind::Internal)
|
||||
.with_start_time(agent_start_time)
|
||||
.with_end_time(agent_end_time)
|
||||
.with_attribute(http::METHOD, "POST")
|
||||
.with_attribute(http::TARGET, full_path)
|
||||
.with_attribute("agent.name", terminal_agent_name.clone())
|
||||
.with_attribute("duration_ms", format!("{:.2}", agent_elapsed.as_secs_f64() * 1000.0));
|
||||
|
||||
if !trace_id.is_empty() {
|
||||
span_builder = span_builder.with_trace_id(trace_id);
|
||||
}
|
||||
if let Some(parent_id) = parent_span_id {
|
||||
span_builder = span_builder.with_parent_span_id(parent_id);
|
||||
}
|
||||
|
||||
let span = span_builder.build();
|
||||
// Use plano(agent) as service name for the agent processing span
|
||||
trace_collector.record_span(operation_component::AGENT, span);
|
||||
|
||||
// Create streaming response
|
||||
response_handler
|
||||
.create_streaming_response(llm_response)
|
||||
|
|
|
|||
|
|
@ -20,6 +20,8 @@ pub enum AgentSelectionError {
|
|||
RoutingError(String),
|
||||
#[error("Default agent not found for listener: {0}")]
|
||||
DefaultAgentNotFound(String),
|
||||
#[error("MCP client error: {0}")]
|
||||
McpError(String),
|
||||
}
|
||||
|
||||
/// Service for selecting agents based on routing preferences and listener configuration
|
||||
|
|
@ -29,7 +31,9 @@ pub struct AgentSelector {
|
|||
|
||||
impl AgentSelector {
|
||||
pub fn new(router_service: Arc<RouterService>) -> Self {
|
||||
Self { router_service }
|
||||
Self {
|
||||
router_service,
|
||||
}
|
||||
}
|
||||
|
||||
/// Find listener by name from the request headers
|
||||
|
|
@ -77,7 +81,9 @@ impl AgentSelector {
|
|||
return Ok(agents[0].clone());
|
||||
}
|
||||
|
||||
let usage_preferences = self.convert_agent_description_to_routing_preferences(agents);
|
||||
let usage_preferences = self
|
||||
.convert_agent_description_to_routing_preferences(agents)
|
||||
.await;
|
||||
debug!(
|
||||
"Agents usage preferences for agent routing str: {}",
|
||||
serde_json::to_string(&usage_preferences).unwrap_or_default()
|
||||
|
|
@ -131,20 +137,23 @@ impl AgentSelector {
|
|||
}
|
||||
|
||||
/// Convert agent descriptions to routing preferences
|
||||
fn convert_agent_description_to_routing_preferences(
|
||||
async fn convert_agent_description_to_routing_preferences(
|
||||
&self,
|
||||
agents: &[AgentFilterChain],
|
||||
) -> Vec<ModelUsagePreference> {
|
||||
agents
|
||||
.iter()
|
||||
.map(|agent| ModelUsagePreference {
|
||||
model: agent.id.clone(),
|
||||
let mut preferences = Vec::new();
|
||||
|
||||
for agent_chain in agents {
|
||||
preferences.push(ModelUsagePreference {
|
||||
model: agent_chain.id.clone(),
|
||||
routing_preferences: vec![RoutingPreference {
|
||||
name: agent.id.clone(),
|
||||
description: agent.description.as_ref().unwrap_or(&String::new()).clone(),
|
||||
name: agent_chain.id.clone(),
|
||||
description: agent_chain.description.clone().unwrap_or_default(),
|
||||
}],
|
||||
})
|
||||
.collect()
|
||||
});
|
||||
}
|
||||
|
||||
preferences
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -183,8 +192,10 @@ mod tests {
|
|||
fn create_test_agent_struct(name: &str) -> Agent {
|
||||
Agent {
|
||||
id: name.to_string(),
|
||||
kind: Some("test".to_string()),
|
||||
agent_type: Some("test".to_string()),
|
||||
url: "http://localhost:8080".to_string(),
|
||||
tool: None,
|
||||
transport: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -240,8 +251,8 @@ mod tests {
|
|||
assert!(agent_map.contains_key("agent2"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_convert_agent_description_to_routing_preferences() {
|
||||
#[tokio::test]
|
||||
async fn test_convert_agent_description_to_routing_preferences() {
|
||||
let router_service = create_test_router_service();
|
||||
let selector = AgentSelector::new(router_service);
|
||||
|
||||
|
|
@ -250,7 +261,9 @@ mod tests {
|
|||
create_test_agent("agent2", "Second agent description", false),
|
||||
];
|
||||
|
||||
let preferences = selector.convert_agent_description_to_routing_preferences(&agents);
|
||||
let preferences = selector
|
||||
.convert_agent_description_to_routing_preferences(&agents)
|
||||
.await;
|
||||
|
||||
assert_eq!(preferences.len(), 2);
|
||||
assert_eq!(preferences[0].model, "agent1");
|
||||
|
|
|
|||
|
|
@ -42,19 +42,23 @@ mod integration_tests {
|
|||
// Setup services
|
||||
let router_service = create_test_router_service();
|
||||
let agent_selector = AgentSelector::new(router_service);
|
||||
let pipeline_processor = PipelineProcessor::default();
|
||||
let mut pipeline_processor = PipelineProcessor::default();
|
||||
|
||||
// Create test data
|
||||
let agents = vec![
|
||||
Agent {
|
||||
id: "filter-agent".to_string(),
|
||||
kind: Some("filter".to_string()),
|
||||
agent_type: Some("filter".to_string()),
|
||||
url: "http://localhost:8081".to_string(),
|
||||
tool: None,
|
||||
transport: None,
|
||||
},
|
||||
Agent {
|
||||
id: "terminal-agent".to_string(),
|
||||
kind: Some("terminal".to_string()),
|
||||
agent_type: Some("terminal".to_string()),
|
||||
url: "http://localhost:8082".to_string(),
|
||||
tool: None,
|
||||
transport: None,
|
||||
},
|
||||
];
|
||||
|
||||
|
|
@ -107,7 +111,15 @@ mod integration_tests {
|
|||
|
||||
let headers = HeaderMap::new();
|
||||
let result = pipeline_processor
|
||||
.process_filter_chain(&request, &test_pipeline, &agent_map, &headers)
|
||||
.process_filter_chain(
|
||||
&request.messages,
|
||||
&test_pipeline,
|
||||
&agent_map,
|
||||
&headers,
|
||||
None,
|
||||
String::new(),
|
||||
String::new(),
|
||||
)
|
||||
.await;
|
||||
|
||||
println!("Pipeline processing result: {:?}", result);
|
||||
|
|
|
|||
49
crates/brightstaff/src/handlers/jsonrpc.rs
Normal file
49
crates/brightstaff/src/handlers/jsonrpc.rs
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
pub const JSON_RPC_VERSION: &str = "2.0";
|
||||
pub const TOOL_CALL_METHOD : &str = "tools/call";
|
||||
pub const MCP_INITIALIZE: &str = "initialize";
|
||||
pub const MCP_INITIALIZE_NOTIFICATION: &str = "initialize/notification";
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(untagged)]
|
||||
pub enum JsonRpcId {
|
||||
String(String),
|
||||
Number(u64),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct JsonRpcRequest {
|
||||
pub jsonrpc: String,
|
||||
pub id: JsonRpcId,
|
||||
pub method: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub params: Option<HashMap<String, serde_json::Value>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct JsonRpcNotification {
|
||||
pub jsonrpc: String,
|
||||
pub method: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub params: Option<HashMap<String, serde_json::Value>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct JsonRpcError {
|
||||
pub code: i32,
|
||||
pub message: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub data: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct JsonRpcResponse {
|
||||
pub jsonrpc: String,
|
||||
pub id: JsonRpcId,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub result: Option<HashMap<String, serde_json::Value>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub error: Option<JsonRpcError>,
|
||||
}
|
||||
|
|
@ -7,6 +7,7 @@ pub mod function_calling;
|
|||
pub mod pipeline_processor;
|
||||
pub mod response_handler;
|
||||
pub mod utils;
|
||||
pub mod jsonrpc;
|
||||
|
||||
#[cfg(test)]
|
||||
mod integration_tests;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,24 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use common::configuration::{Agent, AgentFilterChain};
|
||||
use common::consts::{ARCH_UPSTREAM_HOST_HEADER, ENVOY_RETRY_HEADER};
|
||||
use hermesllm::apis::openai::{ChatCompletionsRequest, Message};
|
||||
use common::consts::{
|
||||
ARCH_UPSTREAM_HOST_HEADER, BRIGHT_STAFF_SERVICE_NAME, ENVOY_RETRY_HEADER, TRACE_PARENT_HEADER,
|
||||
};
|
||||
use common::traces::{SpanBuilder, SpanKind, generate_random_span_id};
|
||||
use hermesllm::apis::openai::Message;
|
||||
use hermesllm::{ProviderRequest, ProviderRequestType};
|
||||
use hyper::header::HeaderMap;
|
||||
use tracing::{debug, warn};
|
||||
use std::time::{Instant, SystemTime};
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::tracing::operation_component::{self};
|
||||
use crate::tracing::{http, OperationNameBuilder};
|
||||
|
||||
use crate::handlers::jsonrpc::{
|
||||
JsonRpcId, JsonRpcNotification, JsonRpcRequest, JsonRpcResponse, JSON_RPC_VERSION,
|
||||
MCP_INITIALIZE, MCP_INITIALIZE_NOTIFICATION, TOOL_CALL_METHOD,
|
||||
};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Errors that can occur during pipeline processing
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
|
|
@ -19,6 +33,12 @@ pub enum PipelineError {
|
|||
NoChoicesInResponse(String),
|
||||
#[error("No content in response from agent '{0}'")]
|
||||
NoContentInResponse(String),
|
||||
#[error("No result in response from agent '{0}'")]
|
||||
NoResultInResponse(String),
|
||||
#[error("No structured content in response from agent '{0}'")]
|
||||
NoStructuredContentInResponse(String),
|
||||
#[error("No messages in response from agent '{0}'")]
|
||||
NoMessagesInResponse(String),
|
||||
#[error("Client error from agent '{agent}' (HTTP {status}): {body}")]
|
||||
ClientError {
|
||||
agent: String,
|
||||
|
|
@ -37,13 +57,17 @@ pub enum PipelineError {
|
|||
pub struct PipelineProcessor {
|
||||
client: reqwest::Client,
|
||||
url: String,
|
||||
agent_id_session_map: HashMap<String, String>,
|
||||
}
|
||||
|
||||
const ENVOY_API_ROUTER_ADDRESS: &str = "http://localhost:11000";
|
||||
|
||||
impl Default for PipelineProcessor {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
client: reqwest::Client::new(),
|
||||
url: "http://localhost:11000/v1/chat/completions".to_string(),
|
||||
url: ENVOY_API_ROUTER_ADDRESS.to_string(),
|
||||
agent_id_session_map: HashMap::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -53,18 +77,128 @@ impl PipelineProcessor {
|
|||
Self {
|
||||
client: reqwest::Client::new(),
|
||||
url,
|
||||
agent_id_session_map: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Record a span for filter execution
|
||||
fn record_filter_span(
|
||||
&self,
|
||||
collector: &std::sync::Arc<common::traces::TraceCollector>,
|
||||
agent_name: &str,
|
||||
tool_name: &str,
|
||||
start_time: SystemTime,
|
||||
end_time: SystemTime,
|
||||
elapsed: std::time::Duration,
|
||||
trace_id: String,
|
||||
parent_span_id: String,
|
||||
span_id: String,
|
||||
) -> String {
|
||||
// let (trace_id, parent_span_id) = self.extract_trace_context();
|
||||
|
||||
// Build operation name: POST /agents/* {filter_name}
|
||||
// Using generic path since we don't have access to specific endpoint here
|
||||
let operation_name = OperationNameBuilder::new()
|
||||
.with_method("POST")
|
||||
.with_path("/agents/*")
|
||||
.with_target(agent_name)
|
||||
.build();
|
||||
|
||||
let mut span_builder = SpanBuilder::new(&operation_name)
|
||||
.with_span_id(span_id.clone())
|
||||
.with_kind(SpanKind::Client)
|
||||
.with_start_time(start_time)
|
||||
.with_end_time(end_time)
|
||||
.with_attribute(http::METHOD, "POST")
|
||||
.with_attribute(http::TARGET, "/agents/*")
|
||||
.with_attribute("filter.name", agent_name.to_string())
|
||||
.with_attribute("filter.tool_name", tool_name.to_string())
|
||||
.with_attribute(
|
||||
"duration_ms",
|
||||
format!("{:.2}", elapsed.as_secs_f64() * 1000.0),
|
||||
);
|
||||
|
||||
if !trace_id.is_empty() {
|
||||
span_builder = span_builder.with_trace_id(trace_id);
|
||||
}
|
||||
if !parent_span_id.is_empty() {
|
||||
span_builder = span_builder.with_parent_span_id(parent_span_id);
|
||||
}
|
||||
|
||||
let span = span_builder.build();
|
||||
// Use plano(filter) as service name for filter execution spans
|
||||
collector.record_span(operation_component::AGENT_FILTER, span);
|
||||
span_id.clone()
|
||||
}
|
||||
|
||||
/// Record a span for MCP protocol interactions
|
||||
fn record_mcp_span(
|
||||
&self,
|
||||
collector: &std::sync::Arc<common::traces::TraceCollector>,
|
||||
operation: &str,
|
||||
agent_id: &str,
|
||||
start_time: SystemTime,
|
||||
end_time: SystemTime,
|
||||
elapsed: std::time::Duration,
|
||||
additional_attrs: Option<HashMap<&str, String>>,
|
||||
trace_id: String,
|
||||
parent_span_id: String,
|
||||
span_id: Option<String>,
|
||||
) {
|
||||
// let (trace_id, parent_span_id) = self.extract_trace_context();
|
||||
|
||||
// Build operation name: POST /mcp {agent_id}
|
||||
let operation_name = OperationNameBuilder::new()
|
||||
.with_method("POST")
|
||||
.with_path("/mcp")
|
||||
.with_operation(operation)
|
||||
.with_target(agent_id)
|
||||
.build();
|
||||
|
||||
let mut span_builder = SpanBuilder::new(&operation_name)
|
||||
.with_span_id(span_id.unwrap_or_else(|| generate_random_span_id()))
|
||||
.with_kind(SpanKind::Client)
|
||||
.with_start_time(start_time)
|
||||
.with_end_time(end_time)
|
||||
.with_attribute(http::METHOD, "POST")
|
||||
.with_attribute(http::TARGET, &format!("/mcp ({})", operation.to_string()))
|
||||
.with_attribute("mcp.operation", operation.to_string())
|
||||
.with_attribute("mcp.agent_id", agent_id.to_string())
|
||||
.with_attribute(
|
||||
"duration_ms",
|
||||
format!("{:.2}", elapsed.as_secs_f64() * 1000.0),
|
||||
);
|
||||
|
||||
if let Some(attrs) = additional_attrs {
|
||||
for (key, value) in attrs {
|
||||
span_builder = span_builder.with_attribute(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
if !trace_id.is_empty() {
|
||||
span_builder = span_builder.with_trace_id(trace_id);
|
||||
}
|
||||
if !parent_span_id.is_empty() {
|
||||
span_builder = span_builder.with_parent_span_id(parent_span_id);
|
||||
}
|
||||
|
||||
let span = span_builder.build();
|
||||
// MCP spans also use plano(filter) service name as they are part of filter operations
|
||||
collector.record_span(operation_component::AGENT_FILTER, span);
|
||||
}
|
||||
|
||||
/// Process the filter chain of agents (all except the terminal agent)
|
||||
pub async fn process_filter_chain(
|
||||
&self,
|
||||
initial_request: &ChatCompletionsRequest,
|
||||
&mut self,
|
||||
chat_history: &[Message],
|
||||
agent_filter_chain: &AgentFilterChain,
|
||||
agent_map: &HashMap<String, Agent>,
|
||||
request_headers: &HeaderMap,
|
||||
trace_collector: Option<&std::sync::Arc<common::traces::TraceCollector>>,
|
||||
trace_id: String,
|
||||
parent_span_id: String,
|
||||
) -> Result<Vec<Message>, PipelineError> {
|
||||
let mut chat_completions_history = initial_request.messages.clone();
|
||||
let mut chat_history_updated = chat_history.to_vec();
|
||||
|
||||
for agent_name in &agent_filter_chain.filter_chain {
|
||||
debug!("Processing filter agent: {}", agent_name);
|
||||
|
|
@ -73,123 +207,490 @@ impl PipelineProcessor {
|
|||
.get(agent_name)
|
||||
.ok_or_else(|| PipelineError::AgentNotFound(agent_name.clone()))?;
|
||||
|
||||
debug!("Agent details: {:?}", agent);
|
||||
let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);
|
||||
|
||||
let response_content = self
|
||||
.send_agent_filter_chain_request(
|
||||
&chat_completions_history,
|
||||
initial_request,
|
||||
info!(
|
||||
"executing filter: {}/{}, url: {}, conversation length: {}",
|
||||
agent_name,
|
||||
tool_name,
|
||||
agent.url,
|
||||
chat_history.len()
|
||||
);
|
||||
|
||||
let start_time = SystemTime::now();
|
||||
let start_instant = Instant::now();
|
||||
|
||||
// Generate filter span ID before execution so MCP spans can use it as parent
|
||||
let filter_span_id = generate_random_span_id();
|
||||
|
||||
chat_history_updated = self
|
||||
.execute_filter(
|
||||
&chat_history_updated,
|
||||
agent,
|
||||
request_headers,
|
||||
trace_collector,
|
||||
trace_id.clone(),
|
||||
filter_span_id.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
debug!("Received response from filter agent {}", agent_name);
|
||||
let end_time = SystemTime::now();
|
||||
let elapsed = start_instant.elapsed();
|
||||
|
||||
// Parse the response content as new message history
|
||||
chat_completions_history =
|
||||
serde_json::from_str(&response_content).inspect_err(|err| {
|
||||
warn!(
|
||||
"Failed to parse response from agent {}, err: {}, response: {}",
|
||||
agent_name, err, response_content
|
||||
)
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(chat_completions_history)
|
||||
}
|
||||
|
||||
/// Send request to a specific agent and return the response content
|
||||
async fn send_agent_filter_chain_request(
|
||||
&self,
|
||||
messages: &[Message],
|
||||
original_request: &ChatCompletionsRequest,
|
||||
agent: &Agent,
|
||||
request_headers: &HeaderMap,
|
||||
) -> Result<String, PipelineError> {
|
||||
let mut request = original_request.clone();
|
||||
request.messages = messages.to_vec();
|
||||
|
||||
let request_body = serde_json::to_string(&request)?;
|
||||
debug!("Sending request to agent {}", agent.id);
|
||||
|
||||
let mut agent_headers = request_headers.clone();
|
||||
agent_headers.remove(hyper::header::CONTENT_LENGTH);
|
||||
agent_headers.insert(
|
||||
ARCH_UPSTREAM_HOST_HEADER,
|
||||
hyper::header::HeaderValue::from_str(&agent.id)
|
||||
.map_err(|_| PipelineError::AgentNotFound(agent.id.clone()))?,
|
||||
info!(
|
||||
"Filter '{}' completed in {:.2}ms, updated conversation length: {}",
|
||||
agent_name,
|
||||
elapsed.as_secs_f64() * 1000.0,
|
||||
chat_history_updated.len()
|
||||
);
|
||||
|
||||
agent_headers.insert(
|
||||
// Record span for this filter execution
|
||||
if let Some(collector) = trace_collector {
|
||||
self.record_filter_span(
|
||||
collector,
|
||||
agent_name,
|
||||
tool_name,
|
||||
start_time,
|
||||
end_time,
|
||||
elapsed,
|
||||
trace_id.clone(),
|
||||
parent_span_id.clone(),
|
||||
filter_span_id,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(chat_history_updated)
|
||||
}
|
||||
|
||||
/// Build common MCP headers for requests
|
||||
fn build_mcp_headers(
|
||||
&self,
|
||||
request_headers: &HeaderMap,
|
||||
agent_id: &str,
|
||||
session_id: Option<&str>,
|
||||
trace_id: String,
|
||||
parent_span_id: String,
|
||||
) -> Result<HeaderMap, PipelineError> {
|
||||
let trace_parent = format!("00-{}-{}-01", trace_id, parent_span_id);
|
||||
let mut headers = request_headers.clone();
|
||||
headers.remove(hyper::header::CONTENT_LENGTH);
|
||||
|
||||
headers.remove(TRACE_PARENT_HEADER);
|
||||
headers.insert(
|
||||
TRACE_PARENT_HEADER,
|
||||
hyper::header::HeaderValue::from_str(&trace_parent).unwrap(),
|
||||
);
|
||||
|
||||
headers.insert(
|
||||
ARCH_UPSTREAM_HOST_HEADER,
|
||||
hyper::header::HeaderValue::from_str(agent_id)
|
||||
.map_err(|_| PipelineError::AgentNotFound(agent_id.to_string()))?,
|
||||
);
|
||||
|
||||
headers.insert(
|
||||
ENVOY_RETRY_HEADER,
|
||||
hyper::header::HeaderValue::from_str("3").unwrap(),
|
||||
);
|
||||
|
||||
headers.insert(
|
||||
"Accept",
|
||||
hyper::header::HeaderValue::from_static("application/json, text/event-stream"),
|
||||
);
|
||||
|
||||
headers.insert(
|
||||
"Content-Type",
|
||||
hyper::header::HeaderValue::from_static("application/json"),
|
||||
);
|
||||
|
||||
if let Some(sid) = session_id {
|
||||
headers.insert(
|
||||
"mcp-session-id",
|
||||
hyper::header::HeaderValue::from_str(sid).unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
Ok(headers)
|
||||
}
|
||||
|
||||
/// Parse SSE formatted response and extract JSON-RPC data
|
||||
fn parse_sse_response(
|
||||
&self,
|
||||
response_bytes: &[u8],
|
||||
agent_id: &str,
|
||||
) -> Result<String, PipelineError> {
|
||||
let response_str = String::from_utf8_lossy(response_bytes);
|
||||
let lines: Vec<&str> = response_str.lines().collect();
|
||||
|
||||
// Validate SSE format: first line should be "event: message"
|
||||
if lines.is_empty() || lines[0] != "event: message" {
|
||||
warn!(
|
||||
"Invalid SSE response format from agent {}: expected 'event: message' as first line, got: {:?}",
|
||||
agent_id,
|
||||
lines.first()
|
||||
);
|
||||
return Err(PipelineError::NoContentInResponse(format!(
|
||||
"Invalid SSE response format from agent {}: expected 'event: message' as first line",
|
||||
agent_id
|
||||
)));
|
||||
}
|
||||
|
||||
// Find the data line
|
||||
let data_lines: Vec<&str> = lines
|
||||
.iter()
|
||||
.filter(|line| line.starts_with("data: "))
|
||||
.copied()
|
||||
.collect();
|
||||
|
||||
if data_lines.len() != 1 {
|
||||
warn!(
|
||||
"Expected exactly one 'data:' line from agent {}, found {}",
|
||||
agent_id,
|
||||
data_lines.len()
|
||||
);
|
||||
return Err(PipelineError::NoContentInResponse(format!(
|
||||
"Expected exactly one 'data:' line from agent {}, found {}",
|
||||
agent_id,
|
||||
data_lines.len()
|
||||
)));
|
||||
}
|
||||
|
||||
// Skip "data: " prefix
|
||||
Ok(data_lines[0][6..].to_string())
|
||||
}
|
||||
|
||||
/// Send an MCP request and return the response
|
||||
async fn send_mcp_request(
|
||||
&self,
|
||||
json_rpc_request: &JsonRpcRequest,
|
||||
headers: HeaderMap,
|
||||
agent_id: &str,
|
||||
) -> Result<reqwest::Response, PipelineError> {
|
||||
let request_body = serde_json::to_string(json_rpc_request)?;
|
||||
|
||||
debug!(
|
||||
"Sending MCP request to agent {}: {}",
|
||||
agent_id, request_body
|
||||
);
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&self.url)
|
||||
.headers(agent_headers)
|
||||
.post(format!("{}/mcp", self.url))
|
||||
.headers(headers)
|
||||
.body(request_body)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
let status = response.status();
|
||||
Ok(response)
|
||||
}
|
||||
|
||||
/// Build a tools/call JSON-RPC request
|
||||
fn build_tool_call_request(
|
||||
&self,
|
||||
tool_name: &str,
|
||||
messages: &[Message],
|
||||
) -> Result<JsonRpcRequest, PipelineError> {
|
||||
let mut arguments = HashMap::new();
|
||||
arguments.insert("messages".to_string(), serde_json::to_value(messages)?);
|
||||
|
||||
let mut params = HashMap::new();
|
||||
params.insert("name".to_string(), serde_json::to_value(tool_name)?);
|
||||
params.insert("arguments".to_string(), serde_json::to_value(arguments)?);
|
||||
|
||||
Ok(JsonRpcRequest {
|
||||
jsonrpc: JSON_RPC_VERSION.to_string(),
|
||||
id: JsonRpcId::String(Uuid::new_v4().to_string()),
|
||||
method: TOOL_CALL_METHOD.to_string(),
|
||||
params: Some(params),
|
||||
})
|
||||
}
|
||||
|
||||
/// Send request to a specific agent and return the response content
|
||||
async fn execute_filter(
|
||||
&mut self,
|
||||
messages: &[Message],
|
||||
agent: &Agent,
|
||||
request_headers: &HeaderMap,
|
||||
trace_collector: Option<&std::sync::Arc<common::traces::TraceCollector>>,
|
||||
trace_id: String,
|
||||
filter_span_id: String,
|
||||
) -> Result<Vec<Message>, PipelineError> {
|
||||
// Get or create MCP session
|
||||
let mcp_session_id = if let Some(session_id) = self.agent_id_session_map.get(&agent.id) {
|
||||
session_id.clone()
|
||||
} else {
|
||||
let session_id = self
|
||||
.get_new_session_id(
|
||||
&agent.id,
|
||||
trace_id.clone(),
|
||||
filter_span_id.clone(),
|
||||
)
|
||||
.await;
|
||||
self.agent_id_session_map
|
||||
.insert(agent.id.clone(), session_id.clone());
|
||||
session_id
|
||||
};
|
||||
|
||||
info!(
|
||||
"Using MCP session ID {} for agent {}",
|
||||
mcp_session_id, agent.id
|
||||
);
|
||||
|
||||
// Build JSON-RPC request
|
||||
let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);
|
||||
let json_rpc_request = self.build_tool_call_request(tool_name, messages)?;
|
||||
|
||||
// Generate span ID for this MCP tool call (child of filter span)
|
||||
let mcp_span_id = generate_random_span_id();
|
||||
|
||||
// Build headers
|
||||
let agent_headers =
|
||||
self.build_mcp_headers(request_headers, &agent.id, Some(&mcp_session_id), trace_id.clone(), mcp_span_id.clone())?;
|
||||
|
||||
// Send request with tracing
|
||||
let start_time = SystemTime::now();
|
||||
let start_instant = Instant::now();
|
||||
|
||||
let response = self
|
||||
.send_mcp_request(
|
||||
&json_rpc_request,
|
||||
agent_headers,
|
||||
&agent.id,
|
||||
)
|
||||
.await?;
|
||||
let http_status = response.status();
|
||||
let response_bytes = response.bytes().await?;
|
||||
|
||||
// Check for HTTP errors and handle them appropriately
|
||||
if !status.is_success() {
|
||||
let end_time = SystemTime::now();
|
||||
let elapsed = start_instant.elapsed();
|
||||
|
||||
// Record MCP tool call span
|
||||
if let Some(collector) = trace_collector {
|
||||
let mut attrs = HashMap::new();
|
||||
attrs.insert("mcp.method", "tools/call".to_string());
|
||||
attrs.insert("mcp.tool_name", tool_name.to_string());
|
||||
attrs.insert("mcp.session_id", mcp_session_id.clone());
|
||||
attrs.insert("http.status_code", http_status.as_u16().to_string());
|
||||
|
||||
self.record_mcp_span(
|
||||
collector,
|
||||
"tool_call",
|
||||
&agent.id,
|
||||
start_time,
|
||||
end_time,
|
||||
elapsed,
|
||||
Some(attrs),
|
||||
trace_id.clone(),
|
||||
filter_span_id.clone(),
|
||||
Some(mcp_span_id),
|
||||
);
|
||||
}
|
||||
|
||||
// Handle HTTP errors
|
||||
if !http_status.is_success() {
|
||||
let error_body = String::from_utf8_lossy(&response_bytes).to_string();
|
||||
|
||||
if status.is_client_error() {
|
||||
// 4xx errors - cascade back to developer
|
||||
return Err(PipelineError::ClientError {
|
||||
return Err(if http_status.is_client_error() {
|
||||
PipelineError::ClientError {
|
||||
agent: agent.id.clone(),
|
||||
status: status.as_u16(),
|
||||
status: http_status.as_u16(),
|
||||
body: error_body,
|
||||
});
|
||||
} else if status.is_server_error() {
|
||||
// 5xx errors - server/agent error
|
||||
return Err(PipelineError::ServerError {
|
||||
}
|
||||
} else {
|
||||
PipelineError::ServerError {
|
||||
agent: agent.id.clone(),
|
||||
status: status.as_u16(),
|
||||
status: http_status.as_u16(),
|
||||
body: error_body,
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Parse the response as JSON to extract the content
|
||||
let response_json: serde_json::Value = serde_json::from_slice(&response_bytes)?;
|
||||
info!(
|
||||
"Response from agent {}: {}",
|
||||
agent.id,
|
||||
String::from_utf8_lossy(&response_bytes)
|
||||
);
|
||||
|
||||
let content = response_json
|
||||
.get("choices")
|
||||
.and_then(|choices| choices.as_array())
|
||||
.and_then(|choices| choices.first())
|
||||
.and_then(|choice| choice.get("message"))
|
||||
.and_then(|message| message.get("content"))
|
||||
.and_then(|content| content.as_str())
|
||||
.ok_or_else(|| PipelineError::NoContentInResponse(agent.id.clone()))?
|
||||
// Parse SSE response
|
||||
let data_chunk = self.parse_sse_response(&response_bytes, &agent.id)?;
|
||||
let response: JsonRpcResponse = serde_json::from_str(&data_chunk)?;
|
||||
let response_result = response
|
||||
.result
|
||||
.ok_or_else(|| PipelineError::NoResultInResponse(agent.id.clone()))?;
|
||||
|
||||
// Check if error field is set in response result
|
||||
if response_result
|
||||
.get("isError")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(false)
|
||||
{
|
||||
let error_message = response_result
|
||||
.get("content")
|
||||
.and_then(|v| v.as_array())
|
||||
.and_then(|arr| arr.first())
|
||||
.and_then(|v| v.get("text"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown_error")
|
||||
.to_string();
|
||||
|
||||
Ok(content)
|
||||
return Err(PipelineError::ClientError {
|
||||
agent: agent.id.clone(),
|
||||
status: http_status.as_u16(),
|
||||
body: error_message,
|
||||
});
|
||||
}
|
||||
|
||||
// Extract structured content and parse messages
|
||||
let response_json = response_result
|
||||
.get("structuredContent")
|
||||
.ok_or_else(|| PipelineError::NoStructuredContentInResponse(agent.id.clone()))?;
|
||||
|
||||
let messages: Vec<Message> = response_json
|
||||
.get("result")
|
||||
.and_then(|v| v.as_array())
|
||||
.ok_or_else(|| PipelineError::NoMessagesInResponse(agent.id.clone()))?
|
||||
.iter()
|
||||
.map(|msg_value| serde_json::from_value(msg_value.clone()))
|
||||
.collect::<Result<Vec<Message>, _>>()
|
||||
.map_err(PipelineError::ParseError)?;
|
||||
|
||||
Ok(messages)
|
||||
}
|
||||
|
||||
/// Build an initialize JSON-RPC request
|
||||
fn build_initialize_request(&self) -> JsonRpcRequest {
|
||||
JsonRpcRequest {
|
||||
jsonrpc: JSON_RPC_VERSION.to_string(),
|
||||
id: JsonRpcId::String(Uuid::new_v4().to_string()),
|
||||
method: MCP_INITIALIZE.to_string(),
|
||||
params: Some({
|
||||
let mut params = HashMap::new();
|
||||
params.insert(
|
||||
"protocolVersion".to_string(),
|
||||
serde_json::Value::String("2024-11-05".to_string()),
|
||||
);
|
||||
params.insert("capabilities".to_string(), serde_json::json!({}));
|
||||
params.insert(
|
||||
"clientInfo".to_string(),
|
||||
serde_json::json!({
|
||||
"name": BRIGHT_STAFF_SERVICE_NAME,
|
||||
"version": "1.0.0"
|
||||
}),
|
||||
);
|
||||
params
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
/// Send initialized notification after session creation
|
||||
async fn send_initialized_notification(
|
||||
&self,
|
||||
agent_id: &str,
|
||||
session_id: &str,
|
||||
trace_id: String,
|
||||
parent_span_id: String,
|
||||
) -> Result<(), PipelineError> {
|
||||
let initialized_notification = JsonRpcNotification {
|
||||
jsonrpc: JSON_RPC_VERSION.to_string(),
|
||||
method: MCP_INITIALIZE_NOTIFICATION.to_string(),
|
||||
params: None,
|
||||
};
|
||||
|
||||
let notification_body = serde_json::to_string(&initialized_notification)?;
|
||||
debug!("Sending initialized notification for agent {}", agent_id);
|
||||
|
||||
let headers = self.build_mcp_headers(&HeaderMap::new(), agent_id, Some(session_id), trace_id.clone(), parent_span_id.clone())?;
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(format!("{}/mcp", self.url))
|
||||
.headers(headers)
|
||||
.body(notification_body)
|
||||
.send()
|
||||
.await?;
|
||||
|
||||
info!(
|
||||
"Initialized notification response status: {}",
|
||||
response.status()
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn get_new_session_id(
|
||||
&self,
|
||||
agent_id: &str,
|
||||
trace_id: String,
|
||||
parent_span_id: String,
|
||||
) -> String {
|
||||
info!("Initializing MCP session for agent {}", agent_id);
|
||||
|
||||
let initialize_request = self.build_initialize_request();
|
||||
let headers = self
|
||||
.build_mcp_headers(&HeaderMap::new(), agent_id, None, trace_id.clone(), parent_span_id.clone())
|
||||
.expect("Failed to build headers for initialization");
|
||||
|
||||
let response = self
|
||||
.send_mcp_request(&initialize_request, headers, agent_id)
|
||||
.await
|
||||
.expect("Failed to initialize MCP session");
|
||||
|
||||
info!("Initialize response status: {}", response.status());
|
||||
|
||||
let session_id = response
|
||||
.headers()
|
||||
.get("mcp-session-id")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.expect("No mcp-session-id in response")
|
||||
.to_string();
|
||||
|
||||
info!(
|
||||
"Created new MCP session for agent {}: {}",
|
||||
agent_id, session_id
|
||||
);
|
||||
|
||||
// Send initialized notification
|
||||
self.send_initialized_notification(
|
||||
agent_id,
|
||||
&session_id,
|
||||
trace_id.clone(),
|
||||
parent_span_id.clone(),
|
||||
)
|
||||
.await
|
||||
.expect("Failed to send initialized notification");
|
||||
|
||||
session_id
|
||||
}
|
||||
|
||||
/// Send request to terminal agent and return the raw response for streaming
|
||||
pub async fn invoke_upstream_agent(
|
||||
pub async fn invoke_agent(
|
||||
&self,
|
||||
messages: &[Message],
|
||||
original_request: &ChatCompletionsRequest,
|
||||
mut original_request: ProviderRequestType,
|
||||
terminal_agent: &Agent,
|
||||
request_headers: &HeaderMap,
|
||||
trace_id: String,
|
||||
agent_span_id: String,
|
||||
) -> Result<reqwest::Response, PipelineError> {
|
||||
let mut request = original_request.clone();
|
||||
request.messages = messages.to_vec();
|
||||
// let mut request = original_request.clone();
|
||||
original_request.set_messages(messages);
|
||||
|
||||
let request_body = serde_json::to_string(&request)?;
|
||||
let request_body = ProviderRequestType::to_bytes(&original_request).unwrap();
|
||||
// let request_body = serde_json::to_string(&request)?;
|
||||
debug!("Sending request to terminal agent {}", terminal_agent.id);
|
||||
|
||||
let mut agent_headers = request_headers.clone();
|
||||
agent_headers.remove(hyper::header::CONTENT_LENGTH);
|
||||
|
||||
// Set traceparent header to make the egress span a child of the agent span
|
||||
if !trace_id.is_empty() && !agent_span_id.is_empty() {
|
||||
let trace_parent = format!("00-{}-{}-01", trace_id, agent_span_id);
|
||||
agent_headers.remove(TRACE_PARENT_HEADER);
|
||||
agent_headers.insert(
|
||||
TRACE_PARENT_HEADER,
|
||||
hyper::header::HeaderValue::from_str(&trace_parent).unwrap(),
|
||||
);
|
||||
}
|
||||
|
||||
agent_headers.insert(
|
||||
ARCH_UPSTREAM_HOST_HEADER,
|
||||
hyper::header::HeaderValue::from_str(&terminal_agent.id)
|
||||
|
|
@ -203,7 +704,7 @@ impl PipelineProcessor {
|
|||
|
||||
let response = self
|
||||
.client
|
||||
.post(&self.url)
|
||||
.post(format!("{}/v1/chat/completions", self.url))
|
||||
.headers(agent_headers)
|
||||
.body(request_body)
|
||||
.send()
|
||||
|
|
@ -217,6 +718,7 @@ impl PipelineProcessor {
|
|||
mod tests {
|
||||
use super::*;
|
||||
use hermesllm::apis::openai::{Message, MessageContent, Role};
|
||||
use mockito::Server;
|
||||
use std::collections::HashMap;
|
||||
|
||||
fn create_test_message(role: Role, content: &str) -> Message {
|
||||
|
|
@ -240,23 +742,149 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_agent_not_found_error() {
|
||||
let processor = PipelineProcessor::default();
|
||||
let mut processor = PipelineProcessor::default();
|
||||
let agent_map = HashMap::new();
|
||||
let request_headers = HeaderMap::new();
|
||||
|
||||
let initial_request = ChatCompletionsRequest {
|
||||
messages: vec![create_test_message(Role::User, "Hello")],
|
||||
model: "test-model".to_string(),
|
||||
..Default::default()
|
||||
};
|
||||
let messages = vec![create_test_message(Role::User, "Hello")];
|
||||
|
||||
let pipeline = create_test_pipeline(vec!["nonexistent-agent", "terminal-agent"]);
|
||||
|
||||
let result = processor
|
||||
.process_filter_chain(&initial_request, &pipeline, &agent_map, &request_headers)
|
||||
.process_filter_chain(&messages, &pipeline, &agent_map, &request_headers, None, String::new(), String::new())
|
||||
.await;
|
||||
|
||||
assert!(result.is_err());
|
||||
matches!(result.unwrap_err(), PipelineError::AgentNotFound(_));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_execute_filter_http_status_error() {
|
||||
let mut server = Server::new_async().await;
|
||||
let _m = server
|
||||
.mock("POST", "/mcp")
|
||||
.with_status(500)
|
||||
.with_body("boom")
|
||||
.create();
|
||||
|
||||
let server_url = server.url();
|
||||
let mut processor = PipelineProcessor::new(server_url.clone());
|
||||
processor
|
||||
.agent_id_session_map
|
||||
.insert("agent-1".to_string(), "session-1".to_string());
|
||||
|
||||
let agent = Agent {
|
||||
id: "agent-1".to_string(),
|
||||
transport: None,
|
||||
tool: None,
|
||||
url: server_url,
|
||||
agent_type: None,
|
||||
};
|
||||
|
||||
let messages = vec![create_test_message(Role::User, "Hello")];
|
||||
let request_headers = HeaderMap::new();
|
||||
|
||||
let result = processor
|
||||
.execute_filter(&messages, &agent, &request_headers, None, "trace-123".to_string(), "span-123".to_string())
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Err(PipelineError::ServerError { status, body, .. }) => {
|
||||
assert_eq!(status, 500);
|
||||
assert_eq!(body, "boom");
|
||||
}
|
||||
_ => panic!("Expected server error for 500 status"),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_execute_filter_http_client_error() {
|
||||
let mut server = Server::new_async().await;
|
||||
let _m = server
|
||||
.mock("POST", "/mcp")
|
||||
.with_status(400)
|
||||
.with_body("bad request")
|
||||
.create();
|
||||
|
||||
let server_url = server.url();
|
||||
let mut processor = PipelineProcessor::new(server_url.clone());
|
||||
processor
|
||||
.agent_id_session_map
|
||||
.insert("agent-3".to_string(), "session-3".to_string());
|
||||
|
||||
let agent = Agent {
|
||||
id: "agent-3".to_string(),
|
||||
transport: None,
|
||||
tool: None,
|
||||
url: server_url,
|
||||
agent_type: None,
|
||||
};
|
||||
|
||||
let messages = vec![create_test_message(Role::User, "Ping")];
|
||||
let request_headers = HeaderMap::new();
|
||||
|
||||
let result = processor
|
||||
.execute_filter(&messages, &agent, &request_headers, None, "trace-456".to_string(), "span-456".to_string())
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Err(PipelineError::ClientError { status, body, .. }) => {
|
||||
assert_eq!(status, 400);
|
||||
assert_eq!(body, "bad request");
|
||||
}
|
||||
_ => panic!("Expected client error for 400 status"),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_execute_filter_mcp_error_flag() {
|
||||
let rpc_body = serde_json::json!({
|
||||
"jsonrpc": JSON_RPC_VERSION,
|
||||
"id": "1",
|
||||
"result": {
|
||||
"isError": true,
|
||||
"content": [
|
||||
{ "text": "bad tool call" }
|
||||
]
|
||||
}
|
||||
});
|
||||
|
||||
let sse_body = format!("event: message\ndata: {}\n\n", rpc_body.to_string());
|
||||
|
||||
let mut server = Server::new_async().await;
|
||||
let _m = server
|
||||
.mock("POST", "/mcp")
|
||||
.with_status(200)
|
||||
.with_body(sse_body)
|
||||
.create();
|
||||
|
||||
let server_url = server.url();
|
||||
let mut processor = PipelineProcessor::new(server_url.clone());
|
||||
processor
|
||||
.agent_id_session_map
|
||||
.insert("agent-2".to_string(), "session-2".to_string());
|
||||
|
||||
let agent = Agent {
|
||||
id: "agent-2".to_string(),
|
||||
transport: None,
|
||||
tool: None,
|
||||
url: server_url,
|
||||
agent_type: None,
|
||||
};
|
||||
|
||||
let messages = vec![create_test_message(Role::User, "Hi")];
|
||||
let request_headers = HeaderMap::new();
|
||||
|
||||
let result = processor
|
||||
.execute_filter(&messages, &agent, &request_headers, None, "trace-789".to_string(), "span-789".to_string())
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Err(PipelineError::ClientError { status, body, .. }) => {
|
||||
assert_eq!(status, 200);
|
||||
assert_eq!(body, "bad tool call");
|
||||
}
|
||||
_ => panic!("Expected client error when isError flag is set"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,14 +1,14 @@
|
|||
use brightstaff::handlers::agent_chat_completions::agent_chat;
|
||||
use brightstaff::handlers::function_calling::function_calling_chat_handler;
|
||||
use brightstaff::handlers::llm::llm_chat;
|
||||
use brightstaff::handlers::models::list_models;
|
||||
use brightstaff::handlers::function_calling::{function_calling_chat_handler};
|
||||
use brightstaff::router::llm_router::RouterService;
|
||||
use brightstaff::state::StateStorage;
|
||||
use brightstaff::state::postgresql::PostgreSQLConversationStorage;
|
||||
use brightstaff::state::memory::MemoryConversationalStorage;
|
||||
use brightstaff::utils::tracing::init_tracer;
|
||||
use bytes::Bytes;
|
||||
use common::configuration::Configuration;
|
||||
use common::configuration::{Agent, Configuration};
|
||||
use common::consts::{CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH};
|
||||
use common::traces::TraceCollector;
|
||||
use http_body_util::{combinators::BoxBody, BodyExt, Empty};
|
||||
|
|
@ -63,8 +63,18 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
|
||||
let arch_config = Arc::new(config);
|
||||
|
||||
// combine agents and filters into a single list of agents
|
||||
let all_agents: Vec<Agent> = arch_config
|
||||
.agents
|
||||
.as_deref()
|
||||
.unwrap_or_default()
|
||||
.iter()
|
||||
.chain(arch_config.filters.as_deref().unwrap_or_default())
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
let llm_providers = Arc::new(RwLock::new(arch_config.model_providers.clone()));
|
||||
let agents_list = Arc::new(RwLock::new(arch_config.agents.clone()));
|
||||
let combined_agents_filters_list = Arc::new(RwLock::new(Some(all_agents)));
|
||||
let listeners = Arc::new(RwLock::new(arch_config.listeners.clone()));
|
||||
let llm_provider_url =
|
||||
env::var("LLM_PROVIDER_ENDPOINT").unwrap_or_else(|_| "http://localhost:12001".to_string());
|
||||
|
|
@ -98,7 +108,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
info!("Tracing configuration found in arch_config.yaml");
|
||||
Some(true)
|
||||
} else {
|
||||
info!("No tracing configuration in arch_config.yaml, will check OTEL_TRACING_ENABLED env var");
|
||||
info!(
|
||||
"No tracing configuration in arch_config.yaml, will check OTEL_TRACING_ENABLED env var"
|
||||
);
|
||||
None
|
||||
};
|
||||
let trace_collector = Arc::new(TraceCollector::new(tracing_enabled));
|
||||
|
|
@ -142,11 +154,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
let io = TokioIo::new(stream);
|
||||
|
||||
let router_service: Arc<RouterService> = Arc::clone(&router_service);
|
||||
let model_aliases: Arc<Option<std::collections::HashMap<String, common::configuration::ModelAlias>>> = Arc::clone(&model_aliases);
|
||||
let model_aliases: Arc<
|
||||
Option<std::collections::HashMap<String, common::configuration::ModelAlias>>,
|
||||
> = Arc::clone(&model_aliases);
|
||||
let llm_provider_url = llm_provider_url.clone();
|
||||
|
||||
let llm_providers = llm_providers.clone();
|
||||
let agents_list = agents_list.clone();
|
||||
let agents_list = combined_agents_filters_list.clone();
|
||||
let listeners = listeners.clone();
|
||||
let trace_collector = trace_collector.clone();
|
||||
let state_storage = state_storage.clone();
|
||||
|
|
@ -162,28 +176,36 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
let state_storage = state_storage.clone();
|
||||
|
||||
async move {
|
||||
match (req.method(), req.uri().path()) {
|
||||
(&Method::POST, CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH) => {
|
||||
let fully_qualified_url =
|
||||
format!("{}{}", llm_provider_url, req.uri().path());
|
||||
llm_chat(req, router_service, fully_qualified_url, model_aliases, llm_providers, trace_collector, state_storage)
|
||||
.with_context(parent_cx)
|
||||
.await
|
||||
}
|
||||
(&Method::POST, "/agents/v1/chat/completions") => {
|
||||
let fully_qualified_url =
|
||||
format!("{}{}", llm_provider_url, req.uri().path());
|
||||
agent_chat(
|
||||
let path = req.uri().path();
|
||||
// Check if path starts with /agents
|
||||
if path.starts_with("/agents") {
|
||||
// Check if it matches one of the agent API paths
|
||||
let stripped_path = path.strip_prefix("/agents").unwrap();
|
||||
if matches!(
|
||||
stripped_path,
|
||||
CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH
|
||||
) {
|
||||
let fully_qualified_url = format!("{}{}", llm_provider_url, stripped_path);
|
||||
return agent_chat(
|
||||
req,
|
||||
router_service,
|
||||
fully_qualified_url,
|
||||
agents_list,
|
||||
listeners,
|
||||
trace_collector,
|
||||
)
|
||||
.with_context(parent_cx)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
match (req.method(), path) {
|
||||
(&Method::POST, CHAT_COMPLETIONS_PATH | MESSAGES_PATH | OPENAI_RESPONSES_API_PATH) => {
|
||||
let fully_qualified_url =
|
||||
format!("{}{}", llm_provider_url, path);
|
||||
llm_chat(req, router_service, fully_qualified_url, model_aliases, llm_providers, trace_collector, state_storage)
|
||||
.with_context(parent_cx)
|
||||
.await
|
||||
}
|
||||
|
||||
(&Method::POST, "/function_calling") => {
|
||||
let fully_qualified_url =
|
||||
format!("{}{}", llm_provider_url, "/v1/chat/completions");
|
||||
|
|
|
|||
|
|
@ -157,7 +157,7 @@ pub mod operation_component {
|
|||
pub const HANDOFF: &str = "plano(handoff)";
|
||||
|
||||
/// Agent filter execution
|
||||
pub const AGENT_FILTER: &str = "plano(agent filter)";
|
||||
pub const AGENT_FILTER: &str = "plano(filter)";
|
||||
|
||||
/// Agent execution
|
||||
pub const AGENT: &str = "plano(agent)";
|
||||
|
|
@ -203,6 +203,7 @@ pub mod operation_component {
|
|||
pub struct OperationNameBuilder {
|
||||
method: Option<String>,
|
||||
path: Option<String>,
|
||||
operation: Option<String>,
|
||||
target: Option<String>,
|
||||
}
|
||||
|
||||
|
|
@ -212,6 +213,7 @@ impl OperationNameBuilder {
|
|||
Self {
|
||||
method: None,
|
||||
path: None,
|
||||
operation: None,
|
||||
target: None,
|
||||
}
|
||||
}
|
||||
|
|
@ -234,6 +236,15 @@ impl OperationNameBuilder {
|
|||
self
|
||||
}
|
||||
|
||||
/// Set the operation type (optional, for MCP operations)
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `operation` - Operation type (e.g., "tool_call", "session_init", "notification")
|
||||
pub fn with_operation(mut self, operation: impl Into<String>) -> Self {
|
||||
self.operation = Some(operation.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the target (model name, agent name, or filter name)
|
||||
///
|
||||
/// # Arguments
|
||||
|
|
@ -246,7 +257,8 @@ impl OperationNameBuilder {
|
|||
/// Build the operation name string
|
||||
///
|
||||
/// # Format
|
||||
/// - With all components: `{method} {path} {target}`
|
||||
/// - With all components: `{method} {path} ({operation}) {target}`
|
||||
/// - Without operation: `{method} {path} {target}`
|
||||
/// - Without target: `{method} {path}`
|
||||
/// - Without path: `{method}`
|
||||
/// - Empty: returns empty string
|
||||
|
|
@ -258,8 +270,12 @@ impl OperationNameBuilder {
|
|||
}
|
||||
|
||||
if let Some(path) = self.path {
|
||||
if let Some(operation) = self.operation {
|
||||
parts.push(format!("{} ({})", path, operation));
|
||||
} else {
|
||||
parts.push(path);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(target) = self.target {
|
||||
parts.push(target);
|
||||
|
|
|
|||
1
crates/build.sh
Normal file
1
crates/build.sh
Normal file
|
|
@ -0,0 +1 @@
|
|||
cargo build --release --target wasm32-wasip1 -p prompt_gateway -p llm_gateway && cargo build --release -p brightstaff
|
||||
|
|
@ -21,8 +21,11 @@ pub struct ModelAlias {
|
|||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Agent {
|
||||
pub id: String,
|
||||
pub kind: Option<String>,
|
||||
pub transport: Option<String>,
|
||||
pub tool: Option<String>,
|
||||
pub url: String,
|
||||
#[serde(rename = "type")]
|
||||
pub agent_type: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
|
|
@ -71,6 +74,7 @@ pub struct Configuration {
|
|||
pub mode: Option<GatewayMode>,
|
||||
pub routing: Option<Routing>,
|
||||
pub agents: Option<Vec<Agent>>,
|
||||
pub filters: Option<Vec<Agent>>,
|
||||
pub listeners: Vec<Listener>,
|
||||
pub state_storage: Option<StateStorageConfig>,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,3 +32,4 @@ pub const OTEL_COLLECTOR_HTTP: &str = "opentelemetry_collector_http";
|
|||
pub const OTEL_POST_PATH: &str = "/v1/traces";
|
||||
pub const LLM_ROUTE_HEADER: &str = "x-arch-llm-route";
|
||||
pub const ENVOY_RETRY_HEADER: &str = "x-envoy-max-retries";
|
||||
pub const BRIGHT_STAFF_SERVICE_NAME : &str = "brightstaff";
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ pub use shapes::{
|
|||
};
|
||||
|
||||
// Re-export new utilities
|
||||
pub use span_builder::{SpanBuilder, SpanKind};
|
||||
pub use span_builder::{SpanBuilder, SpanKind, generate_random_span_id};
|
||||
pub use resource_span_builder::ResourceSpanBuilder;
|
||||
pub use constants::*;
|
||||
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ pub struct SpanBuilder {
|
|||
end_time: Option<SystemTime>,
|
||||
kind: SpanKind,
|
||||
attributes: HashMap<String, String>,
|
||||
span_id: Option<String>,
|
||||
}
|
||||
|
||||
impl SpanBuilder {
|
||||
|
|
@ -53,6 +54,7 @@ impl SpanBuilder {
|
|||
end_time: None,
|
||||
kind: SpanKind::Internal,
|
||||
attributes: HashMap::new(),
|
||||
span_id: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -62,6 +64,11 @@ impl SpanBuilder {
|
|||
self
|
||||
}
|
||||
|
||||
pub fn with_span_id(mut self, span_id: impl Into<String>) -> Self {
|
||||
self.span_id = Some(span_id.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the parent span ID to link this span to its parent
|
||||
pub fn with_parent_span_id(mut self, parent_span_id: impl Into<String>) -> Self {
|
||||
self.parent_span_id = Some(parent_span_id.into());
|
||||
|
|
@ -125,7 +132,7 @@ impl SpanBuilder {
|
|||
// Build span directly without going through Span::new()
|
||||
Span {
|
||||
trace_id,
|
||||
span_id: generate_random_span_id(),
|
||||
span_id: self.span_id.unwrap_or_else(|| generate_random_span_id()),
|
||||
parent_span_id: self.parent_span_id,
|
||||
name: self.name,
|
||||
start_time_unix_nano: format!("{}", start_nanos),
|
||||
|
|
@ -145,7 +152,7 @@ fn system_time_to_nanos(time: SystemTime) -> u128 {
|
|||
}
|
||||
|
||||
/// Generate a random span ID (16 hex characters = 8 bytes)
|
||||
fn generate_random_span_id() -> String {
|
||||
pub fn generate_random_span_id() -> String {
|
||||
use rand::RngCore;
|
||||
let mut rng = rand::thread_rng();
|
||||
let mut random_bytes = [0u8; 8];
|
||||
|
|
|
|||
|
|
@ -233,6 +233,104 @@ impl ProviderRequest for ConverseRequest {
|
|||
fn get_temperature(&self) -> Option<f32> {
|
||||
self.inference_config.as_ref()?.temperature
|
||||
}
|
||||
|
||||
fn get_messages(&self) -> Vec<crate::apis::openai::Message> {
|
||||
use crate::apis::openai::{Message, MessageContent, Role};
|
||||
|
||||
let mut openai_messages = Vec::new();
|
||||
|
||||
// Add system messages if present
|
||||
if let Some(system) = &self.system {
|
||||
for sys_block in system {
|
||||
match sys_block {
|
||||
SystemContentBlock::Text { text } => {
|
||||
openai_messages.push(Message {
|
||||
role: Role::System,
|
||||
content: MessageContent::Text(text.clone()),
|
||||
name: None,
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
});
|
||||
}
|
||||
_ => {} // Skip other system content types
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convert conversation messages
|
||||
if let Some(messages) = &self.messages {
|
||||
for msg in messages {
|
||||
let role = match msg.role {
|
||||
ConversationRole::User => Role::User,
|
||||
ConversationRole::Assistant => Role::Assistant,
|
||||
};
|
||||
|
||||
// Extract text from content blocks
|
||||
let content = msg.content.iter()
|
||||
.filter_map(|block| {
|
||||
if let ContentBlock::Text { text } = block {
|
||||
Some(text.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
openai_messages.push(Message {
|
||||
role,
|
||||
content: MessageContent::Text(content),
|
||||
name: None,
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
openai_messages
|
||||
}
|
||||
|
||||
fn set_messages(&mut self, messages: &[crate::apis::openai::Message]) {
|
||||
// Convert OpenAI messages to Bedrock format
|
||||
use crate::apis::amazon_bedrock::{ContentBlock, ConversationRole, SystemContentBlock};
|
||||
|
||||
let mut system_blocks = Vec::new();
|
||||
let mut bedrock_messages = Vec::new();
|
||||
|
||||
for msg in messages {
|
||||
match msg.role {
|
||||
crate::apis::openai::Role::System => {
|
||||
if let crate::apis::openai::MessageContent::Text(text) = &msg.content {
|
||||
system_blocks.push(SystemContentBlock::Text { text: text.clone() });
|
||||
}
|
||||
}
|
||||
crate::apis::openai::Role::User | crate::apis::openai::Role::Assistant => {
|
||||
let role = match msg.role {
|
||||
crate::apis::openai::Role::User => ConversationRole::User,
|
||||
crate::apis::openai::Role::Assistant => ConversationRole::Assistant,
|
||||
_ => continue,
|
||||
};
|
||||
|
||||
let content = if let crate::apis::openai::MessageContent::Text(text) = &msg.content {
|
||||
vec![ContentBlock::Text { text: text.clone() }]
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
bedrock_messages.push(crate::apis::amazon_bedrock::Message {
|
||||
role,
|
||||
content,
|
||||
});
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
if !system_blocks.is_empty() {
|
||||
self.system = Some(system_blocks);
|
||||
}
|
||||
self.messages = Some(bedrock_messages);
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
|
|
|
|||
|
|
@ -541,6 +541,65 @@ impl ProviderRequest for MessagesRequest {
|
|||
fn get_temperature(&self) -> Option<f32> {
|
||||
self.temperature
|
||||
}
|
||||
|
||||
fn get_messages(&self) -> Vec<crate::apis::openai::Message> {
|
||||
use crate::apis::openai::Message;
|
||||
|
||||
let mut openai_messages = Vec::new();
|
||||
|
||||
// Add system prompt as system message if present
|
||||
if let Some(system) = &self.system {
|
||||
openai_messages.push(system.clone().into());
|
||||
}
|
||||
|
||||
// Convert each Anthropic message to OpenAI format
|
||||
for msg in &self.messages {
|
||||
if let Ok(converted_msgs) = TryInto::<Vec<Message>>::try_into(msg.clone()) {
|
||||
openai_messages.extend(converted_msgs);
|
||||
}
|
||||
}
|
||||
|
||||
openai_messages
|
||||
}
|
||||
|
||||
fn set_messages(&mut self, messages: &[crate::apis::openai::Message]) {
|
||||
// Convert OpenAI messages to Anthropic format
|
||||
// Separate system messages from regular messages
|
||||
let mut system_messages = Vec::new();
|
||||
let mut regular_messages = Vec::new();
|
||||
|
||||
for msg in messages {
|
||||
if msg.role == crate::apis::openai::Role::System {
|
||||
system_messages.push(msg.clone());
|
||||
} else {
|
||||
regular_messages.push(msg.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// Set system prompt if there are system messages
|
||||
if !system_messages.is_empty() {
|
||||
// Combine all system messages into one
|
||||
let system_text = system_messages.iter()
|
||||
.filter_map(|msg| {
|
||||
if let crate::apis::openai::MessageContent::Text(text) = &msg.content {
|
||||
Some(text.as_str())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
self.system = Some(crate::apis::anthropic::MessagesSystemPrompt::Single(system_text));
|
||||
}
|
||||
|
||||
// Convert regular messages
|
||||
self.messages = regular_messages.iter()
|
||||
.filter_map(|msg| {
|
||||
msg.clone().try_into().ok()
|
||||
})
|
||||
.collect();
|
||||
}
|
||||
}
|
||||
|
||||
impl MessagesResponse {
|
||||
|
|
|
|||
|
|
@ -735,6 +735,14 @@ impl ProviderRequest for ChatCompletionsRequest {
|
|||
fn get_temperature(&self) -> Option<f32> {
|
||||
self.temperature
|
||||
}
|
||||
|
||||
fn get_messages(&self) -> Vec<crate::apis::openai::Message> {
|
||||
self.messages.clone()
|
||||
}
|
||||
|
||||
fn set_messages(&mut self, messages: &[crate::apis::openai::Message]) {
|
||||
self.messages = messages.to_vec();
|
||||
}
|
||||
}
|
||||
|
||||
/// Implementation of ProviderResponse for ChatCompletionsResponse
|
||||
|
|
|
|||
|
|
@ -1134,6 +1134,140 @@ impl ProviderRequest for ResponsesAPIRequest {
|
|||
fn get_temperature(&self) -> Option<f32> {
|
||||
self.temperature
|
||||
}
|
||||
|
||||
fn get_messages(&self) -> Vec<crate::apis::openai::Message> {
|
||||
use crate::apis::openai::{Message, MessageContent, Role};
|
||||
|
||||
let mut openai_messages = Vec::new();
|
||||
|
||||
// Add instructions as system message if present
|
||||
if let Some(instructions) = &self.instructions {
|
||||
openai_messages.push(Message {
|
||||
role: Role::System,
|
||||
content: MessageContent::Text(instructions.clone()),
|
||||
name: None,
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
});
|
||||
}
|
||||
|
||||
// Convert input to messages
|
||||
match &self.input {
|
||||
InputParam::Text(text) => {
|
||||
openai_messages.push(Message {
|
||||
role: Role::User,
|
||||
content: MessageContent::Text(text.clone()),
|
||||
name: None,
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
});
|
||||
}
|
||||
InputParam::Items(items) => {
|
||||
for item in items {
|
||||
match item {
|
||||
InputItem::Message(msg) => {
|
||||
// Convert message role
|
||||
let role = match msg.role {
|
||||
MessageRole::User => Role::User,
|
||||
MessageRole::Assistant => Role::Assistant,
|
||||
MessageRole::System => Role::System,
|
||||
MessageRole::Developer => Role::System, // Map developer to system
|
||||
};
|
||||
|
||||
// Extract text from message content
|
||||
let content = match &msg.content {
|
||||
crate::apis::openai_responses::MessageContent::Text(text) => text.clone(),
|
||||
crate::apis::openai_responses::MessageContent::Items(items) => {
|
||||
items.iter()
|
||||
.filter_map(|c| {
|
||||
if let InputContent::InputText { text } = c {
|
||||
Some(text.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
}
|
||||
};
|
||||
|
||||
openai_messages.push(Message {
|
||||
role,
|
||||
content: MessageContent::Text(content),
|
||||
name: None,
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
});
|
||||
}
|
||||
// Skip other input item types for now
|
||||
InputItem::ItemReference { .. } | InputItem::FunctionCallOutput { .. } => {
|
||||
// These are not yet supported in agent framework
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
openai_messages
|
||||
}
|
||||
|
||||
fn set_messages(&mut self, messages: &[crate::apis::openai::Message]) {
|
||||
// For ResponsesAPI, we need to convert messages back to input format
|
||||
// Extract system messages as instructions
|
||||
let system_text = messages.iter()
|
||||
.filter(|msg| msg.role == crate::apis::openai::Role::System)
|
||||
.filter_map(|msg| {
|
||||
if let crate::apis::openai::MessageContent::Text(text) = &msg.content {
|
||||
Some(text.as_str())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
if !system_text.is_empty() {
|
||||
self.instructions = Some(system_text);
|
||||
}
|
||||
|
||||
// Convert user/assistant messages to InputParam
|
||||
// For simplicity, we'll use the last user message as the input
|
||||
// or combine all non-system messages
|
||||
let input_messages: Vec<_> = messages.iter()
|
||||
.filter(|msg| msg.role != crate::apis::openai::Role::System)
|
||||
.collect();
|
||||
|
||||
if !input_messages.is_empty() {
|
||||
// If there's only one message, use Text format
|
||||
if input_messages.len() == 1 {
|
||||
if let crate::apis::openai::MessageContent::Text(text) = &input_messages[0].content {
|
||||
self.input = crate::apis::openai_responses::InputParam::Text(text.clone());
|
||||
}
|
||||
} else {
|
||||
// Multiple messages - combine them as text for now
|
||||
// A more sophisticated approach would use InputParam::Items
|
||||
let combined_text = input_messages.iter()
|
||||
.filter_map(|msg| {
|
||||
if let crate::apis::openai::MessageContent::Text(text) = &msg.content {
|
||||
Some(format!("{}: {}",
|
||||
match msg.role {
|
||||
crate::apis::openai::Role::User => "User",
|
||||
crate::apis::openai::Role::Assistant => "Assistant",
|
||||
_ => "Unknown",
|
||||
},
|
||||
text
|
||||
))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
|
||||
self.input = crate::apis::openai_responses::InputParam::Text(combined_text);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
|
|
|
|||
|
|
@ -47,6 +47,28 @@ pub trait ProviderRequest: Send + Sync {
|
|||
fn remove_metadata_key(&mut self, key: &str) -> bool;
|
||||
|
||||
fn get_temperature(&self) -> Option<f32>;
|
||||
|
||||
/// Get message history as OpenAI Message format
|
||||
/// This is useful for processing chat history across different provider formats
|
||||
fn get_messages(&self) -> Vec<crate::apis::openai::Message>;
|
||||
|
||||
/// Set message history from OpenAI Message format
|
||||
/// This converts OpenAI messages to the appropriate format for each provider type
|
||||
fn set_messages(&mut self, messages: &[crate::apis::openai::Message]);
|
||||
}
|
||||
|
||||
impl ProviderRequestType {
|
||||
/// Set message history from OpenAI Message format
|
||||
/// This converts OpenAI messages to the appropriate format for each provider type
|
||||
pub fn set_messages(&mut self, messages: &[crate::apis::openai::Message]) {
|
||||
match self {
|
||||
Self::ChatCompletionsRequest(r) => r.set_messages(messages),
|
||||
Self::MessagesRequest(r) => r.set_messages(messages),
|
||||
Self::BedrockConverse(r) => r.set_messages(messages),
|
||||
Self::BedrockConverseStream(r) => r.set_messages(messages),
|
||||
Self::ResponsesAPIRequest(r) => r.set_messages(messages),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ProviderRequest for ProviderRequestType {
|
||||
|
|
@ -149,6 +171,26 @@ impl ProviderRequest for ProviderRequestType {
|
|||
Self::ResponsesAPIRequest(r) => r.get_temperature(),
|
||||
}
|
||||
}
|
||||
|
||||
fn get_messages(&self) -> Vec<crate::apis::openai::Message> {
|
||||
match self {
|
||||
Self::ChatCompletionsRequest(r) => r.get_messages(),
|
||||
Self::MessagesRequest(r) => r.get_messages(),
|
||||
Self::BedrockConverse(r) => r.get_messages(),
|
||||
Self::BedrockConverseStream(r) => r.get_messages(),
|
||||
Self::ResponsesAPIRequest(r) => r.get_messages(),
|
||||
}
|
||||
}
|
||||
|
||||
fn set_messages(&mut self, messages: &[crate::apis::openai::Message]) {
|
||||
match self {
|
||||
Self::ChatCompletionsRequest(r) => r.set_messages(messages),
|
||||
Self::MessagesRequest(r) => r.set_messages(messages),
|
||||
Self::BedrockConverse(r) => r.set_messages(messages),
|
||||
Self::BedrockConverseStream(r) => r.set_messages(messages),
|
||||
Self::ResponsesAPIRequest(r) => r.set_messages(messages),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse the client API from a byte slice.
|
||||
|
|
@ -934,4 +976,131 @@ mod tests {
|
|||
.message
|
||||
.contains("OpenAI ChatCompletions, Anthropic Messages, and OpenAI Responses"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_message_history_chat_completions() {
|
||||
use crate::apis::openai::{Message, MessageContent, Role};
|
||||
|
||||
let chat_req = ChatCompletionsRequest {
|
||||
model: "gpt-4".to_string(),
|
||||
messages: vec![
|
||||
Message {
|
||||
role: Role::System,
|
||||
content: MessageContent::Text("You are helpful".to_string()),
|
||||
name: None,
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
},
|
||||
Message {
|
||||
role: Role::User,
|
||||
content: MessageContent::Text("Hello!".to_string()),
|
||||
name: None,
|
||||
tool_calls: None,
|
||||
tool_call_id: None,
|
||||
},
|
||||
],
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let provider_req = ProviderRequestType::ChatCompletionsRequest(chat_req);
|
||||
let messages = provider_req.get_messages();
|
||||
|
||||
assert_eq!(messages.len(), 2);
|
||||
assert_eq!(messages[0].role, Role::System);
|
||||
assert_eq!(messages[1].role, Role::User);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_message_history_anthropic_messages() {
|
||||
use crate::apis::anthropic::{
|
||||
MessagesMessage, MessagesMessageContent, MessagesRequest, MessagesRole,
|
||||
MessagesSystemPrompt,
|
||||
};
|
||||
|
||||
let anthropic_req = MessagesRequest {
|
||||
model: "claude-3-sonnet".to_string(),
|
||||
messages: vec![MessagesMessage {
|
||||
role: MessagesRole::User,
|
||||
content: MessagesMessageContent::Single("Hello!".to_string()),
|
||||
}],
|
||||
system: Some(MessagesSystemPrompt::Single(
|
||||
"You are helpful".to_string(),
|
||||
)),
|
||||
max_tokens: 100,
|
||||
container: None,
|
||||
mcp_servers: None,
|
||||
metadata: None,
|
||||
service_tier: None,
|
||||
thinking: None,
|
||||
temperature: None,
|
||||
top_p: None,
|
||||
top_k: None,
|
||||
stream: None,
|
||||
stop_sequences: None,
|
||||
tools: None,
|
||||
tool_choice: None,
|
||||
};
|
||||
|
||||
let provider_req = ProviderRequestType::MessagesRequest(anthropic_req);
|
||||
let messages = provider_req.get_messages();
|
||||
|
||||
// Should have system message + user message
|
||||
assert_eq!(messages.len(), 2);
|
||||
assert_eq!(
|
||||
messages[0].role,
|
||||
crate::apis::openai::Role::System
|
||||
);
|
||||
assert_eq!(
|
||||
messages[1].role,
|
||||
crate::apis::openai::Role::User
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_message_history_responses_api() {
|
||||
use crate::apis::openai_responses::{InputParam, ResponsesAPIRequest};
|
||||
|
||||
let responses_req = ResponsesAPIRequest {
|
||||
model: "gpt-4o".to_string(),
|
||||
input: InputParam::Text("Hello, world!".to_string()),
|
||||
instructions: Some("Be helpful".to_string()),
|
||||
temperature: None,
|
||||
max_output_tokens: None,
|
||||
stream: None,
|
||||
metadata: None,
|
||||
tools: None,
|
||||
tool_choice: None,
|
||||
parallel_tool_calls: None,
|
||||
modalities: None,
|
||||
user: None,
|
||||
store: None,
|
||||
reasoning_effort: None,
|
||||
include: None,
|
||||
audio: None,
|
||||
text: None,
|
||||
service_tier: None,
|
||||
top_p: None,
|
||||
top_logprobs: None,
|
||||
stream_options: None,
|
||||
truncation: None,
|
||||
conversation: None,
|
||||
previous_response_id: None,
|
||||
max_tool_calls: None,
|
||||
background: None,
|
||||
};
|
||||
|
||||
let provider_req = ProviderRequestType::ResponsesAPIRequest(responses_req);
|
||||
let messages = provider_req.get_messages();
|
||||
|
||||
// Should have system message (instructions) + user message (input)
|
||||
assert_eq!(messages.len(), 2);
|
||||
assert_eq!(
|
||||
messages[0].role,
|
||||
crate::apis::openai::Role::System
|
||||
);
|
||||
assert_eq!(
|
||||
messages[1].role,
|
||||
crate::apis::openai::Role::User
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
106
demos/use_cases/mcp_filter/README.md
Normal file
106
demos/use_cases/mcp_filter/README.md
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
# RAG Agent Demo
|
||||
|
||||
A multi-agent RAG system demonstrating archgw's agent filter chain with MCP protocol.
|
||||
|
||||
## Architecture
|
||||
|
||||
This demo consists of three components:
|
||||
1. **Query Rewriter** (MCP filter) - Rewrites user queries for better retrieval
|
||||
2. **Context Builder** (MCP filter) - Retrieves relevant context from knowledge base
|
||||
3. **RAG Agent** (REST) - Generates final responses based on augmented context
|
||||
|
||||
## Components
|
||||
|
||||
### Query Rewriter Filter (MCP)
|
||||
- **Port**: 10501
|
||||
- **Tool**: `query_rewriter`
|
||||
- Improves queries using LLM before retrieval
|
||||
|
||||
### Context Builder Filter (MCP)
|
||||
- **Port**: 10502
|
||||
- **Tool**: `context_builder`
|
||||
- Augments queries with relevant passages from knowledge base
|
||||
|
||||
### RAG Agent (REST/OpenAI)
|
||||
- **Port**: 10505
|
||||
- **Endpoint**: `/v1/chat/completions`
|
||||
- Generates responses using OpenAI-compatible API
|
||||
|
||||
## Quick Start
|
||||
|
||||
### 1. Start all agents
|
||||
```bash
|
||||
./start_agents.sh
|
||||
```
|
||||
|
||||
This starts:
|
||||
- Query Rewriter MCP server on port 10501
|
||||
- Context Builder MCP server on port 10502
|
||||
- RAG Agent REST server on port 10505
|
||||
|
||||
### 2. Start archgw
|
||||
```bash
|
||||
archgw up --foreground
|
||||
```
|
||||
|
||||
### 3. Test the system
|
||||
```bash
|
||||
curl -X POST http://localhost:8001/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "gpt-4o",
|
||||
"messages": [{"role": "user", "content": "What is the guaranteed uptime for TechCorp?"}]
|
||||
}'
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
The `arch_config.yaml` defines how agents are connected:
|
||||
|
||||
```yaml
|
||||
filters:
|
||||
- id: query_rewriter
|
||||
url: mcp://host.docker.internal:10500
|
||||
tool: rewrite_query_with_archgw # MCP tool name
|
||||
|
||||
- id: context_builder
|
||||
url: mcp://host.docker.internal:10501
|
||||
tool: chat_completions
|
||||
```
|
||||
How It Works
|
||||
|
||||
1. User sends request to archgw listener on port 8001
|
||||
2. Request passes through MCP filter chain:
|
||||
- **Query Rewriter** rewrites the query for better retrieval
|
||||
- **Context Builder** augments query with relevant knowledge base passages
|
||||
3. Augmented request is forwarded to **RAG Agent** REST endpoint
|
||||
4. RAG Agent generates final response using LLM
|
||||
|
||||
## Configuration
|
||||
|
||||
See `arch_config.yaml` for the complete filter chain setup. The MCP filters use default settings:
|
||||
- `type: mcp` (default)
|
||||
- `transport: streamable-http` (default)
|
||||
- Tool name defaults to filter ID `sample_queries.md` for example queries to test the RAG system.
|
||||
|
||||
Example request:
|
||||
```bash
|
||||
curl -X POST http://localhost:8001/v1/chat/completions \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model": "gpt-4o",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the guaranteed uptime for TechCorp?"
|
||||
}
|
||||
]
|
||||
}'
|
||||
```
|
||||
- `LLM_GATEWAY_ENDPOINT` - archgw endpoint (default: `http://localhost:12000/v1`)
|
||||
- `OPENAI_API_KEY` - OpenAI API key for model providers
|
||||
|
||||
## Additional Resources
|
||||
|
||||
- See `sample_queries.md` for more example queries
|
||||
- See `arch_config.yaml` for complete configuration details
|
||||
41
demos/use_cases/mcp_filter/arch_config.yaml
Normal file
41
demos/use_cases/mcp_filter/arch_config.yaml
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
version: v0.3.0
|
||||
|
||||
agents:
|
||||
- id: rag_agent
|
||||
url: http://host.docker.internal:10505
|
||||
|
||||
filters:
|
||||
- id: query_rewriter
|
||||
url: http://host.docker.internal:10501
|
||||
# type: mcp # default is mcp
|
||||
# transport: streamable-http # default is streamable-http
|
||||
# tool: query_rewriter # default name is the filter id
|
||||
- id: context_builder
|
||||
url: http://host.docker.internal:10502
|
||||
|
||||
model_providers:
|
||||
- model: openai/gpt-4o-mini
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
||||
model_aliases:
|
||||
fast-llm:
|
||||
target: gpt-4o-mini
|
||||
smart-llm:
|
||||
target: gpt-4o
|
||||
|
||||
listeners:
|
||||
- type: agent
|
||||
name: agent_1
|
||||
port: 8001
|
||||
router: arch_agent_router
|
||||
agents:
|
||||
- id: rag_agent
|
||||
description: virtual assistant for retrieval augmented generation tasks
|
||||
filter_chain:
|
||||
- query_rewriter
|
||||
- context_builder
|
||||
tracing:
|
||||
random_sampling: 100
|
||||
17
demos/use_cases/mcp_filter/docker-compose.yaml
Normal file
17
demos/use_cases/mcp_filter/docker-compose.yaml
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
services:
|
||||
jaeger:
|
||||
build:
|
||||
context: ../../shared/jaeger
|
||||
ports:
|
||||
- "16686:16686"
|
||||
- "4317:4317"
|
||||
- "4318:4318"
|
||||
open-web-ui:
|
||||
image: dyrnq/open-webui:main
|
||||
restart: always
|
||||
ports:
|
||||
- "8080:8080"
|
||||
environment:
|
||||
- DEFAULT_MODEL=gpt-4o-mini
|
||||
- ENABLE_OPENAI_API=true
|
||||
- OPENAI_API_BASE_URL=http://host.docker.internal:8001/v1
|
||||
86
demos/use_cases/mcp_filter/mcp_query.rest
Normal file
86
demos/use_cases/mcp_filter/mcp_query.rest
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
### Initialize MCP Session (SSE)
|
||||
POST http://localhost:10501/mcp
|
||||
Content-Type: application/json
|
||||
Accept: application/json, text/event-stream
|
||||
|
||||
{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"capabilities":{},"protocolVersion":"2024-11-05","clientInfo":{"name":"test","version":"1.0.0"}}}
|
||||
|
||||
### Send Initialized Notification
|
||||
POST http://localhost:10501/mcp
|
||||
Content-Type: application/json
|
||||
Accept: application/json, text/event-stream
|
||||
mcp-session-id: 35d455dc07b8400887f86668590f12bb
|
||||
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"method": "notifications/initialized"
|
||||
}
|
||||
|
||||
### List Tools
|
||||
POST http://localhost:10501/mcp
|
||||
Content-Type: application/json
|
||||
Accept: application/json, text/event-stream
|
||||
mcp-session-id: eb10a691b36e4547b6c93c5dc5b47e11
|
||||
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "list-tools-1",
|
||||
"method": "tools/list"
|
||||
}
|
||||
|
||||
### Call Query Rewriter Tool
|
||||
POST http://localhost:10501/mcp
|
||||
Content-Type: application/json
|
||||
Accept: application/json, text/event-stream
|
||||
mcp-session-id: 6b95ff75825a402b90eb3ea07e23fbce
|
||||
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "3d3b886a-6216-4a26-a422-7a972529c0e7",
|
||||
"method": "tools/call",
|
||||
"params": {
|
||||
"arguments": {
|
||||
"messages": [
|
||||
{
|
||||
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?",
|
||||
"role": "user"
|
||||
}
|
||||
]
|
||||
},
|
||||
"name": "query_rewriter"
|
||||
}
|
||||
}
|
||||
|
||||
### another test
|
||||
|
||||
# Content-Type: application/json
|
||||
# Accept: application/json, text/event-stream
|
||||
# mcp-session-id: ed7a81a1d39549ecaadb867a6b2daf1e
|
||||
|
||||
POST http://localhost:10501/mcp
|
||||
content-type: application/json
|
||||
mcp-session-id: e4ec1ae904e14e06b7d194da10e5f74c
|
||||
accept: application/json, text/event-stream
|
||||
|
||||
{"jsonrpc":"2.0","id":"4bb1043a-2953-4bcd-b801-f270b0ae8c39","method":"tools/call","params":{"arguments":{"messages":[{"content":"What is the guaranteed uptime percentage for TechCorp's cloud services?","role":"user"}]},"name":"query_rewriter"}}
|
||||
|
||||
|
||||
|
||||
### stream test
|
||||
|
||||
POST http://localhost:10501/mcp
|
||||
content-type: application/json
|
||||
mcp-session-id: 35d455dc07b8400887f86668590f12bb
|
||||
accept: application/json, text/event-stream
|
||||
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 1,
|
||||
"method": "tools/call",
|
||||
"params": {
|
||||
"name": "long_job",
|
||||
"arguments": {
|
||||
"n": 3
|
||||
}
|
||||
}
|
||||
}
|
||||
22
demos/use_cases/mcp_filter/pyproject.toml
Normal file
22
demos/use_cases/mcp_filter/pyproject.toml
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
[project]
|
||||
name = "rag_agent"
|
||||
version = "0.1.0"
|
||||
description = "RAG Agent"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"click>=8.2.1",
|
||||
"mcp>=1.13.1",
|
||||
"fastmcp>=2.14",
|
||||
"pydantic>=2.11.7",
|
||||
"fastapi>=0.104.1",
|
||||
"uvicorn>=0.24.0",
|
||||
"openai==2.13.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
rag_agent = "rag_agent:main"
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
64
demos/use_cases/mcp_filter/sample_queries.md
Normal file
64
demos/use_cases/mcp_filter/sample_queries.md
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
# Sample Queries for Knowledge Base RAG Agent
|
||||
|
||||
## Service Level Agreement Queries
|
||||
- What is the guaranteed uptime percentage for TechCorp's cloud services?
|
||||
- What remedies are available if the API response time exceeds the agreed threshold?
|
||||
- How quickly must TechCorp respond to critical support issues?
|
||||
- What monitoring and reporting requirements are specified in the SLA?
|
||||
- When was the TechCorp service agreement signed and by whom?
|
||||
|
||||
## Privacy Policy Queries
|
||||
- What encryption methods does DataSecure use to protect data?
|
||||
- How long does DataSecure retain personal data after account deletion?
|
||||
- What rights do users have regarding their personal information?
|
||||
- Can DataSecure sell user data to third parties for marketing?
|
||||
- Who should be contacted for privacy-related concerns at DataSecure?
|
||||
|
||||
## Supply Chain Agreement Queries
|
||||
- What types of automotive components does PrecisionParts supply?
|
||||
- What are the payment terms and volume discount structure?
|
||||
- What quality standards must the supplied components meet?
|
||||
- What are the penalties for late delivery?
|
||||
- What insurance coverage requirements apply to the supplier?
|
||||
|
||||
## Student Data Management Queries
|
||||
- What federal laws must EduTech comply with regarding student data?
|
||||
- What security measures are in place to protect student information?
|
||||
- How long are student records retained after graduation?
|
||||
- What consent is required for students under 13 years old?
|
||||
- Who can access student educational records?
|
||||
|
||||
## Investment Advisory Queries
|
||||
- What is FinanceFirst's management fee structure?
|
||||
- What types of investments are included in the advisory services?
|
||||
- What regulatory body oversees FinanceFirst Advisors?
|
||||
- How often are portfolio reviews conducted?
|
||||
- What are the client's responsibilities under this agreement?
|
||||
|
||||
## Healthcare Standards Queries
|
||||
- What is the target response time for emergency code teams?
|
||||
- What hand hygiene compliance rate is required?
|
||||
- How quickly must medical records be completed after patient encounters?
|
||||
- What continuing education requirements apply to nursing staff?
|
||||
- What patient safety protocols are mandatory upon admission?
|
||||
|
||||
## Cross-Document Queries
|
||||
- Which agreements include confidentiality or data protection provisions?
|
||||
- What are the common termination notice periods across different contract types?
|
||||
- Which documents specify insurance or liability coverage requirements?
|
||||
- What compliance and regulatory requirements are mentioned across agreements?
|
||||
- Which contracts include performance metrics or service level commitments?
|
||||
|
||||
## Complex Analysis Queries
|
||||
- Compare the data retention policies across the privacy policy and student data management documents.
|
||||
- What are the different approaches to risk management across the supply chain and investment advisory agreements?
|
||||
- How do the security measures in the healthcare standards compare to those in the privacy policy?
|
||||
- Which agreements provide the most detailed compliance and regulatory frameworks?
|
||||
- What common themes exist in the quality assurance requirements across different industries?
|
||||
|
||||
## Document-Specific Detail Queries
|
||||
- List all the specific percentages, timeframes, and numerical requirements mentioned in the SLA.
|
||||
- What are all the contact persons and their roles mentioned across the documents?
|
||||
- Identify all the compliance standards and certifications referenced in the supply chain agreement.
|
||||
- What are the specific consequences or penalties mentioned for non-compliance across agreements?
|
||||
- List all the third-party systems, tools, or services mentioned in the documents.
|
||||
98
demos/use_cases/mcp_filter/src/rag_agent/__init__.py
Normal file
98
demos/use_cases/mcp_filter/src/rag_agent/__init__.py
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
import click
|
||||
from fastmcp import FastMCP
|
||||
|
||||
mcp = None
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.option(
|
||||
"--transport",
|
||||
"transport",
|
||||
default="streamable-http",
|
||||
help="Transport type: stdio or sse",
|
||||
)
|
||||
@click.option("--host", "host", default="localhost", help="Host to bind MCP server to")
|
||||
@click.option("--port", "port", type=int, default=10500, help="Port for MCP server")
|
||||
@click.option(
|
||||
"--agent",
|
||||
"agent",
|
||||
required=True,
|
||||
help="Agent name: query_rewriter, context_builder, or response_generator",
|
||||
)
|
||||
@click.option(
|
||||
"--name",
|
||||
"agent_name",
|
||||
default=None,
|
||||
help="Custom MCP server name (defaults to agent type)",
|
||||
)
|
||||
@click.option(
|
||||
"--rest-server",
|
||||
"rest_server",
|
||||
is_flag=True,
|
||||
help="Start REST server instead of MCP server",
|
||||
)
|
||||
@click.option("--rest-port", "rest_port", default=8000, help="Port for REST server")
|
||||
def main(host, port, agent, transport, agent_name, rest_server, rest_port):
    """Start a RAG agent as an MCP server or REST server.

    Dispatches on ``agent``:
      * ``response_generator`` can only run as a REST server (``--rest-server``).
      * ``query_rewriter`` / ``context_builder`` can only run as MCP servers.
    Invalid combinations print an error and return without starting anything.
    """

    # Map friendly names to (importable module, default display name).
    agent_map = {
        "query_rewriter": ("rag_agent.query_rewriter", "Query Rewriter Agent"),
        "context_builder": ("rag_agent.context_builder", "Context Builder Agent"),
        "response_generator": (
            "rag_agent.rag_agent",
            "Response Generator Agent",
        ),
    }

    if agent not in agent_map:
        print(f"Error: Unknown agent '{agent}'")
        print(f"Available agents: {', '.join(agent_map.keys())}")
        return

    module_name, default_name = agent_map[agent]
    mcp_name = agent_name or default_name

    if rest_server:
        # Only response_generator supports REST server mode.
        if agent != "response_generator":
            print(f"Error: Agent '{agent}' does not support REST server mode.")
            # No placeholders here, so plain strings (not f-strings) suffice.
            print("REST server is only supported for: response_generator")
            print(f"Remove --rest-server flag to start {agent} as an MCP server.")
            return

        print(f"Starting REST server on {host}:{rest_port} for agent: {agent}")
        from rag_agent.rag_agent import start_server

        start_server(host=host, port=rest_port)
        return

    # MCP mode: only query_rewriter and context_builder register MCP tools.
    if agent not in ["query_rewriter", "context_builder"]:
        print(f"Error: Agent '{agent}' does not support MCP mode.")
        print("MCP is only supported for: query_rewriter, context_builder")
        print(f"Use --rest-server flag to start {agent} as a REST server.")
        return

    # The agent modules register their tools against the module-level `mcp`
    # instance at import time, so it must exist before the import below.
    global mcp
    mcp = FastMCP(mcp_name, host=host, port=port)

    print(f"Starting MCP server: {mcp_name}")
    print(f"  Agent: {agent}")
    print(f"  Transport: {transport}")
    print(f"  Host: {host}")
    print(f"  Port: {port}")

    # Import the agent module to register its tools.
    import importlib

    importlib.import_module(module_name)

    print(f"Agent '{agent}' loaded successfully")
    print(f"MCP server ready on {transport}://{host}:{port}")

    mcp.run(transport=transport)
|
||||
|
||||
|
||||
# Allow running this CLI module directly as a script.
if __name__ == "__main__":
    main()
|
||||
4
demos/use_cases/mcp_filter/src/rag_agent/__main__.py
Normal file
4
demos/use_cases/mcp_filter/src/rag_agent/__main__.py
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
# Package entry point: `python -m rag_agent` dispatches to the package's main().
from . import main

if __name__ == "__main__":
    main()
|
||||
36
demos/use_cases/mcp_filter/src/rag_agent/api.py
Normal file
36
demos/use_cases/mcp_filter/src/rag_agent/api.py
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
from pydantic import BaseModel
|
||||
from typing import List, Optional, Dict, Any
|
||||
|
||||
|
||||
class ChatMessage(BaseModel):
    """A single chat message in OpenAI chat-completions format."""

    role: str  # e.g. "system", "user", or "assistant"
    content: str  # message text
|
||||
|
||||
|
||||
class ChatCompletionRequest(BaseModel):
    """OpenAI-compatible request body for POST /v1/chat/completions."""

    model: str
    messages: List[ChatMessage]
    temperature: Optional[float] = 1.0
    max_tokens: Optional[int] = None
    top_p: Optional[float] = 1.0
    frequency_penalty: Optional[float] = 0.0
    presence_penalty: Optional[float] = 0.0
    stream: Optional[bool] = False  # when True, the server streams SSE chunks
    stop: Optional[List[str]] = None
|
||||
|
||||
|
||||
class ChatCompletionResponse(BaseModel):
    """OpenAI-compatible non-streaming chat completion response."""

    id: str
    object: str = "chat.completion"
    created: int  # unix timestamp (seconds)
    model: str
    choices: List[Dict[str, Any]]
    usage: Dict[str, int]  # prompt_tokens / completion_tokens / total_tokens
|
||||
|
||||
|
||||
class ChatCompletionStreamResponse(BaseModel):
    """OpenAI-compatible streaming chat completion chunk (SSE payload)."""

    id: str
    object: str = "chat.completion.chunk"
    created: int  # unix timestamp (seconds)
    model: str
    choices: List[Dict[str, Any]]
|
||||
205
demos/use_cases/mcp_filter/src/rag_agent/context_builder.py
Normal file
205
demos/use_cases/mcp_filter/src/rag_agent/context_builder.py
Normal file
|
|
@ -0,0 +1,205 @@
|
|||
import json
|
||||
from typing import List, Optional, Dict, Any
|
||||
from openai import AsyncOpenAI
|
||||
import os
|
||||
import logging
|
||||
import csv
|
||||
from pathlib import Path
|
||||
|
||||
from .api import ChatMessage
|
||||
from . import mcp
|
||||
from fastmcp.server.dependencies import get_http_headers
|
||||
|
||||
# Set up logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - [CONTEXT_BUILDER] - %(levelname)s - %(message)s",
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Configuration for archgw LLM gateway
|
||||
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1")
|
||||
RAG_MODEL = "gpt-4o-mini"
|
||||
|
||||
# Initialize OpenAI client for archgw
|
||||
archgw_client = AsyncOpenAI(
|
||||
base_url=LLM_GATEWAY_ENDPOINT,
|
||||
api_key="EMPTY", # archgw doesn't require a real API key
|
||||
)
|
||||
|
||||
# Global variable to store the knowledge base
|
||||
knowledge_base = []
|
||||
|
||||
|
||||
def load_knowledge_base():
    """Load the sample_knowledge_base.csv file into memory on startup.

    Populates the module-level ``knowledge_base`` list with dicts of the
    form ``{"path": ..., "content": ...}``.  On any error the knowledge
    base is left empty so the service can still start.
    """
    global knowledge_base

    # Get the path to the CSV file relative to this script
    current_dir = Path(__file__).parent
    csv_path = current_dir / "sample_knowledge_base.csv"

    # Use the module logger (not print) so output matches the rest of this file.
    logger.info(f"Loading knowledge base from {csv_path}")

    try:
        knowledge_base = []
        with open(csv_path, "r", encoding="utf-8-sig") as file:
            csv_reader = csv.DictReader(file)
            for row in csv_reader:
                knowledge_base.append({"path": row["path"], "content": row["content"]})

        logger.info(f"Loaded {len(knowledge_base)} documents from knowledge base")

    except Exception as e:
        # Deliberate best-effort: an unreadable/missing CSV must not crash startup.
        logger.error(f"Error loading knowledge base: {e}")
        knowledge_base = []
|
||||
|
||||
|
||||
async def find_relevant_passages(
    query: str, traceparent: Optional[str] = None, top_k: int = 3
) -> List[Dict[str, str]]:
    """Use the LLM to find the most relevant passages from the knowledge base.

    Args:
        query: User query to match passages against.
        traceparent: Optional W3C trace-context header to propagate to archgw.
        top_k: Maximum number of passages the LLM is asked to select.

    Returns:
        A list of ``{"path": ..., "content": ...}`` dicts drawn from the
        module-level ``knowledge_base``; empty on error or when nothing matches.
    """

    if not knowledge_base:
        logger.warning("Knowledge base is empty")
        return []

    # Create a system prompt for passage selection
    system_prompt = f"""You are a retrieval assistant that selects the most relevant document passages for a given query.

Given a user query and a list of document passages, identify the {top_k} most relevant passages that would help answer the query.

Query: {query}

Available passages:
"""

    # Add all passages with indices; content is truncated to 500 chars per passage
    # to keep the prompt bounded.
    for i, doc in enumerate(knowledge_base):
        system_prompt += (
            f"\n[{i}] Path: {doc['path']}\nContent: {doc['content'][:500]}...\n"
        )

    system_prompt += f"""

Please respond with ONLY the indices of the {top_k} most relevant passages, separated by commas (e.g., "0,3,7").
If fewer than {top_k} passages are relevant, return only the relevant ones.
If no passages are relevant, return "NONE"."""

    try:
        # Call archgw to select relevant passages
        logger.info(f"Calling archgw to find relevant passages for query: '{query}'")

        # Prepare extra headers if traceparent is provided
        extra_headers = {"x-envoy-max-retries": "3"}
        if traceparent:
            extra_headers["traceparent"] = traceparent

        response = await archgw_client.chat.completions.create(
            model=RAG_MODEL,
            messages=[{"role": "system", "content": system_prompt}],
            temperature=0.1,
            max_tokens=50,
            extra_headers=extra_headers,
        )

        result = response.choices[0].message.content.strip()
        logger.info(f"LLM selected passages: {result}")

        # Parse the indices
        if result.upper() == "NONE":
            return []

        # Non-numeric tokens from the LLM are silently dropped by isdigit().
        selected_passages = []
        indices = [
            int(idx.strip()) for idx in result.split(",") if idx.strip().isdigit()
        ]

        # Guard against out-of-range indices hallucinated by the model.
        for idx in indices:
            if 0 <= idx < len(knowledge_base):
                selected_passages.append(knowledge_base[idx])

        logger.info(f"Selected {len(selected_passages)} relevant passages")
        return selected_passages

    except Exception as e:
        # Best-effort retrieval: callers treat an empty list as "no context".
        logger.error(f"Error finding relevant passages: {e}")
        return []
|
||||
|
||||
|
||||
async def augment_query_with_context(
    messages: List[ChatMessage], traceparent: Optional[str] = None
) -> List[ChatMessage]:
    """Augment the latest user message with relevant knowledge-base context.

    Locates the most recent user turn, retrieves relevant passages for it,
    and returns a copy of ``messages`` where that turn is replaced by the
    original query followed by the retrieved context.  Returns the input
    unchanged when there is no user message or no relevant passage.
    """

    # Scan backwards for the most recent user turn.
    last_user_index = next(
        (i for i in range(len(messages) - 1, -1, -1) if messages[i].role == "user"),
        -1,
    )
    last_user_message = (
        messages[last_user_index].content if last_user_index >= 0 else None
    )

    if not last_user_message:
        logger.warning("No user message found in conversation")
        return messages

    logger.info(f"Processing user query: '{last_user_message}'")

    relevant_passages = await find_relevant_passages(last_user_message, traceparent)
    if not relevant_passages:
        logger.info("No relevant passages found, returning original messages")
        return messages

    # Render each selected passage as a numbered document section.
    context = "\n\n".join(
        f"Document {i+1} ({passage['path']}):\n{passage['content']}"
        for i, passage in enumerate(relevant_passages)
    )

    # Keep the original query text and append the retrieved context to it.
    augmented_content = f"""{last_user_message} RELEVANT CONTEXT:
{context}"""

    updated_messages = messages.copy()
    updated_messages[last_user_index] = ChatMessage(
        role="user", content=augmented_content
    )

    logger.info(f"Augmented user query with {len(relevant_passages)} relevant passages")

    return updated_messages
|
||||
|
||||
|
||||
# Load knowledge base on module import.
# NOTE(review): runs at import time; failures are logged and leave the KB empty.
load_knowledge_base()
|
||||
|
||||
|
||||
@mcp.tool()
async def context_builder(messages: List[ChatMessage]) -> List[ChatMessage]:
    """MCP tool that augments user queries with relevant context from the knowledge base."""
    logger.info(f"Received chat completion request with {len(messages)} messages")

    # Propagate the caller's trace context (if any) to downstream LLM calls.
    trace_ctx = get_http_headers().get("traceparent")
    if trace_ctx:
        logger.info(f"Received traceparent header: {trace_ctx}")
    else:
        logger.info("No traceparent header found")

    enriched = await augment_query_with_context(messages, trace_ctx)

    # Return plain role/content dicts to minimize text serialization over MCP.
    return [{"role": msg.role, "content": msg.content} for msg in enriched]
|
||||
119
demos/use_cases/mcp_filter/src/rag_agent/query_rewriter.py
Normal file
119
demos/use_cases/mcp_filter/src/rag_agent/query_rewriter.py
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
import asyncio
|
||||
import json
|
||||
from typing import List, Optional, Dict, Any
|
||||
from openai import AsyncOpenAI
|
||||
import os
|
||||
import logging
|
||||
|
||||
from .api import ChatMessage
|
||||
from . import mcp
|
||||
from fastmcp.server.dependencies import get_http_headers
|
||||
|
||||
# Set up logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - [QUERY_REWRITER] - %(levelname)s - %(message)s",
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Configuration for archgw LLM gateway
|
||||
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1")
|
||||
QUERY_REWRITE_MODEL = "gpt-4o-mini"
|
||||
|
||||
# Initialize OpenAI client for archgw
|
||||
archgw_client = AsyncOpenAI(
|
||||
base_url=LLM_GATEWAY_ENDPOINT,
|
||||
api_key="EMPTY", # archgw doesn't require a real API key
|
||||
)
|
||||
|
||||
|
||||
async def rewrite_query_with_archgw(
    messages: List[ChatMessage], traceparent_header: str
) -> str:
    """Rewrite the user query using LLM for better retrieval.

    Args:
        messages: Full conversation history; the LLM rewrites the last user turn.
        traceparent_header: W3C trace-context header to propagate, or None/empty.

    Returns:
        The rewritten query, or — on any LLM error — the original last user
        message (empty string if the conversation has no user turn).
    """
    system_prompt = """You are a query rewriter that improves user queries for better retrieval.

Given a conversation history, rewrite the last user message to be more specific and context-aware.
The rewritten query should:
1. Include relevant context from previous messages
2. Be clear and specific for information retrieval
3. Maintain the user's intent
4. Be concise but comprehensive

Return only the rewritten query, nothing else."""

    # Prepare messages for the query rewriter - just add system prompt to existing messages
    rewrite_messages = [{"role": "system", "content": system_prompt}]

    # Add conversation history
    for msg in messages:
        rewrite_messages.append({"role": msg.role, "content": msg.content})

    try:
        # Call archgw using OpenAI client
        extra_headers = {"x-envoy-max-retries": "3"}
        if traceparent_header:
            extra_headers["traceparent"] = traceparent_header
        logger.info(f"Calling archgw at {LLM_GATEWAY_ENDPOINT} to rewrite query")
        response = await archgw_client.chat.completions.create(
            model=QUERY_REWRITE_MODEL,
            messages=rewrite_messages,
            temperature=0.3,
            max_tokens=200,
            extra_headers=extra_headers,
        )

        rewritten_query = response.choices[0].message.content.strip()
        logger.info(f"Query rewritten successfully: '{rewritten_query}'")
        return rewritten_query

    except Exception as e:
        logger.error(f"Error rewriting query: {e}")

        # If rewriting fails, return the original last user message
        logger.info("Falling back to original user message")
        for message in reversed(messages):
            if message.role == "user":
                return message.content
        return ""
|
||||
|
||||
|
||||
@mcp.tool()
async def query_rewriter(messages: List[ChatMessage]) -> List[ChatMessage]:
    """Chat completions endpoint that rewrites the last user query using archgw.

    Returns the updated message list (as plain role/content dicts) with the
    last user message replaced by the rewritten query; the rest of the
    conversation is passed through unchanged.
    """
    logger.info(f"Received chat completion request with {len(messages)} messages")

    # Get traceparent header from HTTP request using FastMCP's dependency function
    headers = get_http_headers()
    traceparent_header = headers.get("traceparent")

    if traceparent_header:
        logger.info(f"Received traceparent header: {traceparent_header}")
    else:
        logger.info("No traceparent header found")

    # Call archgw to rewrite the last user query
    rewritten_query = await rewrite_query_with_archgw(messages, traceparent_header)

    # Create updated messages with the rewritten query
    updated_messages = messages.copy()

    # Find and update the last user message with the rewritten query
    for i in range(len(updated_messages) - 1, -1, -1):
        if updated_messages[i].role == "user":
            original_query = updated_messages[i].content
            updated_messages[i] = ChatMessage(role="user", content=rewritten_query)
            logger.info(
                f"Updated user query from '{original_query}' to '{rewritten_query}'"
            )
            break

    # Return as dict to minimize text serialization
    return [{"role": msg.role, "content": msg.content} for msg in updated_messages]
|
||||
303
demos/use_cases/mcp_filter/src/rag_agent/rag_agent.py
Normal file
303
demos/use_cases/mcp_filter/src/rag_agent/rag_agent.py
Normal file
|
|
@ -0,0 +1,303 @@
|
|||
import json
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.responses import StreamingResponse
|
||||
from openai import AsyncOpenAI
|
||||
import os
|
||||
import logging
|
||||
import time
|
||||
import uuid
|
||||
import uvicorn
|
||||
import asyncio
|
||||
|
||||
from .api import (
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
ChatCompletionStreamResponse,
|
||||
)
|
||||
|
||||
# Set up logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s - [RESPONSE_GENERATOR] - %(levelname)s - %(message)s",
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Configuration for archgw LLM gateway
|
||||
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1")
|
||||
RESPONSE_MODEL = "gpt-4o"
|
||||
|
||||
# System prompt for response generation
|
||||
SYSTEM_PROMPT = """You are a helpful assistant that generates coherent, contextual responses.
|
||||
|
||||
Given a conversation history, generate a helpful and relevant response based on all the context available in the messages.
|
||||
Your response should:
|
||||
1. Be contextually aware of the entire conversation
|
||||
2. Address the user's needs appropriately
|
||||
3. Be helpful and informative
|
||||
4. Maintain a natural conversational tone
|
||||
|
||||
Generate a complete response to assist the user."""
|
||||
|
||||
# Initialize OpenAI client for archgw
|
||||
archgw_client = AsyncOpenAI(
|
||||
base_url=LLM_GATEWAY_ENDPOINT,
|
||||
api_key="EMPTY", # archgw doesn't require a real API key
|
||||
)
|
||||
|
||||
# FastAPI app for REST server
|
||||
app = FastAPI(title="RAG Agent Response Generator", version="1.0.0")
|
||||
|
||||
|
||||
def prepare_response_messages(request_body: ChatCompletionRequest):
    """Prepare messages for response generation by adding system prompt."""
    # System prompt first, then the full conversation history as plain dicts.
    history = [
        {"role": msg.role, "content": msg.content} for msg in request_body.messages
    ]
    return [{"role": "system", "content": SYSTEM_PROMPT}] + history
|
||||
|
||||
|
||||
@app.post("/v1/chat/completions")
|
||||
async def chat_completion_http(request: Request, request_body: ChatCompletionRequest):
|
||||
"""HTTP endpoint for chat completions with streaming support."""
|
||||
logger.info(
|
||||
f"Received chat completion request with {len(request_body.messages)} messages"
|
||||
)
|
||||
|
||||
# Get traceparent header from HTTP request
|
||||
traceparent_header = request.headers.get("traceparent")
|
||||
|
||||
if traceparent_header:
|
||||
logger.info(f"Received traceparent header: {traceparent_header}")
|
||||
else:
|
||||
logger.info("No traceparent header found")
|
||||
|
||||
# Check if streaming is requested
|
||||
if request_body.stream:
|
||||
return StreamingResponse(
|
||||
stream_chat_completions(request_body, traceparent_header),
|
||||
media_type="text/plain",
|
||||
headers={
|
||||
"content-type": "text/event-stream",
|
||||
},
|
||||
)
|
||||
else:
|
||||
return await non_streaming_chat_completions(request_body, traceparent_header)
|
||||
|
||||
|
||||
async def stream_chat_completions(
    request_body: ChatCompletionRequest, traceparent_header: str = None
):
    """Generate streaming chat completions.

    Async generator yielding OpenAI-style SSE lines (``data: {...}\\n\\n``).
    Content deltas are forwarded as they arrive; the final chunk additionally
    carries the full assistant reply (JSON-encoded) in a ``message`` field,
    followed by ``data: [DONE]``.  Errors are surfaced as a single apology
    chunk rather than raised, so the HTTP stream always terminates cleanly.
    """
    # Prepare messages for response generation
    response_messages = prepare_response_messages(request_body)

    try:
        # Call archgw using OpenAI client for streaming
        logger.info(
            f"Calling archgw at {LLM_GATEWAY_ENDPOINT} to generate streaming response"
        )

        # Prepare extra headers if traceparent is provided
        extra_headers = {"x-envoy-max-retries": "3"}
        if traceparent_header:
            extra_headers["traceparent"] = traceparent_header

        response_stream = await archgw_client.chat.completions.create(
            model=RESPONSE_MODEL,
            messages=response_messages,
            temperature=request_body.temperature or 0.7,
            max_tokens=request_body.max_tokens or 1000,
            stream=True,
            extra_headers=extra_headers,
        )

        completion_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"
        created_time = int(time.time())
        # Accumulates every delta so the final chunk can carry the full reply.
        collected_content = []

        async for chunk in response_stream:
            if chunk.choices and chunk.choices[0].delta.content:
                content = chunk.choices[0].delta.content
                collected_content.append(content)

                # Create streaming response chunk
                stream_chunk = ChatCompletionStreamResponse(
                    id=completion_id,
                    created=created_time,
                    model=request_body.model,
                    choices=[
                        {
                            "index": 0,
                            "delta": {"content": content},
                            "finish_reason": None,
                        }
                    ],
                )

                yield f"data: {stream_chunk.model_dump_json()}\n\n"

        # Send final chunk with complete response in expected format
        full_response = "".join(collected_content)
        updated_history = [{"role": "assistant", "content": full_response}]

        # NOTE(review): the "message" field on a stream chunk is non-standard
        # OpenAI — presumably consumed by the MCP filter chain; confirm callers.
        final_chunk = ChatCompletionStreamResponse(
            id=completion_id,
            created=created_time,
            model=request_body.model,
            choices=[
                {
                    "index": 0,
                    "delta": {},
                    "finish_reason": "stop",
                    "message": {
                        "role": "assistant",
                        "content": json.dumps(updated_history),
                    },
                }
            ],
        )

        yield f"data: {final_chunk.model_dump_json()}\n\n"
        yield "data: [DONE]\n\n"

    except Exception as e:
        logger.error(f"Error generating streaming response: {e}")

        # Send error as streaming response
        error_chunk = ChatCompletionStreamResponse(
            id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
            created=int(time.time()),
            model=request_body.model,
            choices=[
                {
                    "index": 0,
                    "delta": {
                        "content": "I apologize, but I'm having trouble generating a response right now. Please try again."
                    },
                    "finish_reason": "stop",
                }
            ],
        )

        yield f"data: {error_chunk.model_dump_json()}\n\n"
        yield "data: [DONE]\n\n"
|
||||
|
||||
|
||||
def _token_usage(messages, completion_text: str) -> dict:
    """Approximate a usage dict by whitespace word counts (not a real tokenizer)."""
    prompt_tokens = sum(len(msg.content.split()) for msg in messages)
    completion_tokens = len(completion_text.split())
    return {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
    }


async def non_streaming_chat_completions(
    request_body: ChatCompletionRequest, traceparent_header: str = None
):
    """Generate non-streaming chat completions.

    Calls archgw for a complete assistant reply; on any failure returns a
    canned fallback response instead of raising, so the HTTP endpoint always
    answers with a well-formed ChatCompletionResponse.
    """
    # Prepare messages for response generation
    response_messages = prepare_response_messages(request_body)

    try:
        # Call archgw using OpenAI client
        logger.info(f"Calling archgw at {LLM_GATEWAY_ENDPOINT} to generate response")

        # Prepare extra headers if traceparent is provided
        extra_headers = {"x-envoy-max-retries": "3"}
        if traceparent_header:
            extra_headers["traceparent"] = traceparent_header

        response = await archgw_client.chat.completions.create(
            model=RESPONSE_MODEL,
            messages=response_messages,
            temperature=request_body.temperature or 0.7,
            max_tokens=request_body.max_tokens or 1000,
            extra_headers=extra_headers,
        )

        generated_response = response.choices[0].message.content.strip()
        logger.info("Response generated successfully")

        return ChatCompletionResponse(
            id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
            created=int(time.time()),
            model=request_body.model,
            choices=[
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": generated_response,
                    },
                    "finish_reason": "stop",
                }
            ],
            usage=_token_usage(request_body.messages, generated_response),
        )

    except Exception as e:
        logger.error(f"Error generating response: {e}")

        # Fallback response
        fallback_message = "I apologize, but I'm having trouble generating a response right now. Please try again."
        return ChatCompletionResponse(
            id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
            created=int(time.time()),
            model=request_body.model,
            choices=[
                {
                    "index": 0,
                    "message": {"role": "assistant", "content": fallback_message},
                    "finish_reason": "stop",
                }
            ],
            usage=_token_usage(request_body.messages, fallback_message),
        )
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
async def health_check():
|
||||
"""Health check endpoint."""
|
||||
return {"status": "healthy"}
|
||||
|
||||
|
||||
def start_server(host: str = "localhost", port: int = 8000):
    """Start the REST server."""
    # Route uvicorn's logging through a single stdout handler whose format
    # matches this module's own "[RESPONSE_GENERATOR]" log lines.
    log_config = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {
            "default": {
                "format": "%(asctime)s - [RESPONSE_GENERATOR] - %(levelname)s - %(message)s",
            },
        },
        "handlers": {
            "default": {
                "formatter": "default",
                "class": "logging.StreamHandler",
                "stream": "ext://sys.stdout",
            },
        },
        "root": {
            "level": "INFO",
            "handlers": ["default"],
        },
    }
    uvicorn.run(app, host=host, port=port, log_config=log_config)
|
||||
|
|
@ -0,0 +1,257 @@
|
|||
path,content
|
||||
TechCorp_CloudServices_SLA_Agreement_2024,"SERVICE LEVEL AGREEMENT
|
||||
This Service Level Agreement (""SLA"") is entered into on March 15, 2024, between TechCorp Solutions Inc., a Delaware corporation (""Provider""), and CloudFirst Enterprises LLC (""Customer"").
|
||||
|
||||
DEFINITIONS
|
||||
Service Availability: The percentage of time during which the cloud services are operational and accessible.
|
||||
Downtime: Any period when the services are unavailable or inaccessible to Customer.
|
||||
Response Time: The time between service request submission and initial response from Provider.
|
||||
|
||||
SERVICE COMMITMENTS
|
||||
Provider guarantees 99.9% uptime for all cloud infrastructure services during any calendar month.
|
||||
Average response time for API calls shall not exceed 200 milliseconds under normal operating conditions.
|
||||
Customer support response times: Critical issues within 1 hour, Standard issues within 4 hours.
|
||||
|
||||
REMEDIES
|
||||
For each full percentage point below 99.9% availability, Customer receives 10% credit on monthly fees.
|
||||
If response times exceed 500ms for more than 5 minutes in any hour, Customer receives 5% monthly credit.
|
||||
|
||||
MONITORING AND REPORTING
|
||||
Provider will maintain real-time monitoring systems and provide monthly performance reports.
|
||||
All metrics will be measured from Provider's monitoring systems located in primary data centers.
|
||||
|
||||
This SLA remains in effect for the duration of the underlying service agreement.
|
||||
|
||||
Executed by:
|
||||
TechCorp Solutions Inc.
|
||||
Sarah Mitchell, VP Operations
|
||||
Date: March 15, 2024
|
||||
|
||||
CloudFirst Enterprises LLC
|
||||
Robert Chen, CTO
|
||||
Date: March 16, 2024"
|
||||
|
||||
DataSecure_Privacy_Policy_v3.2,"PRIVACY POLICY
|
||||
DataSecure Analytics, Inc. (""Company"") Privacy Policy
|
||||
Effective Date: January 1, 2024
|
||||
Last Updated: February 28, 2024
|
||||
|
||||
INFORMATION COLLECTION
|
||||
We collect information you provide directly, such as account details, usage preferences, and communication records.
|
||||
Automatically collected data includes IP addresses, browser types, device information, and service interaction logs.
|
||||
Third-party integrations may provide additional user behavior and demographic information with consent.
|
||||
|
||||
DATA USAGE
|
||||
Personal information is used to provide services, improve user experience, and communicate service updates.
|
||||
Aggregated, non-identifiable data may be used for analytics, research, and service enhancement.
|
||||
We do not sell personal information to third parties for marketing purposes.
|
||||
|
||||
DATA PROTECTION
|
||||
All data is encrypted in transit using TLS 1.3 and at rest using AES-256 encryption.
|
||||
Access controls limit data access to authorized personnel only on a need-to-know basis.
|
||||
Regular security audits and penetration testing ensure ongoing protection measures.
|
||||
|
||||
DATA RETENTION
|
||||
Personal data is retained for the duration of active service plus 24 months.
|
||||
Logs and analytics data are retained for 12 months unless legally required otherwise.
|
||||
Upon account deletion, personal data is permanently removed within 30 days.
|
||||
|
||||
USER RIGHTS
|
||||
Users may request access to, correction of, or deletion of their personal information.
|
||||
Data portability requests will be fulfilled in standard formats within 30 days.
|
||||
Marketing communications can be opted out of at any time.
|
||||
|
||||
CONTACT
|
||||
For privacy concerns, contact: privacy@datasecure.com
|
||||
Data Protection Officer: Jennifer Walsh, jwalsh@datasecure.com"
|
||||
|
||||
GlobalManufacturing_SupplyChain_Contract_Q2_2024,"SUPPLY CHAIN AGREEMENT
|
||||
This Supply Chain Agreement is entered into between GlobalManufacturing Corp (""Buyer"") and PrecisionParts Ltd (""Supplier"") effective April 1, 2024.
|
||||
|
||||
SCOPE OF SERVICES
|
||||
Supplier will provide automotive components including brake assemblies, suspension parts, and electrical harnesses.
|
||||
All products must meet ISO 9001 quality standards and automotive industry specifications.
|
||||
Delivery schedule: Weekly shipments every Tuesday, with 48-hour advance shipping notifications.
|
||||
|
||||
PRICING AND PAYMENT
|
||||
Component pricing is fixed for initial 6-month term with quarterly price review thereafter.
|
||||
Payment terms: Net 45 days from invoice date via electronic transfer.
|
||||
Volume discounts apply: 5% for orders exceeding 10,000 units per month, 8% for orders exceeding 25,000 units.
|
||||
|
||||
QUALITY REQUIREMENTS
|
||||
All components must pass incoming inspection with less than 0.1% defect rate.
|
||||
Supplier maintains quality certifications including IATF 16949 and environmental compliance.
|
||||
Batch tracking and traceability required for all delivered components.
|
||||
|
||||
LOGISTICS AND DELIVERY
|
||||
Supplier responsible for packaging, labeling, and delivery to Buyer's distribution centers.
|
||||
Delivery windows: 8 AM - 4 PM, Monday through Friday, with advance appointment scheduling.
|
||||
Late delivery penalties: 2% of shipment value for each day beyond scheduled delivery.
|
||||
|
||||
RISK MANAGEMENT
|
||||
Supplier maintains business continuity plans and alternative sourcing strategies.
|
||||
Force majeure events must be reported within 24 hours with mitigation plans.
|
||||
Insurance requirements: $5M general liability, $2M product liability coverage.
|
||||
|
||||
INTELLECTUAL PROPERTY
|
||||
All custom tooling and specifications remain property of Buyer.
|
||||
Supplier grants license to use necessary patents for component manufacturing.
|
||||
|
||||
This agreement shall remain in effect for 24 months with automatic renewal unless terminated.
|
||||
|
||||
GlobalManufacturing Corp
|
||||
Michael Rodriguez, Supply Chain Director
|
||||
Date: April 1, 2024
|
||||
|
||||
PrecisionParts Ltd
|
||||
Amanda Foster, VP Sales
|
||||
Date: April 2, 2024"
|
||||
|
||||
EduTech_StudentData_Management_Policy_2024,"STUDENT DATA MANAGEMENT POLICY
|
||||
EduTech Learning Platform - Data Management and Protection Policy
|
||||
Document Version: 2.1
|
||||
Effective Date: August 15, 2024
|
||||
|
||||
SCOPE AND PURPOSE
|
||||
This policy governs the collection, use, storage, and protection of student educational records and personal information.
|
||||
Applies to all employees, contractors, and third-party service providers accessing student data.
|
||||
Compliance with FERPA, COPPA, and state student privacy laws is mandatory.
|
||||
|
||||
DATA CLASSIFICATION
|
||||
Educational Records: Grades, attendance, assignments, and academic progress information.
|
||||
Personal Information: Names, addresses, contact details, and demographic information.
|
||||
Behavioral Data: Learning patterns, platform usage, and engagement metrics.
|
||||
|
||||
COLLECTION PRINCIPLES
|
||||
Data collection is limited to educational purposes and service improvement only.
|
||||
Parental consent required for students under 13 years of age.
|
||||
Students and parents have right to review and request corrections to educational records.
|
||||
|
||||
ACCESS CONTROLS
|
||||
Role-based access ensures personnel see only data necessary for their functions.
|
||||
Multi-factor authentication required for all system access.
|
||||
Access logs maintained and reviewed monthly for unauthorized activity.
|
||||
|
||||
DATA SHARING
|
||||
Educational records shared only with authorized school personnel and parents/students.
|
||||
No data sharing with third parties for commercial purposes without explicit consent.
|
||||
Research data must be de-identified and aggregated before external sharing.
|
||||
|
||||
SECURITY MEASURES
|
||||
Data encrypted using industry-standard protocols during transmission and storage.
|
||||
Regular security assessments and vulnerability testing conducted quarterly.
|
||||
Incident response plan includes notification procedures for data breaches.
|
||||
|
||||
RETENTION AND DISPOSAL
|
||||
Student records retained according to school district policies, typically 5-7 years post-graduation.
|
||||
Inactive accounts and associated data purged after 2 years of non-use.
|
||||
Secure data destruction protocols ensure complete removal of sensitive information.
|
||||
|
||||
COMPLIANCE MONITORING
|
||||
Annual privacy training required for all staff handling student data.
|
||||
Regular audits ensure ongoing compliance with applicable privacy regulations.
|
||||
Privacy impact assessments conducted for new features or data uses.
|
||||
|
||||
Contact: Dr. Lisa Thompson, Chief Privacy Officer
|
||||
Email: privacy@edutech-learning.com
|
||||
Phone: (555) 123-4567"
|
||||
|
||||
FinanceFirst_Investment_Advisory_Agreement_2024,"INVESTMENT ADVISORY AGREEMENT
|
||||
This Investment Advisory Agreement is entered into between FinanceFirst Advisors LLC (""Advisor"") and Madison Investment Group (""Client"") on May 20, 2024.
|
||||
|
||||
ADVISORY SERVICES
|
||||
Advisor will provide comprehensive investment management and financial planning services.
|
||||
Services include portfolio construction, asset allocation, risk assessment, and performance monitoring.
|
||||
Regular portfolio reviews conducted quarterly with detailed performance reporting.
|
||||
|
||||
INVESTMENT AUTHORITY
|
||||
Client grants Advisor discretionary authority to make investment decisions within agreed parameters.
|
||||
Investment universe includes stocks, bonds, ETFs, mutual funds, and alternative investments as appropriate.
|
||||
All trades executed through qualified broker-dealers with best execution practices.
|
||||
|
||||
FEE STRUCTURE
|
||||
Management fee: 1.25% annually on assets under management, calculated and billed quarterly.
|
||||
Performance fee: 15% of returns exceeding S&P 500 benchmark, calculated annually.
|
||||
Additional fees may apply for specialized services such as tax planning or estate planning.
|
||||
|
||||
CLIENT RESPONSIBILITIES
|
||||
Client must provide accurate financial information and promptly communicate changes in circumstances.
|
||||
Investment objectives and risk tolerance should be reviewed and updated annually.
|
||||
Client responsible for reviewing and approving investment policy statement.
|
||||
|
||||
RISK DISCLOSURE
|
||||
All investments carry risk of loss, and past performance does not guarantee future results.
|
||||
Diversification does not ensure profit or protect against loss in declining markets.
|
||||
Alternative investments may have limited liquidity and higher volatility.
|
||||
|
||||
REGULATORY COMPLIANCE
|
||||
Advisor is registered with the Securities and Exchange Commission as an investment advisor.
|
||||
All activities conducted in accordance with Investment Advisers Act of 1940 and applicable regulations.
|
||||
Form ADV Part 2 brochure provided annually with material updates.
|
||||
|
||||
CONFIDENTIALITY
|
||||
All client information treated as confidential and shared only as necessary for service provision.
|
||||
Third-party service providers bound by confidentiality agreements.
|
||||
Client data protected through secure systems and access controls.
|
||||
|
||||
TERMINATION
|
||||
Either party may terminate agreement with 30 days written notice.
|
||||
Upon termination, Advisor will assist with orderly transfer of assets to new custodian or advisor.
|
||||
Final fee calculation prorated to date of termination.
|
||||
|
||||
FinanceFirst Advisors LLC
|
||||
Thomas Anderson, Managing Partner
|
||||
Date: May 20, 2024
|
||||
|
||||
Madison Investment Group
|
||||
Rebecca Martinez, Chief Investment Officer
|
||||
Date: May 21, 2024"
|
||||
|
||||
HealthSystem_PatientCare_Standards_2024,"PATIENT CARE STANDARDS AND PROTOCOLS
|
||||
Metropolitan Health System - Clinical Care Standards
|
||||
Document ID: MHS-PCS-2024-001
|
||||
Effective Date: June 1, 2024
|
||||
|
||||
PATIENT SAFETY PROTOCOLS
|
||||
All patients must have proper identification verification using two unique identifiers.
|
||||
Medication administration requires independent double-check for high-risk medications.
|
||||
Fall risk assessments completed within 4 hours of admission with appropriate interventions.
|
||||
|
||||
CLINICAL DOCUMENTATION
|
||||
Medical records must be completed within 24 hours of patient encounter.
|
||||
All entries require electronic signature with timestamp and provider identification.
|
||||
Critical values and abnormal results must be communicated and documented immediately.
|
||||
|
||||
INFECTION CONTROL
|
||||
Hand hygiene compliance monitored with target rate of 95% or higher.
|
||||
Personal protective equipment used according to transmission-based precautions.
|
||||
Isolation procedures implemented within 2 hours of identification of infectious conditions.
|
||||
|
||||
EMERGENCY RESPONSE
|
||||
Code team response time target: 3 minutes from activation to arrival.
|
||||
Crash cart and emergency equipment checks performed daily and documented.
|
||||
All staff required to maintain current CPR and emergency response certifications.
|
||||
|
||||
PATIENT COMMUNICATION
|
||||
Patient rights and responsibilities communicated upon admission.
|
||||
Informed consent obtained and documented prior to procedures and treatments.
|
||||
Family involvement encouraged with respect for patient privacy preferences.
|
||||
|
||||
QUALITY MEASURES
|
||||
Patient satisfaction scores monitored monthly with target of 4.5/5.0 or higher.
|
||||
Medication error rates tracked with goal of less than 1 per 1000 patient days.
|
||||
Hospital-acquired infection rates measured and benchmarked against national standards.
|
||||
|
||||
STAFF COMPETENCY
|
||||
Annual competency assessments required for all clinical staff.
|
||||
Continuing education requirements: 24 hours annually for nurses, 40 hours for physicians.
|
||||
Specialty certifications maintained according to department and role requirements.
|
||||
|
||||
TECHNOLOGY STANDARDS
|
||||
Electronic health record system used for all patient documentation.
|
||||
Telemedicine capabilities available for remote consultations and monitoring.
|
||||
Clinical decision support tools integrated to assist with diagnosis and treatment decisions.
|
||||
|
||||
Contact: Dr. Patricia Williams, Chief Medical Officer
|
||||
Email: pwilliams@metrohealthsystem.org
|
||||
Phone: (555) 987-6543"
|
||||
|
47
demos/use_cases/mcp_filter/start_agents.sh
Normal file
47
demos/use_cases/mcp_filter/start_agents.sh
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
WAIT_FOR_PIDS=()
|
||||
|
||||
log() {
|
||||
timestamp=$(python3 -c 'from datetime import datetime; print(datetime.now().strftime("%Y-%m-%d %H:%M:%S,%f")[:23])')
|
||||
message="$*"
|
||||
echo "$timestamp - $message"
|
||||
}
|
||||
|
||||
cleanup() {
|
||||
log "Caught signal, terminating all user processes ..."
|
||||
for PID in "${WAIT_FOR_PIDS[@]}"; do
|
||||
if kill $PID 2> /dev/null; then
|
||||
log "killed process: $PID"
|
||||
fi
|
||||
done
|
||||
exit 1
|
||||
}
|
||||
|
||||
trap cleanup EXIT
|
||||
|
||||
# log "Starting input guards filter on port 10500..."
|
||||
# uv run python -m rag_agent --host 0.0.0.0 --port 10500 --agent input_guards &
|
||||
# WAIT_FOR_PIDS+=($!)
|
||||
|
||||
|
||||
log "Starting query_parser agent on port 10501..."
|
||||
uv run python -m rag_agent --host 0.0.0.0 --port 10501 --agent query_rewriter &
|
||||
WAIT_FOR_PIDS+=($!)
|
||||
|
||||
log "Starting context_builder agent on port 10502..."
|
||||
uv run python -m rag_agent --host 0.0.0.0 --port 10502 --agent context_builder &
|
||||
WAIT_FOR_PIDS+=($!)
|
||||
|
||||
# log "Starting response_generator agent on port 10400..."
|
||||
# uv run python -m rag_agent --host 0.0.0.0 --port 10400 --agent response_generator &
|
||||
# WAIT_FOR_PIDS+=($!)
|
||||
|
||||
log "Starting response_generator agent on port 10505..."
|
||||
uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10505 --agent response_generator &
|
||||
WAIT_FOR_PIDS+=($!)
|
||||
|
||||
for PID in "${WAIT_FOR_PIDS[@]}"; do
|
||||
wait "$PID"
|
||||
done
|
||||
95
demos/use_cases/mcp_filter/test.rest
Normal file
95
demos/use_cases/mcp_filter/test.rest
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
@baseUrl = http://0.0.0.0:10502
|
||||
@model = gpt-4o
|
||||
|
||||
# Health Check
|
||||
GET {{baseUrl}}/health
|
||||
|
||||
###
|
||||
|
||||
# Test 1: Simple Non-Streaming Chat Completion
|
||||
POST {{baseUrl}}/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "{{model}}",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hello! Can you help me understand what machine learning is?"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
###
|
||||
|
||||
# Test 2: Simple Streaming Chat Completion
|
||||
POST {{baseUrl}}/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "{{model}}",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Explain the concept of artificial intelligence in simple terms."
|
||||
}
|
||||
],
|
||||
"stream": true
|
||||
}
|
||||
|
||||
### Test 3
|
||||
POST http://localhost:8001/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "{{model}}",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
|
||||
}
|
||||
],
|
||||
"stream": true
|
||||
}
|
||||
|
||||
### send request to context builder agent
|
||||
POST http://localhost:10501/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "gpt-4o-mini",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
### test fast-llm
|
||||
POST http://localhost:12000/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "fast-llm",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "hello"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
### test smart-llm
|
||||
POST http://localhost:12000/v1/chat/completions
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"model": "smart-llm",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "hello"
|
||||
}
|
||||
]
|
||||
}
|
||||
1830
demos/use_cases/mcp_filter/uv.lock
generated
Normal file
1830
demos/use_cases/mcp_filter/uv.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -10,7 +10,6 @@ services:
|
|||
volumes:
|
||||
- ../../demos/samples_python/weather_forecast/arch_config.yaml:/app/arch_config.yaml
|
||||
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
|
||||
- ~/archgw_logs:/var/log/
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
environment:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue