pending changes

This commit is contained in:
Adil Hafeez 2025-12-15 18:17:15 -08:00
parent afffa11e91
commit 358fa856c4
No known key found for this signature in database
GPG key ID: 9B18EF7691369645
21 changed files with 1195 additions and 403 deletions

View file

@ -2,7 +2,7 @@
nodaemon=true
[program:brightstaff]
command=sh -c "RUST_LOG=info /app/brightstaff 2>&1 | tee /var/log/brightstaff.log | while IFS= read -r line; do echo '[brightstaff]' \"$line\"; done"
command=sh -c "RUST_LOG=debug /app/brightstaff 2>&1 | tee /var/log/brightstaff.log | while IFS= read -r line; do echo '[brightstaff]' \"$line\"; done"
stdout_logfile=/dev/stdout
redirect_stderr=true
stdout_logfile_maxbytes=0

View file

@ -101,8 +101,17 @@ def validate_and_render_schema():
# Process agents section and convert to endpoints
agents = config_yaml.get("agents", [])
for agent in agents:
agent_filters = config_yaml.get("agent_filters", [])
agents_combined = agents + agent_filters
agent_id_keys = set()
for agent in agents_combined:
agent_id = agent.get("id")
if agent_id in agent_id_keys:
raise Exception(
f"Duplicate agent id {agent_id}, please provide unique id for each agent"
)
agent_id_keys.add(agent_id)
agent_endpoint = agent.get("url")
if agent_id and agent_endpoint:

View file

@ -1 +1 @@
docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.2
docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.21

View file

@ -81,7 +81,7 @@ async fn handle_agent_chat(
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, AgentFilterChainError> {
// Initialize services
let agent_selector = AgentSelector::new(router_service);
let pipeline_processor = PipelineProcessor::default();
let mut pipeline_processor = PipelineProcessor::default();
let response_handler = ResponseHandler::new();
// Extract listener name from headers
@ -144,9 +144,9 @@ async fn handle_agent_chat(
debug!("Processing agent pipeline: {}", selected_agent.id);
// Process the filter chain
let processed_messages = pipeline_processor
let chat_history = pipeline_processor
.process_filter_chain(
&chat_completions_request,
&chat_completions_request.messages,
&selected_agent,
&agent_map,
&request_headers,
@ -161,8 +161,8 @@ async fn handle_agent_chat(
debug!("Terminal agent details: {:?}", terminal_agent);
let llm_response = pipeline_processor
.invoke_upstream_agent(
&processed_messages,
.invoke_terminal_agent(
&chat_history,
&chat_completions_request,
terminal_agent,
&request_headers,

View file

@ -8,7 +8,6 @@ use hermesllm::apis::openai::Message;
use tracing::{debug, warn};
use crate::router::llm_router::RouterService;
use crate::utils::mcp_client::McpClient;
/// Errors that can occur during agent selection
#[derive(Debug, thiserror::Error)]
@ -28,14 +27,12 @@ pub enum AgentSelectionError {
/// Service for selecting agents based on routing preferences and listener configuration
pub struct AgentSelector {
router_service: Arc<RouterService>,
mcp_client: McpClient,
}
impl AgentSelector {
pub fn new(router_service: Arc<RouterService>) -> Self {
Self {
router_service,
mcp_client: McpClient::new(),
}
}
@ -152,7 +149,7 @@ impl AgentSelector {
for agent_chain in agents {
// Get the actual agent from the agent_map
let agent = agent_map.get(&agent_chain.id);
// Determine the description to use
let description = if let Some(agent) = agent {
// Check if this is an MCP agent (URL starts with mcp://)
@ -161,36 +158,10 @@ impl AgentSelector {
"Agent {} is an MCP agent, fetching tool description from: {}",
agent.id, agent.url
);
// Fetch description from MCP endpoint
match self
.mcp_client
.fetch_tool_description(&agent.url, agent.tool.as_deref())
.await
{
Ok(mcp_description) => {
if !mcp_description.is_empty() {
debug!(
"Fetched MCP description for agent {}: {}",
agent.id, mcp_description
);
mcp_description
} else {
warn!(
"MCP tool description is empty for agent {}, using config description",
agent.id
);
agent_chain.description.clone().unwrap_or_default()
}
}
Err(e) => {
warn!(
"Failed to fetch MCP description for agent {}: {}, using config description",
agent.id, e
);
agent_chain.description.clone().unwrap_or_default()
}
}
//TODO: fetch description from mcp server
"MCP tool description placeholder from config".to_string()
} else {
// Not an MCP agent, use description from config
agent_chain.description.clone().unwrap_or_default()

View file

@ -0,0 +1,44 @@
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Identifier carried by JSON-RPC requests and responses.
///
/// `#[serde(untagged)]` keeps the variant name off the wire: the id is
/// (de)serialized as a bare JSON string or a bare unsigned integer.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum JsonRpcId {
/// Id expressed as a JSON string.
String(String),
/// Id expressed as a non-negative JSON integer that fits in a `u64`.
Number(u64),
}
/// A JSON-RPC request message: a method call that carries an `id`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonRpcRequest {
/// Protocol version string (e.g. `"2.0"`).
pub jsonrpc: String,
/// Identifier of this request.
pub id: JsonRpcId,
/// Name of the method to invoke.
pub method: String,
/// Named parameters for the method; the `params` key is left out of the
/// serialized JSON entirely when this is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub params: Option<HashMap<String, serde_json::Value>>,
}
/// A JSON-RPC notification message.
///
/// Has the same fields as [`JsonRpcRequest`] except that there is no `id`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonRpcNotification {
/// Protocol version string (e.g. `"2.0"`).
pub jsonrpc: String,
/// Name of the notification method.
pub method: String,
/// Named parameters for the notification; the `params` key is left out of
/// the serialized JSON entirely when this is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub params: Option<HashMap<String, serde_json::Value>>,
}
/// Error object embedded in a [`JsonRpcResponse`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonRpcError {
/// Numeric error code.
pub code: i32,
/// Error message text.
pub message: String,
/// Optional additional error payload of arbitrary JSON shape; the `data`
/// key is left out of the serialized JSON when this is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub data: Option<serde_json::Value>,
}
/// A JSON-RPC response message.
///
/// Both `result` and `error` are optional at the type level; this struct does
/// not enforce that exactly one of them is present.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JsonRpcResponse {
/// Protocol version string (e.g. `"2.0"`).
pub jsonrpc: String,
/// Identifier of the response.
pub id: JsonRpcId,
/// Result payload. Typed as a string-keyed map, so only a JSON object can be
/// deserialized into it. Omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub result: Option<HashMap<String, serde_json::Value>>,
/// Error payload. Omitted from serialized output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub error: Option<JsonRpcError>,
}

View file

@ -6,6 +6,7 @@ pub mod function_calling;
pub mod pipeline_processor;
pub mod response_handler;
pub mod utils;
pub mod jsonrpc;
#[cfg(test)]
mod integration_tests;

View file

@ -4,7 +4,10 @@ use common::configuration::{Agent, AgentFilterChain};
use common::consts::{ARCH_UPSTREAM_HOST_HEADER, ENVOY_RETRY_HEADER};
use hermesllm::apis::openai::{ChatCompletionsRequest, Message};
use hyper::header::HeaderMap;
use tracing::{debug, warn};
use tracing::{debug, info, warn};
use crate::handlers::jsonrpc::{JsonRpcId, JsonRpcNotification, JsonRpcRequest, JsonRpcResponse};
use uuid::Uuid;
/// Errors that can occur during pipeline processing
#[derive(Debug, thiserror::Error)]
@ -25,13 +28,17 @@ pub enum PipelineError {
pub struct PipelineProcessor {
client: reqwest::Client,
url: String,
agent_id_session_map: HashMap<String, String>,
}
const ENVOY_API_ROUTER_ADDRESS: &str = "http://localhost:11000";
impl Default for PipelineProcessor {
fn default() -> Self {
Self {
client: reqwest::Client::new(),
url: "http://localhost:11000/v1/chat/completions".to_string(),
url: ENVOY_API_ROUTER_ADDRESS.to_string(),
agent_id_session_map: HashMap::new(),
}
}
}
@ -41,18 +48,20 @@ impl PipelineProcessor {
Self {
client: reqwest::Client::new(),
url,
agent_id_session_map: HashMap::new(),
}
}
/// Process the filter chain of agents (all except the terminal agent)
pub async fn process_filter_chain(
&self,
initial_request: &ChatCompletionsRequest,
&mut self,
chat_history: &[Message],
agent_filter_chain: &AgentFilterChain,
agent_map: &HashMap<String, Agent>,
request_headers: &HeaderMap,
) -> Result<Vec<Message>, PipelineError> {
let mut chat_completions_history = initial_request.messages.clone();
let mut chat_history_updated = chat_history.to_vec();
for agent_name in &agent_filter_chain.filter_chain {
debug!("Processing filter agent: {}", agent_name);
@ -61,47 +70,83 @@ impl PipelineProcessor {
.get(agent_name)
.ok_or_else(|| PipelineError::AgentNotFound(agent_name.clone()))?;
debug!("Agent details: {:?}", agent);
let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);
let response_content = self
.send_agent_filter_chain_request(
&chat_completions_history,
initial_request,
info!("executing filter: {}/{}, url: {}, conversation length: {}", agent_name, tool_name, agent.url, chat_history.len());
chat_history_updated = self
.execute_filter(
&chat_history_updated,
agent,
request_headers,
)
.await?;
debug!("Received response from filter agent {}", agent_name);
// Parse the response content as new message history
chat_completions_history =
serde_json::from_str(&response_content).inspect_err(|err| {
warn!(
"Failed to parse response from agent {}, err: {}, response: {}",
agent_name, err, response_content
)
})?;
info!("Received response: updated conversation length: {}", chat_history.len());
}
Ok(chat_completions_history)
Ok(chat_history_updated)
}
/// Send request to a specific agent and return the response content
async fn send_agent_filter_chain_request(
&self,
async fn execute_filter(
&mut self,
messages: &[Message],
original_request: &ChatCompletionsRequest,
agent: &Agent,
request_headers: &HeaderMap,
) -> Result<String, PipelineError> {
let mut request = original_request.clone();
request.messages = messages.to_vec();
) -> Result<Vec<Message>, PipelineError> {
let request_body = serde_json::to_string(&request)?;
debug!("Sending request to agent {}", agent.id);
let mcp_session_id = if let Some(session_id) = self.agent_id_session_map.get(&agent.id) {
session_id.clone()
} else {
let session_id = self.get_new_session_id(&agent.id).await;
self.agent_id_session_map
.insert(agent.id.clone(), session_id.clone());
session_id
};
// let mut request = original_request.clone();
// request.messages = messages.to_vec();
let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);
let arguments = serde_json::json!({
"messages": messages
});
let params = serde_json::json!({
"name": tool_name,
"arguments": arguments
});
let json_rpc_request = JsonRpcRequest {
jsonrpc: "2.0".to_string(),
id: JsonRpcId::String(Uuid::new_v4().to_string()),
method: "tools/call".to_string(),
params: Some(serde_json::from_value(params)?),
};
let request_body = serde_json::to_string(&json_rpc_request)?;
info!("Sending request to agent {}", agent.id);
info!("Request body: {}", request_body);
// Pretty print for debugging
let pretty_body = serde_json::to_string_pretty(&json_rpc_request)?;
info!("Request body (pretty):\n{}", pretty_body);
let mut agent_headers = request_headers.clone();
info!("Using MCP session ID {} for agent {}", mcp_session_id, agent.id);
// Log all headers being sent
info!("Headers being sent:");
for (key, value) in agent_headers.iter() {
info!(" {}: {:?}", key, value);
}
agent_headers.insert(
"mcp-session-id",
hyper::header::HeaderValue::from_str(&mcp_session_id).unwrap(),
);
agent_headers.remove(hyper::header::CONTENT_LENGTH);
agent_headers.insert(
ARCH_UPSTREAM_HOST_HEADER,
@ -114,9 +159,24 @@ impl PipelineProcessor {
hyper::header::HeaderValue::from_str("3").unwrap(),
);
agent_headers.insert(
"Accept",
hyper::header::HeaderValue::from_static("application/json, text/event-stream"),
);
agent_headers.insert(
"Content-Type",
hyper::header::HeaderValue::from_static("application/json"),
);
info!("Final headers being sent:");
for (key, value) in agent_headers.iter() {
info!(" {}: {:?}", key, value);
}
let response = self
.client
.post(&self.url)
.post(format!("{}/mcp", self.url))
.headers(agent_headers)
.body(request_body)
.send()
@ -124,24 +184,149 @@ impl PipelineProcessor {
let response_bytes = response.bytes().await?;
// Parse the response as JSON to extract the content
let response_json: serde_json::Value = serde_json::from_slice(&response_bytes)?;
info!(
"response bytes in str: {}",
String::from_utf8_lossy(&response_bytes)
);
let content = response_json
.get("choices")
.and_then(|choices| choices.as_array())
.and_then(|choices| choices.first())
.and_then(|choice| choice.get("message"))
.and_then(|message| message.get("content"))
.and_then(|content| content.as_str())
let response_str = String::from_utf8_lossy(&response_bytes);
let lines: Vec<&str> = response_str.lines().collect();
// Validate SSE format: first line should be "event: message"
if lines.is_empty() || lines[0] != "event: message" {
warn!("Invalid SSE response format from agent {}: expected 'event: message' as first line, got: {:?}", agent.id, lines.first());
return Err(PipelineError::NoContentInResponse(format!(
"Invalid SSE response format from agent {}: expected 'event: message' as first line",
agent.id
)));
}
// Find the data line
let data_lines: Vec<&str> = lines
.iter()
.filter(|line| line.starts_with("data: "))
.copied()
.collect();
if data_lines.len() != 1 {
warn!(
"Expected exactly one 'data:' line from agent {}, found {}",
agent.id,
data_lines.len()
);
return Err(PipelineError::NoContentInResponse(format!(
"Expected exactly one 'data:' line from agent {}, found {}",
agent.id,
data_lines.len()
)));
}
let data_chunk = &data_lines[0][6..]; // Skip "data: " prefix
let response: JsonRpcResponse = serde_json::from_str(data_chunk)?;
let response_result = response
.result
.ok_or_else(|| PipelineError::NoChoicesInResponse(agent.id.clone()))?;
let response_json = response_result
.get("structuredContent")
.ok_or_else(|| PipelineError::NoChoicesInResponse(agent.id.clone()))?;
// Parse the response as JSON to extract the content
// let response_json: serde_json::Value = serde_json::from_slice(&response_bytes)?;
let messages: Vec<Message> = response_json
.get("result")
.and_then(|v| v.as_array())
.ok_or_else(|| PipelineError::NoContentInResponse(agent.id.clone()))?
.iter()
.map(|msg_value| serde_json::from_value(msg_value.clone()))
.collect::<Result<Vec<Message>, _>>()
.map_err(PipelineError::ParseError)?;
Ok(messages)
}
/// Opens a new MCP session for `agent_id` and returns the session id.
///
/// Sends a JSON-RPC `initialize` request to `{self.url}/mcp`, reads the
/// `mcp-session-id` header from the response, and then sends a
/// `notifications/initialized` notification that carries that session id.
/// `agent_id` is sent as the `ARCH_UPSTREAM_HOST_HEADER` value on both requests.
///
/// The HTTP status of both responses is only logged; it is not checked.
///
/// # Panics
///
/// Panics if either request fails to send, if serializing either body fails,
/// or if the initialize response has no `mcp-session-id` header that can be
/// read as a string.
async fn get_new_session_id(&self, agent_id: &str) -> String {
// Build the `initialize` request with a fixed id of 1.
let initialize_request = JsonRpcRequest {
jsonrpc: "2.0".to_string(),
id: JsonRpcId::Number(1),
method: "initialize".to_string(),
params: Some({
let mut params = HashMap::new();
params.insert(
"protocolVersion".to_string(),
serde_json::Value::String("2024-11-05".to_string()),
);
// Empty capabilities object: no client capabilities are advertised.
params.insert("capabilities".to_string(), serde_json::json!({}));
params.insert(
"clientInfo".to_string(),
serde_json::json!({
"name": "brightstaff",
"version": "1.0.0"
}),
);
params
}),
};
let request_body = serde_json::to_string(&initialize_request).unwrap();
info!("Initializing MCP session for agent {}", agent_id);
info!("Initialize request body: {}", request_body);
let response = self
.client
.post(format!("{}/mcp", self.url))
.header("Content-Type", "application/json")
.header("Accept", "application/json, text/event-stream")
.header(ARCH_UPSTREAM_HOST_HEADER, agent_id)
.body(request_body)
.send()
.await
.expect("Failed to initialize MCP session");
info!("Initialize response status: {}", response.status());
info!("Initialize response headers: {:?}", response.headers());
// The session id comes from a response header; the response body is not read here.
let session_id = response
.headers()
.get("mcp-session-id")
.and_then(|v| v.to_str().ok())
.expect("No mcp-session-id in response")
.to_string();
Ok(content)
info!("Created new MCP session for agent {}: {}", agent_id, session_id);
// Send initialized notification (without id field per JSON-RPC 2.0 spec)
let initialized_notification = JsonRpcNotification {
jsonrpc: "2.0".to_string(),
method: "notifications/initialized".to_string(),
params: None,
};
let notification_body = serde_json::to_string(&initialized_notification).unwrap();
info!("Sending initialized notification: {}", notification_body);
// Same endpoint as the initialize call, now tagged with the new session id.
let notif_response = self
.client
.post(format!("{}/mcp", self.url))
.header("Content-Type", "application/json")
.header("Accept", "application/json, text/event-stream")
.header("mcp-session-id", &session_id)
.header(ARCH_UPSTREAM_HOST_HEADER, agent_id)
.body(notification_body)
.send()
.await
.expect("Failed to send initialized notification");
info!("Initialized notification response status: {}", notif_response.status());
session_id
}
/// Send request to terminal agent and return the raw response for streaming
pub async fn invoke_upstream_agent(
pub async fn invoke_terminal_agent(
&self,
messages: &[Message],
original_request: &ChatCompletionsRequest,
@ -169,7 +354,7 @@ impl PipelineProcessor {
let response = self
.client
.post(&self.url)
.post(format!("{}/v1/chat/completions", self.url))
.headers(agent_headers)
.body(request_body)
.send()

View file

@ -5,7 +5,7 @@ use brightstaff::handlers::function_calling::{function_calling_chat_handler};
use brightstaff::router::llm_router::RouterService;
use brightstaff::utils::tracing::init_tracer;
use bytes::Bytes;
use common::configuration::Configuration;
use common::configuration::{Agent, Configuration};
use common::consts::{CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH};
use http_body_util::{combinators::BoxBody, BodyExt, Empty};
use hyper::body::Incoming;
@ -63,9 +63,18 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let arch_config = Arc::new(config);
// combine agents and agent_filters into a single list of agents
let all_agents: Vec<Agent> = arch_config
.agents
.as_deref()
.unwrap_or_default()
.iter()
.chain(arch_config.agent_filters.as_deref().unwrap_or_default())
.cloned()
.collect();
let llm_providers = Arc::new(RwLock::new(arch_config.model_providers.clone()));
let agents_list = Arc::new(RwLock::new(arch_config.agents.clone()));
let agent_filters = Arc::new(RwLock::new(arch_config.agent_filters.clone()));
let agents_list = Arc::new(RwLock::new(Some(all_agents)));
let listeners = Arc::new(RwLock::new(arch_config.listeners.clone()));
debug!(
@ -112,7 +121,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let llm_providers = llm_providers.clone();
let agents_list = agents_list.clone();
let agent_filters = agent_filters.clone();
let listeners = listeners.clone();
let service = service_fn(move |req| {
let router_service = Arc::clone(&router_service);
@ -121,7 +129,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
let llm_providers = llm_providers.clone();
let model_aliases = Arc::clone(&model_aliases);
let agents_list = agents_list.clone();
let agent_filters = agent_filters.clone();
let listeners = listeners.clone();
async move {

1
crates/build.sh Normal file
View file

@ -0,0 +1 @@
# Build the prompt_gateway and llm_gateway crates for the wasm32-wasip1 target in
# release mode, then (only if that succeeds) build brightstaff in release mode.
cargo build --release --target wasm32-wasip1 -p prompt_gateway -p llm_gateway && cargo build --release -p brightstaff

View file

@ -21,16 +21,10 @@ pub struct ModelAlias {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Agent {
pub id: String,
pub transport: Option<String>,
pub tool: Option<String>,
pub url: String,
pub kind: Option<String>,
pub url: String,
pub tool: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AgentFilter {
pub id: String,
pub url: String,
pub tool: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -65,7 +59,7 @@ pub struct Configuration {
pub mode: Option<GatewayMode>,
pub routing: Option<Routing>,
pub agents: Option<Vec<Agent>>,
pub agent_filters: Option<Vec<AgentFilter>>,
pub agent_filters: Option<Vec<Agent>>,
pub listeners: Vec<Listener>,
}

View file

@ -2,23 +2,21 @@ version: v0.3.0
agents:
- id: rag_agent
url: mcp://host.docker.internal:10501
# only sse is supported
# transport: sse or stdio
# optional tool name, defaults to "invoke"
# tool: invoke
url: mcp://host.docker.internal:10505
- id: travel_agent
url: mcp://host.docker.internal:10502
transport: streamable-http
tool: invoke
url: mcp://host.docker.internal:10401
agent_filters:
- id: query_rewriter
url: mcp://host.docker.internal:10500
# tool is optional, defaults to id
# tool: query_rewriter
transport: streamable-http
tool: query_rewriter
url: mcp://host.docker.internal:10501
- id: context_builder
url: mcp://host.docker.internal:10500
- id: input_guards
url: mcp://host.docker.internal:10500
transport: streamable-http
tool: context_builder
url: mcp://host.docker.internal:10502
model_providers:
- model: openai/gpt-4o-mini
@ -35,20 +33,20 @@ model_aliases:
listeners:
- type: agent
name: agent_1
port: 8001
router: arch_agent_router
agents:
- id: rag_agent
description: virtual assistant for retrieval augmented generation tasks
filter_chain:
- input_guards
- query_rewriter
- context_builder
- id: travel_agent
description: virtual assistant for travel bookings and recommendations
filter_chain:
- input_guards
# - id: travel_agent
# description: virtual assistant for travel bookings and recommendations
# filter_chain:
# - input_guards
tracing:
random_sampling: 100

View file

@ -0,0 +1,86 @@
### Initialize MCP Session (SSE)
POST http://localhost:10501/mcp
Content-Type: application/json
Accept: application/json, text/event-stream
{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"capabilities":{},"protocolVersion":"2024-11-05","clientInfo":{"name":"test","version":"1.0.0"}}}
### Send Initialized Notification
POST http://localhost:10501/mcp
Content-Type: application/json
Accept: application/json, text/event-stream
mcp-session-id: e4ec1ae904e14e06b7d194da10e5f74c
{
"jsonrpc": "2.0",
"method": "notifications/initialized"
}
### List Tools
POST http://localhost:10501/mcp
Content-Type: application/json
Accept: application/json, text/event-stream
mcp-session-id: eb10a691b36e4547b6c93c5dc5b47e11
{
"jsonrpc": "2.0",
"id": "list-tools-1",
"method": "tools/list"
}
### Call Query Rewriter Tool
POST http://localhost:10501/mcp
Content-Type: application/json
Accept: application/json, text/event-stream
mcp-session-id: 6b95ff75825a402b90eb3ea07e23fbce
{
"jsonrpc": "2.0",
"id": "3d3b886a-6216-4a26-a422-7a972529c0e7",
"method": "tools/call",
"params": {
"arguments": {
"messages": [
{
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?",
"role": "user"
}
]
},
"name": "query_rewriter"
}
}
### another test
# Content-Type: application/json
# Accept: application/json, text/event-stream
# mcp-session-id: ed7a81a1d39549ecaadb867a6b2daf1e
POST http://localhost:10501/mcp
content-type: application/json
mcp-session-id: e4ec1ae904e14e06b7d194da10e5f74c
accept: application/json, text/event-stream
{"jsonrpc":"2.0","id":"4bb1043a-2953-4bcd-b801-f270b0ae8c39","method":"tools/call","params":{"arguments":{"messages":[{"content":"What is the guaranteed uptime percentage for TechCorp's cloud services?","role":"user"}]},"name":"query_rewriter"}}
### stream test
POST http://localhost:10501/mcp
content-type: application/json
mcp-session-id: 60be9fb816304cb6b9ecdb91d89cd91f
accept: application/json, text/event-stream
{
"jsonrpc": "2.0",
"id": 1,
"method": "tools/call",
"params": {
"name": "long_job",
"arguments": {
"n": 3
}
}
}

View file

@ -7,7 +7,7 @@ requires-python = ">=3.10"
dependencies = [
"click>=8.2.1",
"mcp>=1.13.1",
"fastmcp>=2.12.2",
"fastmcp>=2.14",
"pydantic>=2.11.7",
"fastapi>=0.104.1",
"uvicorn>=0.24.0",

View file

@ -1,50 +1,88 @@
import click
from mcp.server.fastmcp import FastMCP
from fastmcp import FastMCP
mcp = None
@click.command()
@click.option("--transport", "transport", default="sse", help="Transport type: stdio or sse")
@click.option(
"--transport",
"transport",
default="streamable-http",
help="Transport type: stdio or sse",
)
@click.option("--host", "host", default="localhost", help="Host to bind MCP server to")
@click.option("--port", "port", type=int, default=10500, help="Port for MCP server")
@click.option("--agent", "agent", required=True, help="Agent name: query_rewriter, context_builder, or response_generator")
@click.option("--name", "agent_name", default=None, help="Custom MCP server name (defaults to agent type)")
def main(host, port, agent, transport, agent_name):
@click.option(
"--agent",
"agent",
required=True,
help="Agent name: query_rewriter, context_builder, or response_generator",
)
@click.option(
"--name",
"agent_name",
default=None,
help="Custom MCP server name (defaults to agent type)",
)
@click.option(
"--rest-server",
"rest_server",
is_flag=True,
help="Start REST server instead of MCP server",
)
@click.option("--rest-port", "rest_port", default=8000, help="Port for REST server")
def main(host, port, agent, transport, agent_name, rest_server, rest_port):
"""Start a RAG agent as an MCP server."""
# Map friendly names to agent modules
agent_map = {
"query_rewriter": ("rag_agent.query_rewriter", "Query Rewriter Agent"),
"context_builder": ("rag_agent.context_builder_agent", "Context Builder Agent"),
"response_generator": ("rag_agent.response_generator", "Response Generator Agent"),
"context_builder": ("rag_agent.context_builder", "Context Builder Agent"),
"response_generator": (
"rag_agent.rag_agent",
"Response Generator Agent",
),
}
module_name, default_name = agent_map[agent]
mcp_name = agent_name or default_name
global mcp
mcp = FastMCP(mcp_name, host=host, port=port)
if agent not in agent_map:
print(f"Error: Unknown agent '{agent}'")
print(f"Available agents: {', '.join(agent_map.keys())}")
return
module_name, default_name = agent_map[agent]
mcp_name = agent_name or default_name
print(f"Starting MCP server: {mcp_name}")
print(f" Agent: {agent}")
print(f" Transport: {transport}")
print(f" Host: {host}")
print(f" Port: {port}")
global mcp
mcp = FastMCP(mcp_name, host=host, port=port)
# Import the agent module to register its tools
import importlib
importlib.import_module(module_name)
print(f"Agent '{agent}' loaded successfully")
print(f"MCP server ready on {transport}://{host}:{port}")
mcp.run(transport=transport)
if rest_server:
print(f"Starting REST server on {host}:{rest_port} for agent: {agent}")
if agent == "response_generator":
from rag_agent.rag_agent import start_server
start_server(host=host, port=rest_port)
return
else:
print("Please specify an agent to start with --agent option.")
return
else:
print(f"Starting MCP server: {mcp_name}")
print(f" Agent: {agent}")
print(f" Transport: {transport}")
print(f" Host: {host}")
print(f" Port: {port}")
# Import the agent module to register its tools
import importlib
importlib.import_module(module_name)
print(f"Agent '{agent}' loaded successfully")
print(f"MCP server ready on {transport}://{host}:{port}")
mcp.run(transport=transport)
if __name__ == "__main__":

View file

@ -191,54 +191,30 @@ class Response(BaseModel):
# FastAPI app for REST server
app = FastAPI(title="RAG Content Builder Agent", version="1.0.0")
@mcp.tool()
@app.post("/v1/chat/completions")
async def context_builder(
request_body: ChatCompletionRequest
) -> ChatCompletionResponse:
""" chat completions endpoint that augments user queries with relevant context from the knowledge base."""
async def context_builder(messages: List[ChatMessage]) -> List[ChatMessage]:
"""chat completions endpoint that augments user queries with relevant context from the knowledge base."""
import time
import uuid
logger.info(
f"Received chat completion request with {len(request_body.messages)} messages"
)
logger.info(f"Received chat completion request with {len(messages)} messages")
# Get traceparent header from HTTP request using FastMCP's dependency function
headers = get_http_headers()
traceparent_header = headers.get("traceparent")
if traceparent_header:
logger.info(f"Received traceparent header: {traceparent_header}")
else:
logger.info("No traceparent header found")
# Augment the user query with relevant context
updated_messages = await augment_query_with_context(
request_body.messages, traceparent_header
)
messages_history_json = json.dumps([msg.dict() for msg in updated_messages])
updated_messages = await augment_query_with_context(messages, traceparent_header)
response = ChatCompletionResponse(
id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
created=int(time.time()),
model=request_body.model,
choices=[
{
"index": 0,
"message": {"role": "user", "content": messages_history_json},
"finish_reason": "stop",
}
],
usage={
"prompt_tokens": sum(len(msg.content.split()) for msg in updated_messages),
"completion_tokens": len("Context added to user query.".split()),
"total_tokens": sum(len(msg.content.split()) for msg in updated_messages)
+ len("Context added to user query.".split()),
},
)
return response
# Return as dict to minimize text serialization
return [{"role": msg.role, "content": msg.content} for msg in updated_messages]
def main():

View file

@ -1,3 +1,4 @@
import asyncio
import json
from pydantic import BaseModel
from typing import List, Optional, Dict, Any
@ -11,6 +12,9 @@ from .api import ChatMessage, ChatCompletionRequest, ChatCompletionResponse
from . import mcp
from fastmcp.server.dependencies import get_http_headers
from fastmcp.dependencies import CurrentContext
from fastmcp.server.context import Context
# Set up logging
logging.basicConfig(
level=logging.INFO,
@ -29,10 +33,11 @@ archgw_client = AsyncOpenAI(
api_key="EMPTY", # archgw doesn't require a real API key
)
async def rewrite_query_with_archgw(
messages: List[ChatMessage], traceparent_header: str
) -> str:
""" Rewrite the user query using LLM for better retrieval. """
"""Rewrite the user query using LLM for better retrieval."""
system_prompt = """You are a query rewriter that improves user queries for better retrieval.
Given a conversation history, rewrite the last user message to be more specific and context-aware.
@ -89,33 +94,31 @@ class Response(BaseModel):
app = FastAPI(title="RAG Agent Query Parser", version="1.0.0")
@app.post("/v1/chat/completions")
@mcp.tool()
async def query_rewriter(request_body: ChatCompletionRequest):
"""Chat completions endpoint that rewrites the last user query using archgw."""
async def query_rewriter(messages: List[ChatMessage]) -> List[ChatMessage]:
"""Chat completions endpoint that rewrites the last user query using archgw.
Returns a dict with a 'messages' key containing the updated message list.
"""
import time
import uuid
logger.info(
f"Received chat completion request with {len(request_body.messages)} messages"
)
logger.info(f"Received chat completion request with {len(messages)} messages")
# Get traceparent header from HTTP request using FastMCP's dependency function
headers = get_http_headers()
traceparent_header = headers.get("traceparent")
if traceparent_header:
logger.info(f"Received traceparent header: {traceparent_header}")
else:
logger.info("No traceparent header found")
# Call archgw to rewrite the last user query
rewritten_query = await rewrite_query_with_archgw(
request_body.messages, traceparent_header
)
rewritten_query = await rewrite_query_with_archgw(messages, traceparent_header)
# Create updated messages with the rewritten query
updated_messages = request_body.messages.copy()
updated_messages = messages.copy()
# Find and update the last user message with the rewritten query
for i in range(len(updated_messages) - 1, -1, -1):
@ -127,28 +130,8 @@ async def query_rewriter(request_body: ChatCompletionRequest):
)
break
messages_history_json = json.dumps([msg.dict() for msg in updated_messages])
response = ChatCompletionResponse(
id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
created=int(time.time()),
model=request_body.model,
choices=[
{
"index": 0,
"message": {"role": "user", "content": messages_history_json},
"finish_reason": "stop",
}
],
usage={
"prompt_tokens": sum(len(msg.content.split()) for msg in updated_messages),
"completion_tokens": len("Updated query for better retrieval.".split()),
"total_tokens": sum(len(msg.content.split()) for msg in updated_messages)
+ len("Updated query for better retrieval.".split()),
},
)
return response
# Return as dict to minimize text serialization
return [{"role": msg.role, "content": msg.content} for msg in updated_messages]
@app.get("/health")

View file

@ -63,9 +63,8 @@ def prepare_response_messages(request_body: ChatCompletionRequest):
@app.post("/v1/chat/completions")
@mcp.tool(name="invoke")
async def chat_completion(request_body: ChatCompletionRequest):
"""Chat completions endpoint that generates a coherent response based on all context."""
async def chat_completion_http(request_body: ChatCompletionRequest):
"""HTTP endpoint for chat completions with streaming support."""
logger.info(
f"Received chat completion request with {len(request_body.messages)} messages"
)
@ -73,7 +72,7 @@ async def chat_completion(request_body: ChatCompletionRequest):
# Get traceparent header from HTTP request using FastMCP's dependency function
headers = get_http_headers()
traceparent_header = headers.get("traceparent")
if traceparent_header:
logger.info(f"Received traceparent header: {traceparent_header}")
else:
@ -92,6 +91,23 @@ async def chat_completion(request_body: ChatCompletionRequest):
return await non_streaming_chat_completions(request_body, traceparent_header)
@mcp.tool(name="invoke")
async def chat_completion(request_body: ChatCompletionRequest):
    """Chat completions endpoint that generates a coherent response based on all context.

    For MCP calls the non-streaming path is always used, so the tool returns
    one complete response instead of a stream.
    """
    logger.info(
        f"[MCP] Received chat completion request with {len(request_body.messages)} messages"
    )

    # Forward the caller's trace context, as the other MCP tools in this
    # package do, so the downstream call stays part of the same trace.
    # The value is None when the header is absent.
    traceparent_header = get_http_headers().get("traceparent")
    if traceparent_header:
        logger.info(f"[MCP] Received traceparent header: {traceparent_header}")
    else:
        logger.info("[MCP] No traceparent header found")

    # For MCP, always use non-streaming to return a complete response
    return await non_streaming_chat_completions(
        request_body, traceparent_header=traceparent_header
    )
async def stream_chat_completions(
request_body: ChatCompletionRequest, traceparent_header: str = None
):

View file

@ -21,16 +21,25 @@ cleanup() {
trap cleanup EXIT
log "Starting query_parser agent on port 10500..."
uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10500 --agent query_parser &
# log "Starting input guards filter on port 10500..."
# uv run python -m rag_agent --host 0.0.0.0 --port 10500 --agent input_guards &
# WAIT_FOR_PIDS+=($!)
# Launch the query_rewriter MCP server in the background and remember its PID
# so the script can wait on it later.
log "Starting query_rewriter agent on port 10501..."
uv run python -m rag_agent --host 0.0.0.0 --port 10501 --agent query_rewriter &
WAIT_FOR_PIDS+=($!)
log "Starting context_builder agent on port 10501..."
uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10501 --agent context_builder &
log "Starting context_builder agent on port 10502..."
uv run python -m rag_agent --host 0.0.0.0 --port 10502 --agent context_builder &
WAIT_FOR_PIDS+=($!)
log "Starting response_generator agent on port 10502..."
uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10502 --agent response_generator &
# log "Starting response_generator agent on port 10400..."
# uv run python -m rag_agent --host 0.0.0.0 --port 10400 --agent response_generator &
# WAIT_FOR_PIDS+=($!)
log "Starting response_generator agent on port 10505..."
uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10505 --agent response_generator &
WAIT_FOR_PIDS+=($!)
for PID in "${WAIT_FOR_PIDS[@]}"; do

View file

@ -49,7 +49,7 @@ Content-Type: application/json
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
}
],
"stream": false
"stream": true
}
### send request to context builder agent

File diff suppressed because it is too large Load diff