mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
pending changes
This commit is contained in:
parent
afffa11e91
commit
358fa856c4
21 changed files with 1195 additions and 403 deletions
|
|
@ -2,7 +2,7 @@
|
|||
nodaemon=true
|
||||
|
||||
[program:brightstaff]
|
||||
command=sh -c "RUST_LOG=info /app/brightstaff 2>&1 | tee /var/log/brightstaff.log | while IFS= read -r line; do echo '[brightstaff]' \"$line\"; done"
|
||||
command=sh -c "RUST_LOG=debug /app/brightstaff 2>&1 | tee /var/log/brightstaff.log | while IFS= read -r line; do echo '[brightstaff]' \"$line\"; done"
|
||||
stdout_logfile=/dev/stdout
|
||||
redirect_stderr=true
|
||||
stdout_logfile_maxbytes=0
|
||||
|
|
|
|||
|
|
@ -101,8 +101,17 @@ def validate_and_render_schema():
|
|||
|
||||
# Process agents section and convert to endpoints
|
||||
agents = config_yaml.get("agents", [])
|
||||
for agent in agents:
|
||||
agent_filters = config_yaml.get("agent_filters", [])
|
||||
agents_combined = agents + agent_filters
|
||||
agent_id_keys = set()
|
||||
|
||||
for agent in agents_combined:
|
||||
agent_id = agent.get("id")
|
||||
if agent_id in agent_id_keys:
|
||||
raise Exception(
|
||||
f"Duplicate agent id {agent_id}, please provide unique id for each agent"
|
||||
)
|
||||
agent_id_keys.add(agent_id)
|
||||
agent_endpoint = agent.get("url")
|
||||
|
||||
if agent_id and agent_endpoint:
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.2
|
||||
docker build -f arch/Dockerfile . -t katanemo/archgw -t katanemo/archgw:0.3.21
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ async fn handle_agent_chat(
|
|||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, AgentFilterChainError> {
|
||||
// Initialize services
|
||||
let agent_selector = AgentSelector::new(router_service);
|
||||
let pipeline_processor = PipelineProcessor::default();
|
||||
let mut pipeline_processor = PipelineProcessor::default();
|
||||
let response_handler = ResponseHandler::new();
|
||||
|
||||
// Extract listener name from headers
|
||||
|
|
@ -144,9 +144,9 @@ async fn handle_agent_chat(
|
|||
debug!("Processing agent pipeline: {}", selected_agent.id);
|
||||
|
||||
// Process the filter chain
|
||||
let processed_messages = pipeline_processor
|
||||
let chat_history = pipeline_processor
|
||||
.process_filter_chain(
|
||||
&chat_completions_request,
|
||||
&chat_completions_request.messages,
|
||||
&selected_agent,
|
||||
&agent_map,
|
||||
&request_headers,
|
||||
|
|
@ -161,8 +161,8 @@ async fn handle_agent_chat(
|
|||
debug!("Terminal agent details: {:?}", terminal_agent);
|
||||
|
||||
let llm_response = pipeline_processor
|
||||
.invoke_upstream_agent(
|
||||
&processed_messages,
|
||||
.invoke_terminal_agent(
|
||||
&chat_history,
|
||||
&chat_completions_request,
|
||||
terminal_agent,
|
||||
&request_headers,
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ use hermesllm::apis::openai::Message;
|
|||
use tracing::{debug, warn};
|
||||
|
||||
use crate::router::llm_router::RouterService;
|
||||
use crate::utils::mcp_client::McpClient;
|
||||
|
||||
/// Errors that can occur during agent selection
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
|
|
@ -28,14 +27,12 @@ pub enum AgentSelectionError {
|
|||
/// Service for selecting agents based on routing preferences and listener configuration
|
||||
pub struct AgentSelector {
|
||||
router_service: Arc<RouterService>,
|
||||
mcp_client: McpClient,
|
||||
}
|
||||
|
||||
impl AgentSelector {
|
||||
pub fn new(router_service: Arc<RouterService>) -> Self {
|
||||
Self {
|
||||
router_service,
|
||||
mcp_client: McpClient::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -152,7 +149,7 @@ impl AgentSelector {
|
|||
for agent_chain in agents {
|
||||
// Get the actual agent from the agent_map
|
||||
let agent = agent_map.get(&agent_chain.id);
|
||||
|
||||
|
||||
// Determine the description to use
|
||||
let description = if let Some(agent) = agent {
|
||||
// Check if this is an MCP agent (URL starts with mcp://)
|
||||
|
|
@ -161,36 +158,10 @@ impl AgentSelector {
|
|||
"Agent {} is an MCP agent, fetching tool description from: {}",
|
||||
agent.id, agent.url
|
||||
);
|
||||
|
||||
// Fetch description from MCP endpoint
|
||||
match self
|
||||
.mcp_client
|
||||
.fetch_tool_description(&agent.url, agent.tool.as_deref())
|
||||
.await
|
||||
{
|
||||
Ok(mcp_description) => {
|
||||
if !mcp_description.is_empty() {
|
||||
debug!(
|
||||
"Fetched MCP description for agent {}: {}",
|
||||
agent.id, mcp_description
|
||||
);
|
||||
mcp_description
|
||||
} else {
|
||||
warn!(
|
||||
"MCP tool description is empty for agent {}, using config description",
|
||||
agent.id
|
||||
);
|
||||
agent_chain.description.clone().unwrap_or_default()
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(
|
||||
"Failed to fetch MCP description for agent {}: {}, using config description",
|
||||
agent.id, e
|
||||
);
|
||||
agent_chain.description.clone().unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
//TODO: fetch description from mcp server
|
||||
|
||||
"MCP tool description placeholder from config".to_string()
|
||||
} else {
|
||||
// Not an MCP agent, use description from config
|
||||
agent_chain.description.clone().unwrap_or_default()
|
||||
|
|
|
|||
44
crates/brightstaff/src/handlers/jsonrpc.rs
Normal file
44
crates/brightstaff/src/handlers/jsonrpc.rs
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Identifier carried by [`JsonRpcRequest`] and [`JsonRpcResponse`].
///
/// Because of `#[serde(untagged)]` the value is written as a bare JSON string
/// or number, with no variant name wrapped around it.
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(untagged)]
|
||||
pub enum JsonRpcId {
|
||||
/// String-valued id.
String(String),
|
||||
/// Unsigned integer id; negative or fractional numbers do not fit this variant.
Number(u64),
|
||||
}
|
||||
|
||||
/// A JSON-RPC request message: a method call that carries an `id`.
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct JsonRpcRequest {
|
||||
/// Protocol version string (the pipeline processor sets this to "2.0").
pub jsonrpc: String,
|
||||
/// Request identifier; see [`JsonRpcId`].
pub id: JsonRpcId,
|
||||
/// Name of the method to invoke.
pub method: String,
|
||||
// `params` is left out of the serialized JSON entirely when it is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
/// Optional named parameters, keyed by parameter name.
pub params: Option<HashMap<String, serde_json::Value>>,
|
||||
}
|
||||
|
||||
/// A JSON-RPC notification message.
///
/// Same shape as [`JsonRpcRequest`] except that it has no `id` field.
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct JsonRpcNotification {
|
||||
/// Protocol version string (the pipeline processor sets this to "2.0").
pub jsonrpc: String,
|
||||
/// Name of the notification method.
pub method: String,
|
||||
// `params` is left out of the serialized JSON entirely when it is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
/// Optional named parameters, keyed by parameter name.
pub params: Option<HashMap<String, serde_json::Value>>,
|
||||
}
|
||||
|
||||
/// Error object carried in the `error` field of a [`JsonRpcResponse`].
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct JsonRpcError {
|
||||
/// Numeric error code.
pub code: i32,
|
||||
/// Error message text.
pub message: String,
|
||||
// `data` is left out of the serialized JSON entirely when it is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
/// Optional extra error payload; any JSON value is accepted.
pub data: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
/// A JSON-RPC response message, holding an optional `result` and an optional `error`.
///
/// Nothing in this type enforces that exactly one of `result` / `error` is set.
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct JsonRpcResponse {
|
||||
/// Protocol version string.
pub jsonrpc: String,
|
||||
/// Response identifier; see [`JsonRpcId`].
// NOTE(review): there is no null/absent form for `id` here, so a response
// whose `id` is JSON `null` would presumably fail to deserialize — confirm
// the servers we talk to never send that.
pub id: JsonRpcId,
|
||||
// `result` is left out of the serialized JSON entirely when it is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
/// Success payload as a JSON object (string keys to arbitrary JSON values).
pub result: Option<HashMap<String, serde_json::Value>>,
|
||||
// `error` is left out of the serialized JSON entirely when it is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
/// Failure details; see [`JsonRpcError`].
pub error: Option<JsonRpcError>,
|
||||
}
|
||||
|
|
@ -6,6 +6,7 @@ pub mod function_calling;
|
|||
pub mod pipeline_processor;
|
||||
pub mod response_handler;
|
||||
pub mod utils;
|
||||
pub mod jsonrpc;
|
||||
|
||||
#[cfg(test)]
|
||||
mod integration_tests;
|
||||
|
|
|
|||
|
|
@ -4,7 +4,10 @@ use common::configuration::{Agent, AgentFilterChain};
|
|||
use common::consts::{ARCH_UPSTREAM_HOST_HEADER, ENVOY_RETRY_HEADER};
|
||||
use hermesllm::apis::openai::{ChatCompletionsRequest, Message};
|
||||
use hyper::header::HeaderMap;
|
||||
use tracing::{debug, warn};
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::handlers::jsonrpc::{JsonRpcId, JsonRpcNotification, JsonRpcRequest, JsonRpcResponse};
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Errors that can occur during pipeline processing
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
|
|
@ -25,13 +28,17 @@ pub enum PipelineError {
|
|||
pub struct PipelineProcessor {
|
||||
client: reqwest::Client,
|
||||
url: String,
|
||||
agent_id_session_map: HashMap<String, String>,
|
||||
}
|
||||
|
||||
const ENVOY_API_ROUTER_ADDRESS: &str = "http://localhost:11000";
|
||||
|
||||
impl Default for PipelineProcessor {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
client: reqwest::Client::new(),
|
||||
url: "http://localhost:11000/v1/chat/completions".to_string(),
|
||||
url: ENVOY_API_ROUTER_ADDRESS.to_string(),
|
||||
agent_id_session_map: HashMap::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -41,18 +48,20 @@ impl PipelineProcessor {
|
|||
Self {
|
||||
client: reqwest::Client::new(),
|
||||
url,
|
||||
agent_id_session_map: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Process the filter chain of agents (all except the terminal agent)
|
||||
pub async fn process_filter_chain(
|
||||
&self,
|
||||
initial_request: &ChatCompletionsRequest,
|
||||
&mut self,
|
||||
chat_history: &[Message],
|
||||
agent_filter_chain: &AgentFilterChain,
|
||||
agent_map: &HashMap<String, Agent>,
|
||||
request_headers: &HeaderMap,
|
||||
) -> Result<Vec<Message>, PipelineError> {
|
||||
let mut chat_completions_history = initial_request.messages.clone();
|
||||
|
||||
let mut chat_history_updated = chat_history.to_vec();
|
||||
|
||||
for agent_name in &agent_filter_chain.filter_chain {
|
||||
debug!("Processing filter agent: {}", agent_name);
|
||||
|
|
@ -61,47 +70,83 @@ impl PipelineProcessor {
|
|||
.get(agent_name)
|
||||
.ok_or_else(|| PipelineError::AgentNotFound(agent_name.clone()))?;
|
||||
|
||||
debug!("Agent details: {:?}", agent);
|
||||
let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);
|
||||
|
||||
let response_content = self
|
||||
.send_agent_filter_chain_request(
|
||||
&chat_completions_history,
|
||||
initial_request,
|
||||
info!("executing filter: {}/{}, url: {}, conversation length: {}", agent_name, tool_name, agent.url, chat_history.len());
|
||||
|
||||
chat_history_updated = self
|
||||
.execute_filter(
|
||||
&chat_history_updated,
|
||||
agent,
|
||||
request_headers,
|
||||
)
|
||||
.await?;
|
||||
|
||||
debug!("Received response from filter agent {}", agent_name);
|
||||
|
||||
// Parse the response content as new message history
|
||||
chat_completions_history =
|
||||
serde_json::from_str(&response_content).inspect_err(|err| {
|
||||
warn!(
|
||||
"Failed to parse response from agent {}, err: {}, response: {}",
|
||||
agent_name, err, response_content
|
||||
)
|
||||
})?;
|
||||
info!("Received response: updated conversation length: {}", chat_history.len());
|
||||
}
|
||||
|
||||
Ok(chat_completions_history)
|
||||
Ok(chat_history_updated)
|
||||
}
|
||||
|
||||
/// Call a filter agent's MCP tool (`tools/call`) with the given messages and return the messages it responds with
|
||||
async fn send_agent_filter_chain_request(
|
||||
&self,
|
||||
async fn execute_filter(
|
||||
&mut self,
|
||||
messages: &[Message],
|
||||
original_request: &ChatCompletionsRequest,
|
||||
agent: &Agent,
|
||||
request_headers: &HeaderMap,
|
||||
) -> Result<String, PipelineError> {
|
||||
let mut request = original_request.clone();
|
||||
request.messages = messages.to_vec();
|
||||
) -> Result<Vec<Message>, PipelineError> {
|
||||
|
||||
let request_body = serde_json::to_string(&request)?;
|
||||
debug!("Sending request to agent {}", agent.id);
|
||||
let mcp_session_id = if let Some(session_id) = self.agent_id_session_map.get(&agent.id) {
|
||||
session_id.clone()
|
||||
} else {
|
||||
let session_id = self.get_new_session_id(&agent.id).await;
|
||||
self.agent_id_session_map
|
||||
.insert(agent.id.clone(), session_id.clone());
|
||||
session_id
|
||||
};
|
||||
|
||||
// let mut request = original_request.clone();
|
||||
// request.messages = messages.to_vec();
|
||||
|
||||
let tool_name = agent.tool.as_deref().unwrap_or(&agent.id);
|
||||
|
||||
let arguments = serde_json::json!({
|
||||
"messages": messages
|
||||
});
|
||||
|
||||
let params = serde_json::json!({
|
||||
"name": tool_name,
|
||||
"arguments": arguments
|
||||
});
|
||||
|
||||
let json_rpc_request = JsonRpcRequest {
|
||||
jsonrpc: "2.0".to_string(),
|
||||
id: JsonRpcId::String(Uuid::new_v4().to_string()),
|
||||
method: "tools/call".to_string(),
|
||||
params: Some(serde_json::from_value(params)?),
|
||||
};
|
||||
|
||||
let request_body = serde_json::to_string(&json_rpc_request)?;
|
||||
info!("Sending request to agent {}", agent.id);
|
||||
info!("Request body: {}", request_body);
|
||||
|
||||
// Pretty print for debugging
|
||||
let pretty_body = serde_json::to_string_pretty(&json_rpc_request)?;
|
||||
info!("Request body (pretty):\n{}", pretty_body);
|
||||
|
||||
let mut agent_headers = request_headers.clone();
|
||||
info!("Using MCP session ID {} for agent {}", mcp_session_id, agent.id);
|
||||
|
||||
// Log all headers being sent
|
||||
info!("Headers being sent:");
|
||||
for (key, value) in agent_headers.iter() {
|
||||
info!(" {}: {:?}", key, value);
|
||||
}
|
||||
|
||||
agent_headers.insert(
|
||||
"mcp-session-id",
|
||||
hyper::header::HeaderValue::from_str(&mcp_session_id).unwrap(),
|
||||
);
|
||||
agent_headers.remove(hyper::header::CONTENT_LENGTH);
|
||||
agent_headers.insert(
|
||||
ARCH_UPSTREAM_HOST_HEADER,
|
||||
|
|
@ -114,9 +159,24 @@ impl PipelineProcessor {
|
|||
hyper::header::HeaderValue::from_str("3").unwrap(),
|
||||
);
|
||||
|
||||
agent_headers.insert(
|
||||
"Accept",
|
||||
hyper::header::HeaderValue::from_static("application/json, text/event-stream"),
|
||||
);
|
||||
|
||||
agent_headers.insert(
|
||||
"Content-Type",
|
||||
hyper::header::HeaderValue::from_static("application/json"),
|
||||
);
|
||||
|
||||
info!("Final headers being sent:");
|
||||
for (key, value) in agent_headers.iter() {
|
||||
info!(" {}: {:?}", key, value);
|
||||
}
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&self.url)
|
||||
.post(format!("{}/mcp", self.url))
|
||||
.headers(agent_headers)
|
||||
.body(request_body)
|
||||
.send()
|
||||
|
|
@ -124,24 +184,149 @@ impl PipelineProcessor {
|
|||
|
||||
let response_bytes = response.bytes().await?;
|
||||
|
||||
// Parse the response as JSON to extract the content
|
||||
let response_json: serde_json::Value = serde_json::from_slice(&response_bytes)?;
|
||||
info!(
|
||||
"response bytes in str: {}",
|
||||
String::from_utf8_lossy(&response_bytes)
|
||||
);
|
||||
|
||||
let content = response_json
|
||||
.get("choices")
|
||||
.and_then(|choices| choices.as_array())
|
||||
.and_then(|choices| choices.first())
|
||||
.and_then(|choice| choice.get("message"))
|
||||
.and_then(|message| message.get("content"))
|
||||
.and_then(|content| content.as_str())
|
||||
let response_str = String::from_utf8_lossy(&response_bytes);
|
||||
let lines: Vec<&str> = response_str.lines().collect();
|
||||
|
||||
// Validate SSE format: first line should be "event: message"
|
||||
if lines.is_empty() || lines[0] != "event: message" {
|
||||
warn!("Invalid SSE response format from agent {}: expected 'event: message' as first line, got: {:?}", agent.id, lines.first());
|
||||
return Err(PipelineError::NoContentInResponse(format!(
|
||||
"Invalid SSE response format from agent {}: expected 'event: message' as first line",
|
||||
agent.id
|
||||
)));
|
||||
}
|
||||
|
||||
// Find the data line
|
||||
let data_lines: Vec<&str> = lines
|
||||
.iter()
|
||||
.filter(|line| line.starts_with("data: "))
|
||||
.copied()
|
||||
.collect();
|
||||
|
||||
if data_lines.len() != 1 {
|
||||
warn!(
|
||||
"Expected exactly one 'data:' line from agent {}, found {}",
|
||||
agent.id,
|
||||
data_lines.len()
|
||||
);
|
||||
return Err(PipelineError::NoContentInResponse(format!(
|
||||
"Expected exactly one 'data:' line from agent {}, found {}",
|
||||
agent.id,
|
||||
data_lines.len()
|
||||
)));
|
||||
}
|
||||
|
||||
let data_chunk = &data_lines[0][6..]; // Skip "data: " prefix
|
||||
|
||||
let response: JsonRpcResponse = serde_json::from_str(data_chunk)?;
|
||||
let response_result = response
|
||||
.result
|
||||
.ok_or_else(|| PipelineError::NoChoicesInResponse(agent.id.clone()))?;
|
||||
|
||||
let response_json = response_result
|
||||
.get("structuredContent")
|
||||
.ok_or_else(|| PipelineError::NoChoicesInResponse(agent.id.clone()))?;
|
||||
// Parse the response as JSON to extract the content
|
||||
// let response_json: serde_json::Value = serde_json::from_slice(&response_bytes)?;
|
||||
|
||||
let messages: Vec<Message> = response_json
|
||||
.get("result")
|
||||
.and_then(|v| v.as_array())
|
||||
.ok_or_else(|| PipelineError::NoContentInResponse(agent.id.clone()))?
|
||||
.iter()
|
||||
.map(|msg_value| serde_json::from_value(msg_value.clone()))
|
||||
.collect::<Result<Vec<Message>, _>>()
|
||||
.map_err(PipelineError::ParseError)?;
|
||||
|
||||
Ok(messages)
|
||||
}
|
||||
|
||||
async fn get_new_session_id(&self, agent_id: &str) -> String {
|
||||
let initialize_request = JsonRpcRequest {
|
||||
jsonrpc: "2.0".to_string(),
|
||||
id: JsonRpcId::Number(1),
|
||||
method: "initialize".to_string(),
|
||||
params: Some({
|
||||
let mut params = HashMap::new();
|
||||
params.insert(
|
||||
"protocolVersion".to_string(),
|
||||
serde_json::Value::String("2024-11-05".to_string()),
|
||||
);
|
||||
params.insert("capabilities".to_string(), serde_json::json!({}));
|
||||
params.insert(
|
||||
"clientInfo".to_string(),
|
||||
serde_json::json!({
|
||||
"name": "brightstaff",
|
||||
"version": "1.0.0"
|
||||
}),
|
||||
);
|
||||
params
|
||||
}),
|
||||
};
|
||||
|
||||
let request_body = serde_json::to_string(&initialize_request).unwrap();
|
||||
|
||||
info!("Initializing MCP session for agent {}", agent_id);
|
||||
info!("Initialize request body: {}", request_body);
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(format!("{}/mcp", self.url))
|
||||
.header("Content-Type", "application/json")
|
||||
.header("Accept", "application/json, text/event-stream")
|
||||
.header(ARCH_UPSTREAM_HOST_HEADER, agent_id)
|
||||
.body(request_body)
|
||||
.send()
|
||||
.await
|
||||
.expect("Failed to initialize MCP session");
|
||||
|
||||
info!("Initialize response status: {}", response.status());
|
||||
info!("Initialize response headers: {:?}", response.headers());
|
||||
|
||||
let session_id = response
|
||||
.headers()
|
||||
.get("mcp-session-id")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.expect("No mcp-session-id in response")
|
||||
.to_string();
|
||||
|
||||
Ok(content)
|
||||
info!("Created new MCP session for agent {}: {}", agent_id, session_id);
|
||||
|
||||
// Send initialized notification (without id field per JSON-RPC 2.0 spec)
|
||||
let initialized_notification = JsonRpcNotification {
|
||||
jsonrpc: "2.0".to_string(),
|
||||
method: "notifications/initialized".to_string(),
|
||||
params: None,
|
||||
};
|
||||
|
||||
let notification_body = serde_json::to_string(&initialized_notification).unwrap();
|
||||
|
||||
info!("Sending initialized notification: {}", notification_body);
|
||||
|
||||
let notif_response = self
|
||||
.client
|
||||
.post(format!("{}/mcp", self.url))
|
||||
.header("Content-Type", "application/json")
|
||||
.header("Accept", "application/json, text/event-stream")
|
||||
.header("mcp-session-id", &session_id)
|
||||
.header(ARCH_UPSTREAM_HOST_HEADER, agent_id)
|
||||
.body(notification_body)
|
||||
.send()
|
||||
.await
|
||||
.expect("Failed to send initialized notification");
|
||||
|
||||
info!("Initialized notification response status: {}", notif_response.status());
|
||||
|
||||
session_id
|
||||
}
|
||||
|
||||
/// Send request to terminal agent and return the raw response for streaming
|
||||
pub async fn invoke_upstream_agent(
|
||||
pub async fn invoke_terminal_agent(
|
||||
&self,
|
||||
messages: &[Message],
|
||||
original_request: &ChatCompletionsRequest,
|
||||
|
|
@ -169,7 +354,7 @@ impl PipelineProcessor {
|
|||
|
||||
let response = self
|
||||
.client
|
||||
.post(&self.url)
|
||||
.post(format!("{}/v1/chat/completions", self.url))
|
||||
.headers(agent_headers)
|
||||
.body(request_body)
|
||||
.send()
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ use brightstaff::handlers::function_calling::{function_calling_chat_handler};
|
|||
use brightstaff::router::llm_router::RouterService;
|
||||
use brightstaff::utils::tracing::init_tracer;
|
||||
use bytes::Bytes;
|
||||
use common::configuration::Configuration;
|
||||
use common::configuration::{Agent, Configuration};
|
||||
use common::consts::{CHAT_COMPLETIONS_PATH, MESSAGES_PATH, OPENAI_RESPONSES_API_PATH};
|
||||
use http_body_util::{combinators::BoxBody, BodyExt, Empty};
|
||||
use hyper::body::Incoming;
|
||||
|
|
@ -63,9 +63,18 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
|
||||
let arch_config = Arc::new(config);
|
||||
|
||||
// combine agents and agent_filters into a single list of agents
|
||||
let all_agents: Vec<Agent> = arch_config
|
||||
.agents
|
||||
.as_deref()
|
||||
.unwrap_or_default()
|
||||
.iter()
|
||||
.chain(arch_config.agent_filters.as_deref().unwrap_or_default())
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
let llm_providers = Arc::new(RwLock::new(arch_config.model_providers.clone()));
|
||||
let agents_list = Arc::new(RwLock::new(arch_config.agents.clone()));
|
||||
let agent_filters = Arc::new(RwLock::new(arch_config.agent_filters.clone()));
|
||||
let agents_list = Arc::new(RwLock::new(Some(all_agents)));
|
||||
let listeners = Arc::new(RwLock::new(arch_config.listeners.clone()));
|
||||
|
||||
debug!(
|
||||
|
|
@ -112,7 +121,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
|
||||
let llm_providers = llm_providers.clone();
|
||||
let agents_list = agents_list.clone();
|
||||
let agent_filters = agent_filters.clone();
|
||||
let listeners = listeners.clone();
|
||||
let service = service_fn(move |req| {
|
||||
let router_service = Arc::clone(&router_service);
|
||||
|
|
@ -121,7 +129,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
let llm_providers = llm_providers.clone();
|
||||
let model_aliases = Arc::clone(&model_aliases);
|
||||
let agents_list = agents_list.clone();
|
||||
let agent_filters = agent_filters.clone();
|
||||
let listeners = listeners.clone();
|
||||
|
||||
async move {
|
||||
|
|
|
|||
1
crates/build.sh
Normal file
1
crates/build.sh
Normal file
|
|
@ -0,0 +1 @@
|
|||
# Release-build the prompt_gateway and llm_gateway crates for the wasm32-wasip1
# target, then (only if that succeeds) release-build brightstaff for the host.
cargo build --release --target wasm32-wasip1 -p prompt_gateway -p llm_gateway && cargo build --release -p brightstaff
|
||||
|
|
@ -21,16 +21,10 @@ pub struct ModelAlias {
|
|||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Agent {
|
||||
pub id: String,
|
||||
pub transport: Option<String>,
|
||||
pub tool: Option<String>,
|
||||
pub url: String,
|
||||
pub kind: Option<String>,
|
||||
pub url: String,
|
||||
pub tool: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AgentFilter {
|
||||
pub id: String,
|
||||
pub url: String,
|
||||
pub tool: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
|
|
@ -65,7 +59,7 @@ pub struct Configuration {
|
|||
pub mode: Option<GatewayMode>,
|
||||
pub routing: Option<Routing>,
|
||||
pub agents: Option<Vec<Agent>>,
|
||||
pub agent_filters: Option<Vec<AgentFilter>>,
|
||||
pub agent_filters: Option<Vec<Agent>>,
|
||||
pub listeners: Vec<Listener>,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,23 +2,21 @@ version: v0.3.0
|
|||
|
||||
agents:
|
||||
- id: rag_agent
|
||||
url: mcp://host.docker.internal:10501
|
||||
# only sse is supported
|
||||
# transport: sse or stdio
|
||||
# optional tool name, defaults to "invoke"
|
||||
# tool: invoke
|
||||
url: mcp://host.docker.internal:10505
|
||||
- id: travel_agent
|
||||
url: mcp://host.docker.internal:10502
|
||||
transport: streamable-http
|
||||
tool: invoke
|
||||
url: mcp://host.docker.internal:10401
|
||||
|
||||
agent_filters:
|
||||
- id: query_rewriter
|
||||
url: mcp://host.docker.internal:10500
|
||||
# tool is optional, defaults to id
|
||||
# tool: query_rewriter
|
||||
transport: streamable-http
|
||||
tool: query_rewriter
|
||||
url: mcp://host.docker.internal:10501
|
||||
- id: context_builder
|
||||
url: mcp://host.docker.internal:10500
|
||||
- id: input_guards
|
||||
url: mcp://host.docker.internal:10500
|
||||
transport: streamable-http
|
||||
tool: context_builder
|
||||
url: mcp://host.docker.internal:10502
|
||||
|
||||
model_providers:
|
||||
- model: openai/gpt-4o-mini
|
||||
|
|
@ -35,20 +33,20 @@ model_aliases:
|
|||
|
||||
listeners:
|
||||
- type: agent
|
||||
name: agent_1
|
||||
port: 8001
|
||||
router: arch_agent_router
|
||||
agents:
|
||||
- id: rag_agent
|
||||
description: virtual assistant for retrieval augmented generation tasks
|
||||
filter_chain:
|
||||
- input_guards
|
||||
- query_rewriter
|
||||
- context_builder
|
||||
|
||||
- id: travel_agent
|
||||
description: virtual assistant for travel bookings and recommendations
|
||||
filter_chain:
|
||||
- input_guards
|
||||
# - id: travel_agent
|
||||
# description: virtual assistant for travel bookings and recommendations
|
||||
# filter_chain:
|
||||
# - input_guards
|
||||
|
||||
tracing:
|
||||
random_sampling: 100
|
||||
|
|
|
|||
86
demos/use_cases/rag_agent/mcp_query.rest
Normal file
86
demos/use_cases/rag_agent/mcp_query.rest
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
### Initialize MCP Session (SSE)
|
||||
POST http://localhost:10501/mcp
|
||||
Content-Type: application/json
|
||||
Accept: application/json, text/event-stream
|
||||
|
||||
{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"capabilities":{},"protocolVersion":"2024-11-05","clientInfo":{"name":"test","version":"1.0.0"}}}
|
||||
|
||||
### Send Initialized Notification
|
||||
POST http://localhost:10501/mcp
|
||||
Content-Type: application/json
|
||||
Accept: application/json, text/event-stream
|
||||
mcp-session-id: e4ec1ae904e14e06b7d194da10e5f74c
|
||||
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"method": "notifications/initialized"
|
||||
}
|
||||
|
||||
### List Tools
|
||||
POST http://localhost:10501/mcp
|
||||
Content-Type: application/json
|
||||
Accept: application/json, text/event-stream
|
||||
mcp-session-id: eb10a691b36e4547b6c93c5dc5b47e11
|
||||
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "list-tools-1",
|
||||
"method": "tools/list"
|
||||
}
|
||||
|
||||
### Call Query Rewriter Tool
|
||||
POST http://localhost:10501/mcp
|
||||
Content-Type: application/json
|
||||
Accept: application/json, text/event-stream
|
||||
mcp-session-id: 6b95ff75825a402b90eb3ea07e23fbce
|
||||
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": "3d3b886a-6216-4a26-a422-7a972529c0e7",
|
||||
"method": "tools/call",
|
||||
"params": {
|
||||
"arguments": {
|
||||
"messages": [
|
||||
{
|
||||
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?",
|
||||
"role": "user"
|
||||
}
|
||||
]
|
||||
},
|
||||
"name": "query_rewriter"
|
||||
}
|
||||
}
|
||||
|
||||
### Call Query Rewriter Tool (single-line body, lowercase headers)
|
||||
|
||||
# Content-Type: application/json
|
||||
# Accept: application/json, text/event-stream
|
||||
# mcp-session-id: ed7a81a1d39549ecaadb867a6b2daf1e
|
||||
|
||||
POST http://localhost:10501/mcp
|
||||
content-type: application/json
|
||||
mcp-session-id: e4ec1ae904e14e06b7d194da10e5f74c
|
||||
accept: application/json, text/event-stream
|
||||
|
||||
{"jsonrpc":"2.0","id":"4bb1043a-2953-4bcd-b801-f270b0ae8c39","method":"tools/call","params":{"arguments":{"messages":[{"content":"What is the guaranteed uptime percentage for TechCorp's cloud services?","role":"user"}]},"name":"query_rewriter"}}
|
||||
|
||||
|
||||
|
||||
### Streaming test (calls the long_job tool)
|
||||
|
||||
POST http://localhost:10501/mcp
|
||||
content-type: application/json
|
||||
mcp-session-id: 60be9fb816304cb6b9ecdb91d89cd91f
|
||||
accept: application/json, text/event-stream
|
||||
|
||||
{
|
||||
"jsonrpc": "2.0",
|
||||
"id": 1,
|
||||
"method": "tools/call",
|
||||
"params": {
|
||||
"name": "long_job",
|
||||
"arguments": {
|
||||
"n": 3
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -7,7 +7,7 @@ requires-python = ">=3.10"
|
|||
dependencies = [
|
||||
"click>=8.2.1",
|
||||
"mcp>=1.13.1",
|
||||
"fastmcp>=2.12.2",
|
||||
"fastmcp>=2.14",
|
||||
"pydantic>=2.11.7",
|
||||
"fastapi>=0.104.1",
|
||||
"uvicorn>=0.24.0",
|
||||
|
|
|
|||
|
|
@ -1,50 +1,88 @@
|
|||
import click
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
from fastmcp import FastMCP
|
||||
|
||||
mcp = None
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.option("--transport", "transport", default="sse", help="Transport type: stdio or sse")
|
||||
@click.option(
|
||||
"--transport",
|
||||
"transport",
|
||||
default="streamable-http",
|
||||
help="Transport type: stdio or sse",
|
||||
)
|
||||
@click.option("--host", "host", default="localhost", help="Host to bind MCP server to")
|
||||
@click.option("--port", "port", type=int, default=10500, help="Port for MCP server")
|
||||
@click.option("--agent", "agent", required=True, help="Agent name: query_rewriter, context_builder, or response_generator")
|
||||
@click.option("--name", "agent_name", default=None, help="Custom MCP server name (defaults to agent type)")
|
||||
def main(host, port, agent, transport, agent_name):
|
||||
@click.option(
|
||||
"--agent",
|
||||
"agent",
|
||||
required=True,
|
||||
help="Agent name: query_rewriter, context_builder, or response_generator",
|
||||
)
|
||||
@click.option(
|
||||
"--name",
|
||||
"agent_name",
|
||||
default=None,
|
||||
help="Custom MCP server name (defaults to agent type)",
|
||||
)
|
||||
@click.option(
|
||||
"--rest-server",
|
||||
"rest_server",
|
||||
is_flag=True,
|
||||
help="Start REST server instead of MCP server",
|
||||
)
|
||||
@click.option("--rest-port", "rest_port", default=8000, help="Port for REST server")
|
||||
def main(host, port, agent, transport, agent_name, rest_server, rest_port):
|
||||
"""Start a RAG agent as an MCP server."""
|
||||
|
||||
|
||||
# Map friendly names to agent modules
|
||||
agent_map = {
|
||||
"query_rewriter": ("rag_agent.query_rewriter", "Query Rewriter Agent"),
|
||||
"context_builder": ("rag_agent.context_builder_agent", "Context Builder Agent"),
|
||||
"response_generator": ("rag_agent.response_generator", "Response Generator Agent"),
|
||||
"context_builder": ("rag_agent.context_builder", "Context Builder Agent"),
|
||||
"response_generator": (
|
||||
"rag_agent.rag_agent",
|
||||
"Response Generator Agent",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
module_name, default_name = agent_map[agent]
|
||||
mcp_name = agent_name or default_name
|
||||
|
||||
global mcp
|
||||
mcp = FastMCP(mcp_name, host=host, port=port)
|
||||
|
||||
if agent not in agent_map:
|
||||
print(f"Error: Unknown agent '{agent}'")
|
||||
print(f"Available agents: {', '.join(agent_map.keys())}")
|
||||
return
|
||||
|
||||
module_name, default_name = agent_map[agent]
|
||||
mcp_name = agent_name or default_name
|
||||
|
||||
print(f"Starting MCP server: {mcp_name}")
|
||||
print(f" Agent: {agent}")
|
||||
print(f" Transport: {transport}")
|
||||
print(f" Host: {host}")
|
||||
print(f" Port: {port}")
|
||||
|
||||
global mcp
|
||||
mcp = FastMCP(mcp_name, host=host, port=port)
|
||||
|
||||
# Import the agent module to register its tools
|
||||
import importlib
|
||||
importlib.import_module(module_name)
|
||||
|
||||
print(f"Agent '{agent}' loaded successfully")
|
||||
print(f"MCP server ready on {transport}://{host}:{port}")
|
||||
|
||||
mcp.run(transport=transport)
|
||||
|
||||
if rest_server:
|
||||
print(f"Starting REST server on {host}:{rest_port} for agent: {agent}")
|
||||
|
||||
if agent == "response_generator":
|
||||
from rag_agent.rag_agent import start_server
|
||||
|
||||
start_server(host=host, port=rest_port)
|
||||
return
|
||||
else:
|
||||
print("Please specify an agent to start with --agent option.")
|
||||
return
|
||||
else:
|
||||
print(f"Starting MCP server: {mcp_name}")
|
||||
print(f" Agent: {agent}")
|
||||
print(f" Transport: {transport}")
|
||||
print(f" Host: {host}")
|
||||
print(f" Port: {port}")
|
||||
|
||||
# Import the agent module to register its tools
|
||||
import importlib
|
||||
|
||||
importlib.import_module(module_name)
|
||||
|
||||
print(f"Agent '{agent}' loaded successfully")
|
||||
print(f"MCP server ready on {transport}://{host}:{port}")
|
||||
|
||||
mcp.run(transport=transport)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -191,54 +191,30 @@ class Response(BaseModel):
|
|||
# FastAPI app for REST server
|
||||
app = FastAPI(title="RAG Content Builder Agent", version="1.0.0")
|
||||
|
||||
|
||||
@mcp.tool()
|
||||
@app.post("/v1/chat/completions")
|
||||
async def context_builder(
|
||||
request_body: ChatCompletionRequest
|
||||
) -> ChatCompletionResponse:
|
||||
""" chat completions endpoint that augments user queries with relevant context from the knowledge base."""
|
||||
async def context_builder(messages: List[ChatMessage]) -> List[ChatMessage]:
|
||||
"""chat completions endpoint that augments user queries with relevant context from the knowledge base."""
|
||||
import time
|
||||
import uuid
|
||||
|
||||
logger.info(
|
||||
f"Received chat completion request with {len(request_body.messages)} messages"
|
||||
)
|
||||
logger.info(f"Received chat completion request with {len(messages)} messages")
|
||||
|
||||
# Get traceparent header from HTTP request using FastMCP's dependency function
|
||||
headers = get_http_headers()
|
||||
traceparent_header = headers.get("traceparent")
|
||||
|
||||
|
||||
if traceparent_header:
|
||||
logger.info(f"Received traceparent header: {traceparent_header}")
|
||||
else:
|
||||
logger.info("No traceparent header found")
|
||||
|
||||
# Augment the user query with relevant context
|
||||
updated_messages = await augment_query_with_context(
|
||||
request_body.messages, traceparent_header
|
||||
)
|
||||
messages_history_json = json.dumps([msg.dict() for msg in updated_messages])
|
||||
updated_messages = await augment_query_with_context(messages, traceparent_header)
|
||||
|
||||
response = ChatCompletionResponse(
|
||||
id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
|
||||
created=int(time.time()),
|
||||
model=request_body.model,
|
||||
choices=[
|
||||
{
|
||||
"index": 0,
|
||||
"message": {"role": "user", "content": messages_history_json},
|
||||
"finish_reason": "stop",
|
||||
}
|
||||
],
|
||||
usage={
|
||||
"prompt_tokens": sum(len(msg.content.split()) for msg in updated_messages),
|
||||
"completion_tokens": len("Context added to user query.".split()),
|
||||
"total_tokens": sum(len(msg.content.split()) for msg in updated_messages)
|
||||
+ len("Context added to user query.".split()),
|
||||
},
|
||||
)
|
||||
|
||||
return response
|
||||
# Return a list of plain role/content dicts to minimize text serialization
|
||||
return [{"role": msg.role, "content": msg.content} for msg in updated_messages]
|
||||
|
||||
|
||||
def main():
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import asyncio
|
||||
import json
|
||||
from pydantic import BaseModel
|
||||
from typing import List, Optional, Dict, Any
|
||||
|
|
@ -11,6 +12,9 @@ from .api import ChatMessage, ChatCompletionRequest, ChatCompletionResponse
|
|||
from . import mcp
|
||||
from fastmcp.server.dependencies import get_http_headers
|
||||
|
||||
from fastmcp.dependencies import CurrentContext
|
||||
from fastmcp.server.context import Context
|
||||
|
||||
# Set up logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
|
|
@ -29,10 +33,11 @@ archgw_client = AsyncOpenAI(
|
|||
api_key="EMPTY", # archgw doesn't require a real API key
|
||||
)
|
||||
|
||||
|
||||
async def rewrite_query_with_archgw(
|
||||
messages: List[ChatMessage], traceparent_header: str
|
||||
) -> str:
|
||||
""" Rewrite the user query using LLM for better retrieval. """
|
||||
"""Rewrite the user query using LLM for better retrieval."""
|
||||
system_prompt = """You are a query rewriter that improves user queries for better retrieval.
|
||||
|
||||
Given a conversation history, rewrite the last user message to be more specific and context-aware.
|
||||
|
|
@ -89,33 +94,31 @@ class Response(BaseModel):
|
|||
app = FastAPI(title="RAG Agent Query Parser", version="1.0.0")
|
||||
|
||||
|
||||
@app.post("/v1/chat/completions")
|
||||
@mcp.tool()
|
||||
async def query_rewriter(request_body: ChatCompletionRequest):
|
||||
"""Chat completions endpoint that rewrites the last user query using archgw."""
|
||||
async def query_rewriter(messages: List[ChatMessage]) -> List[ChatMessage]:
|
||||
"""Chat completions endpoint that rewrites the last user query using archgw.
|
||||
|
||||
Returns the updated message list as plain role/content dicts.
|
||||
"""
|
||||
import time
|
||||
import uuid
|
||||
|
||||
logger.info(
|
||||
f"Received chat completion request with {len(request_body.messages)} messages"
|
||||
)
|
||||
logger.info(f"Received chat completion request with {len(messages)} messages")
|
||||
|
||||
# Get traceparent header from HTTP request using FastMCP's dependency function
|
||||
headers = get_http_headers()
|
||||
traceparent_header = headers.get("traceparent")
|
||||
|
||||
|
||||
if traceparent_header:
|
||||
logger.info(f"Received traceparent header: {traceparent_header}")
|
||||
else:
|
||||
logger.info("No traceparent header found")
|
||||
|
||||
# Call archgw to rewrite the last user query
|
||||
rewritten_query = await rewrite_query_with_archgw(
|
||||
request_body.messages, traceparent_header
|
||||
)
|
||||
rewritten_query = await rewrite_query_with_archgw(messages, traceparent_header)
|
||||
|
||||
# Create updated messages with the rewritten query
|
||||
updated_messages = request_body.messages.copy()
|
||||
updated_messages = messages.copy()
|
||||
|
||||
# Find and update the last user message with the rewritten query
|
||||
for i in range(len(updated_messages) - 1, -1, -1):
|
||||
|
|
@ -127,28 +130,8 @@ async def query_rewriter(request_body: ChatCompletionRequest):
|
|||
)
|
||||
break
|
||||
|
||||
messages_history_json = json.dumps([msg.dict() for msg in updated_messages])
|
||||
|
||||
response = ChatCompletionResponse(
|
||||
id=f"chatcmpl-{uuid.uuid4().hex[:8]}",
|
||||
created=int(time.time()),
|
||||
model=request_body.model,
|
||||
choices=[
|
||||
{
|
||||
"index": 0,
|
||||
"message": {"role": "user", "content": messages_history_json},
|
||||
"finish_reason": "stop",
|
||||
}
|
||||
],
|
||||
usage={
|
||||
"prompt_tokens": sum(len(msg.content.split()) for msg in updated_messages),
|
||||
"completion_tokens": len("Updated query for better retrieval.".split()),
|
||||
"total_tokens": sum(len(msg.content.split()) for msg in updated_messages)
|
||||
+ len("Updated query for better retrieval.".split()),
|
||||
},
|
||||
)
|
||||
|
||||
return response
|
||||
# Return a list of plain role/content dicts to minimize text serialization
|
||||
return [{"role": msg.role, "content": msg.content} for msg in updated_messages]
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
|
|
|
|||
|
|
@ -63,9 +63,8 @@ def prepare_response_messages(request_body: ChatCompletionRequest):
|
|||
|
||||
|
||||
@app.post("/v1/chat/completions")
|
||||
@mcp.tool(name="invoke")
|
||||
async def chat_completion(request_body: ChatCompletionRequest):
|
||||
"""Chat completions endpoint that generates a coherent response based on all context."""
|
||||
async def chat_completion_http(request_body: ChatCompletionRequest):
|
||||
"""HTTP endpoint for chat completions with streaming support."""
|
||||
logger.info(
|
||||
f"Received chat completion request with {len(request_body.messages)} messages"
|
||||
)
|
||||
|
|
@ -73,7 +72,7 @@ async def chat_completion(request_body: ChatCompletionRequest):
|
|||
# Get traceparent header from HTTP request using FastMCP's dependency function
|
||||
headers = get_http_headers()
|
||||
traceparent_header = headers.get("traceparent")
|
||||
|
||||
|
||||
if traceparent_header:
|
||||
logger.info(f"Received traceparent header: {traceparent_header}")
|
||||
else:
|
||||
|
|
@ -92,6 +91,23 @@ async def chat_completion(request_body: ChatCompletionRequest):
|
|||
return await non_streaming_chat_completions(request_body, traceparent_header)
|
||||
|
||||
|
||||
@mcp.tool(name="invoke")
|
||||
async def chat_completion(request_body: ChatCompletionRequest):
|
||||
"""Chat completions endpoint that generates a coherent response based on all context.
|
||||
|
||||
For MCP calls, the non-streaming path is always used so a complete response is returned.
|
||||
"""
|
||||
logger.info(
|
||||
f"[MCP] Received chat completion request with {len(request_body.messages)} messages"
|
||||
)
|
||||
|
||||
# For MCP, always use non-streaming to return a complete response
|
||||
response = await non_streaming_chat_completions(
|
||||
request_body, traceparent_header=None
|
||||
)
|
||||
return response
|
||||
|
||||
|
||||
async def stream_chat_completions(
|
||||
request_body: ChatCompletionRequest, traceparent_header: str = None
|
||||
):
|
||||
|
|
|
|||
|
|
@ -21,16 +21,25 @@ cleanup() {
|
|||
|
||||
trap cleanup EXIT
|
||||
|
||||
log "Starting query_parser agent on port 10500..."
|
||||
uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10500 --agent query_parser &
|
||||
# log "Starting input guards filter on port 10500..."
|
||||
# uv run python -m rag_agent --host 0.0.0.0 --port 10500 --agent input_guards &
|
||||
# WAIT_FOR_PIDS+=($!)
|
||||
|
||||
|
||||
log "Starting query_parser agent on port 10501..."
|
||||
uv run python -m rag_agent --host 0.0.0.0 --port 10501 --agent query_rewriter &
|
||||
WAIT_FOR_PIDS+=($!)
|
||||
|
||||
log "Starting context_builder agent on port 10501..."
|
||||
uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10501 --agent context_builder &
|
||||
log "Starting context_builder agent on port 10502..."
|
||||
uv run python -m rag_agent --host 0.0.0.0 --port 10502 --agent context_builder &
|
||||
WAIT_FOR_PIDS+=($!)
|
||||
|
||||
log "Starting response_generator agent on port 10502..."
|
||||
uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10502 --agent response_generator &
|
||||
# log "Starting response_generator agent on port 10400..."
|
||||
# uv run python -m rag_agent --host 0.0.0.0 --port 10400 --agent response_generator &
|
||||
# WAIT_FOR_PIDS+=($!)
|
||||
|
||||
log "Starting response_generator agent on port 10505..."
|
||||
uv run python -m rag_agent --rest-server --host 0.0.0.0 --rest-port 10505 --agent response_generator &
|
||||
WAIT_FOR_PIDS+=($!)
|
||||
|
||||
for PID in "${WAIT_FOR_PIDS[@]}"; do
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ Content-Type: application/json
|
|||
"content": "What is the guaranteed uptime percentage for TechCorp's cloud services?"
|
||||
}
|
||||
],
|
||||
"stream": false
|
||||
"stream": true
|
||||
}
|
||||
|
||||
### send request to context builder agent
|
||||
|
|
|
|||
832
demos/use_cases/rag_agent/uv.lock
generated
832
demos/use_cases/rag_agent/uv.lock
generated
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue