diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml index 5c2fd420..732d05a6 100644 --- a/arch/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -286,6 +286,100 @@ static_resources: typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + - name: agents_traffic + address: + socket_address: + address: 0.0.0.0 + port_value: 8001 + traffic_direction: OUTBOUND + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %} + generate_request_id: true + tracing: + provider: + name: envoy.tracers.opentelemetry + typed_config: + "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig + grpc_service: + envoy_grpc: + cluster_name: opentelemetry_collector + timeout: 0.250s + service_name: arch_gateway + random_sampling: + value: {{ arch_tracing.random_sampling }} + {% endif %} + stat_prefix: agents_traffic + codec_type: AUTO + scheme_header_transformation: + scheme_to_overwrite: https + access_log: + - name: envoy.access_loggers.file + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog + path: "/var/log/access_llm.log" + route_config: + name: local_routes + virtual_hosts: + - name: local_service + domains: + - "*" + routes: + - match: + prefix: "/healthz" + direct_response: + status: 200 + - match: + prefix: "/" + route: + auto_host_rewrite: true + prefix_rewrite: "/agents/" + cluster: bright_staff + timeout: {{ llm_gateway_listener.timeout }} + http_filters: + - name: envoy.filters.http.compressor + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor + compressor_library: + name: envoy.compression.brotli.compressor + typed_config: + "@type": type.googleapis.com/envoy.extensions.compression.brotli.compressor.v3.Brotli + - name: envoy.filters.http.compressor + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor + compressor_library: + name: compress + typed_config: + "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip + memory_level: 3 + window_bits: 10 + - name: envoy.filters.http.decompressor + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor + decompressor_library: + name: decompress + typed_config: + "@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip" + window_bits: 9 + chunk_size: 8192 + # If this ratio is set too low, then body data will not be decompressed completely. + max_inflate_ratio: 1000 + - name: envoy.filters.http.decompressor + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor + decompressor_library: + name: envoy.compression.brotli.decompressor + typed_config: + "@type": type.googleapis.com/envoy.extensions.compression.brotli.decompressor.v3.Brotli + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + + + - name: egress_traffic address: socket_address: diff --git a/arch/tools/cli/core.py b/arch/tools/cli/core.py index 59d42ab4..eb1ed377 100644 --- a/arch/tools/cli/core.py +++ b/arch/tools/cli/core.py @@ -1,3 +1,4 @@ +import json import subprocess import os import time @@ -25,7 +26,7 @@ from cli.docker_cli import ( log = getLogger(__name__) -def _get_gateway_ports(arch_config_file: str) -> tuple: +def _get_gateway_ports(arch_config_file: str) -> list[int]: PROMPT_GATEWAY_DEFAULT_PORT = 10000 LLM_GATEWAY_DEFAULT_PORT = 12000 @@ -34,18 +35,13 @@ def _get_gateway_ports(arch_config_file: str) -> tuple: with open(arch_config_file) as f: arch_config_dict = yaml.safe_load(f) - prompt_gateway_port = ( - arch_config_dict.get("listeners", {}) - .get("ingress_traffic", {}) - .get("port", PROMPT_GATEWAY_DEFAULT_PORT) - ) - llm_gateway_port = ( - arch_config_dict.get("listeners", {}) - .get("egress_traffic", {}) - .get("port", LLM_GATEWAY_DEFAULT_PORT) - ) + print("arch config dict json string: ", json.dumps(arch_config_dict)) - return prompt_gateway_port, llm_gateway_port + all_ports = [ + listener.get("port") for listener in arch_config_dict.get("listeners", []) + ] + + return all_ports def start_arch(arch_config_file, env, log_timeout=120, foreground=False): @@ -67,14 +63,13 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False): docker_stop_container(ARCHGW_DOCKER_NAME) docker_remove_container(ARCHGW_DOCKER_NAME) - prompt_gateway_port, llm_gateway_port = _get_gateway_ports(arch_config_file) + gateway_ports = _get_gateway_ports(arch_config_file) return_code, _, archgw_stderr = docker_start_archgw_detached( arch_config_file, os.path.expanduser("~/archgw_logs"), env, - prompt_gateway_port, - llm_gateway_port, + gateway_ports, ) if return_code != 0: log.info("Failed to start arch gateway: " + str(return_code)) @@ -83,13 +78,17 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False): start_time = time.time() while True: - prompt_gateway_health_check_status = health_check_endpoint( - f"http://localhost:{prompt_gateway_port}/healthz" - ) - - llm_gateway_health_check_status = health_check_endpoint( - f"http://localhost:{llm_gateway_port}/healthz" - ) + all_listeners_healthy = True + for port in gateway_ports: + log.info(f"Checking health endpoint on port {port}") + health_check_status = health_check_endpoint( + f"http://localhost:{port}/healthz" + ) + if health_check_status: + log.info(f"Gateway on port {port} is healthy!") + else: + all_listeners_healthy = False + log.info(f"Gateway on port {port} is not healthy yet.") archgw_status = docker_container_status(ARCHGW_DOCKER_NAME) current_time = time.time() @@ -106,7 +105,7 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False): stream_gateway_logs(follow=False) sys.exit(1) - if prompt_gateway_health_check_status or llm_gateway_health_check_status: + if all_listeners_healthy: log.info("archgw is running and is healthy!") break else: diff --git a/arch/tools/cli/docker_cli.py b/arch/tools/cli/docker_cli.py index e8a12a13..873a2641 100644 --- a/arch/tools/cli/docker_cli.py +++ b/arch/tools/cli/docker_cli.py @@ -44,17 +44,18 @@ def docker_start_archgw_detached( arch_config_file: str, logs_path_abs: str, env: dict, - prompt_gateway_port, - llm_gateway_port, + gateway_ports: list[int], ) -> str: env_args = [item for key, value in env.items() for item in ["-e", f"{key}={value}"]] port_mappings = [ - f"{prompt_gateway_port}:{prompt_gateway_port}", - f"{llm_gateway_port}:{llm_gateway_port}", - f"{llm_gateway_port+1}:{llm_gateway_port+1}", + f"{12001}:{12001}", "19901:9901", ] + + for port in gateway_ports: + port_mappings.append(f"{port}:{port}") + port_mappings_args = [item for port in port_mappings for item in ("-p", port)] volume_mappings = [ diff --git a/arch/tools/cli/utils.py b/arch/tools/cli/utils.py index 019e181c..e8bdb3a7 100644 --- a/arch/tools/cli/utils.py +++ b/arch/tools/cli/utils.py @@ -28,9 +28,9 @@ def get_llm_provider_access_keys(arch_config_file): access_key_list = [] for llm_provider in arch_config_yaml.get("llm_providers", []): - acess_key = llm_provider.get("access_key") - if acess_key is not None: - access_key_list.append(acess_key) + access_key = llm_provider.get("access_key") + if access_key is not None: + access_key_list.append(access_key) for prompt_target in arch_config_yaml.get("prompt_targets", []): for k, v in prompt_target.get("endpoint", {}).get("http_headers", {}).items(): @@ -44,6 +44,12 @@ def get_llm_provider_access_keys(arch_config_file): else: access_key_list.append(v) + for listener in arch_config_yaml.get("listeners", []): + for llm_provider in listener.get("llm_providers", []): + access_key = llm_provider.get("access_key") + if access_key is not None: + access_key_list.append(access_key) + return access_key_list diff --git a/crates/brightstaff/src/handlers/agent_chat_completions.rs b/crates/brightstaff/src/handlers/agent_chat_completions.rs new file mode 100644 index 00000000..c39be3fb --- /dev/null +++ b/crates/brightstaff/src/handlers/agent_chat_completions.rs @@ -0,0 +1,278 @@ +use std::sync::Arc; + +use bytes::Bytes; +use common::api::open_ai::{ChatCompletionsResponse, Choice}; +use common::configuration::ModelUsagePreference; +use common::consts::ARCH_PROVIDER_HINT_HEADER; +use hermesllm::apis::openai::ChatCompletionsRequest; +use hermesllm::apis::{Role, Usage}; +use hermesllm::clients::SupportedAPIs; +use hermesllm::{ProviderRequest, ProviderRequestType}; +use http_body_util::combinators::BoxBody; +use http_body_util::{BodyExt, Full, StreamBody}; +use hyper::body::Frame; +use hyper::header::{self}; +use hyper::{Request, Response, StatusCode}; +use serde::{ser::SerializeMap, Deserialize, Serialize}; +use tokio::sync::mpsc; +use tokio_stream::wrappers::ReceiverStream; +use tokio_stream::StreamExt; +use tracing::{debug, info, warn}; + +use crate::router::llm_router::RouterService; + +fn full>(chunk: T) -> BoxBody { + Full::new(chunk.into()) + .map_err(|never| match never {}) + .boxed() +} + +pub async fn agent_chat( + request: Request, + router_service: Arc, + full_qualified_llm_provider_url: String, + agents_list: Arc>>>, + listeners: Arc>>, +) -> Result>, hyper::Error> { + // find listener that is running at port 8001 for agents + let listener = { + let listeners = listeners.read().await; + listeners.iter().find(|l| l.port == 8001).cloned() + } + .unwrap(); + + let request_path = request.uri().path().to_string(); + let mut request_headers = request.headers().clone(); + let chat_request_bytes = request.collect().await?.to_bytes(); + + debug!( + "Received request body (raw utf8): {}", + String::from_utf8_lossy(&chat_request_bytes) + ); + + let chat_completions_request: ChatCompletionsRequest = + match serde_json::from_slice(&chat_request_bytes) { + Ok(req) => req, + Err(err) => { + warn!( + "Failed to parse request body as ChatCompletionsRequest: {}", + err + ); + let err_msg = format!("Failed to parse request body: {}", err); + let mut bad_request = Response::new(full(err_msg)); + *bad_request.status_mut() = StatusCode::BAD_REQUEST; + return Ok(bad_request); + } + }; + + let agent_name_map = { + let agents = agents_list.read().await; + let agents = agents.as_ref().unwrap(); + let mut map = std::collections::HashMap::new(); + for agent in agents.iter() { + map.insert(agent.name.clone(), agent.clone()); + } + map + }; + + // find agent to answer the request + let agent_pipeline = listener.agents.as_ref().unwrap()[0].clone(); // for now, just take the first agent pipeline + + // process agent pipeline + + debug!("Processing agent pipeline: {}", agent_pipeline.name); + + let mut chat_completions_history = chat_completions_request.messages.clone(); + let mut last_response: Option = None; + + for agent_name in agent_pipeline.filter_chain { + debug!("Processing agent: {}", agent_name); + let agent = agent_name_map.get(&agent_name).unwrap(); + debug!("Agent details: {:?}", agent); + + let path = format!( + "{}/v1/chat/completions", + agent.endpoint.trim_end_matches('/') + ); + + let mut request = chat_completions_request.clone(); + request.messages = chat_completions_history.clone(); + + let request_str = serde_json::to_string(&request).unwrap(); + debug!("Sending request to agent {}: {}", agent_name, request_str); + + let response = match reqwest::Client::new() + .post(path) + .body(request_str) + .send() + .await + { + Ok(res) => res, + Err(err) => { + let err_msg = format!("Failed to send request: {}", err); + let mut internal_error = Response::new(full(err_msg)); + *internal_error.status_mut() = StatusCode::INTERNAL_SERVER_ERROR; + return Ok(internal_error); + } + }; + + let response_bytes = match response.bytes().await { + Ok(bytes) => bytes, + Err(err) => { + let err_msg = format!("Failed to read response bytes: {}", err); + let mut internal_error = Response::new(full(err_msg)); + *internal_error.status_mut() = StatusCode::INTERNAL_SERVER_ERROR; + return Ok(internal_error); + } + }; + + let chat_completions_response: hermesllm::apis::openai::ChatCompletionsResponse = + match serde_json::from_slice(&response_bytes) { + Ok(res) => res, + Err(err) => { + let err_msg = format!("Failed to parse response body: {}", err); + let mut internal_error = Response::new(full(err_msg)); + *internal_error.status_mut() = StatusCode::INTERNAL_SERVER_ERROR; + return Ok(internal_error); + } + }; + + let response_str = chat_completions_response.choices[0] + .message + .content + .clone() + .unwrap(); + + debug!( + "Received response from agent {}: {}", + agent_name, response_str + ); + + chat_completions_history = serde_json::from_str(response_str.as_str()).unwrap_or(vec![]); + + // chat_completions_history.append(&mut vec![hermesllm::apis::openai::Message { + // role: hermesllm::apis::openai::Role::Assistant, + // content: hermesllm::apis::openai::MessageContent::Text(response_str), + // name: Some(agent_name.clone()), + // tool_calls: None, + // tool_call_id: None, + // }]); + } + + let last_response: Option = match chat_completions_history.last() { + Some(msg) => Some(msg.content.clone().to_string()), + None => None, + }; + + let chat_completion_response: hermesllm::apis::openai::ChatCompletionsResponse = + hermesllm::apis::openai::ChatCompletionsResponse { + model: "arch-agent".to_string(), + choices: vec![hermesllm::apis::openai::Choice { + index: 0, + finish_reason: None, + message: { + hermesllm::apis::openai::ResponseMessage { + role: hermesllm::apis::openai::Role::Assistant, + content: last_response, + refusal: None, + annotations: None, + audio: None, + function_call: None, + tool_calls: None, + } + }, + logprobs: None, + }], + usage: hermesllm::apis::openai::Usage { + prompt_tokens: 0, + completion_tokens: 0, + total_tokens: 0, + prompt_tokens_details: None, + completion_tokens_details: None, + }, + id: "00".to_string(), + object: "chat.completion".to_string(), + created: 0, + system_fingerprint: None, + service_tier: None, + }; + + let response_body = serde_json::to_string(&chat_completion_response).unwrap(); + + return Ok(Response::new(full(response_body))); + + // request_headers.insert( + // ARCH_PROVIDER_HINT_HEADER, + // header::HeaderValue::from_str(&model_name).unwrap(), + // ); + + // if let Some(trace_parent) = trace_parent { + // request_headers.insert( + // header::HeaderName::from_static("traceparent"), + // header::HeaderValue::from_str(&trace_parent).unwrap(), + // ); + // } + // // remove content-length header if it exists + // request_headers.remove(header::CONTENT_LENGTH); + + // let llm_response = match reqwest::Client::new() + // .post(full_qualified_llm_provider_url) + // .headers(request_headers) + // .body(client_request_bytes_for_upstream) + // .send() + // .await + // { + // Ok(res) => res, + // Err(err) => { + // let err_msg = format!("Failed to send request: {}", err); + // let mut internal_error = Response::new(full(err_msg)); + // *internal_error.status_mut() = StatusCode::INTERNAL_SERVER_ERROR; + // return Ok(internal_error); + // } + // }; + + // // copy over the headers from the original response + // let response_headers = llm_response.headers().clone(); + // let mut response = Response::builder(); + // let headers = response.headers_mut().unwrap(); + // for (header_name, header_value) in response_headers.iter() { + // headers.insert(header_name, header_value.clone()); + // } + + // // channel to create async stream + // let (tx, rx) = mpsc::channel::(16); + + // // Spawn a task to send data as it becomes available + // tokio::spawn(async move { + // let mut byte_stream = llm_response.bytes_stream(); + + // while let Some(item) = byte_stream.next().await { + // let item = match item { + // Ok(item) => item, + // Err(err) => { + // warn!("Error receiving chunk: {:?}", err); + // break; + // } + // }; + + // if tx.send(item).await.is_err() { + // warn!("Receiver dropped"); + // break; + // } + // } + // }); + + // let stream = ReceiverStream::new(rx).map(|chunk| Ok::<_, hyper::Error>(Frame::data(chunk))); + + // let stream_body = BoxBody::new(StreamBody::new(stream)); + + // match response.body(stream_body) { + // Ok(response) => Ok(response), + // Err(err) => { + // let err_msg = format!("Failed to create response: {}", err); + // let mut internal_error = Response::new(full(err_msg)); + // *internal_error.status_mut() = StatusCode::INTERNAL_SERVER_ERROR; + // Ok(internal_error) + // } + // } +} diff --git a/crates/brightstaff/src/handlers/mod.rs b/crates/brightstaff/src/handlers/mod.rs index 6de38b5b..6fe1404d 100644 --- a/crates/brightstaff/src/handlers/mod.rs +++ b/crates/brightstaff/src/handlers/mod.rs @@ -1,2 +1,3 @@ pub mod chat_completions; pub mod models; +pub mod agent_chat_completions; diff --git a/crates/brightstaff/src/main.rs b/crates/brightstaff/src/main.rs index d3843125..032977c9 100644 --- a/crates/brightstaff/src/main.rs +++ b/crates/brightstaff/src/main.rs @@ -1,3 +1,4 @@ +use brightstaff::handlers::agent_chat_completions::agent_chat; use brightstaff::handlers::chat_completions::chat; use brightstaff::handlers::models::list_models; use brightstaff::router::llm_router::RouterService; @@ -62,6 +63,8 @@ async fn main() -> Result<(), Box> { let arch_config = Arc::new(config); let llm_providers = Arc::new(RwLock::new(arch_config.llm_providers.clone())); + let agents_list = Arc::new(RwLock::new(arch_config.agents.clone())); + let listeners = Arc::new(RwLock::new(arch_config.listeners.clone())); debug!( "arch_config: {:?}", @@ -103,12 +106,16 @@ async fn main() -> Result<(), Box> { let llm_provider_url = llm_provider_url.clone(); let llm_providers = llm_providers.clone(); + let agents_list = agents_list.clone(); + let listeners = listeners.clone(); let service = service_fn(move |req| { let router_service = Arc::clone(&router_service); let parent_cx = extract_context_from_request(&req); let llm_provider_url = llm_provider_url.clone(); let llm_providers = llm_providers.clone(); + let agents_list = agents_list.clone(); + let listeners = listeners.clone(); async move { match (req.method(), req.uri().path()) { @@ -118,6 +125,12 @@ async fn main() -> Result<(), Box> { .with_context(parent_cx) .await } + (&Method::POST, "/agents/v1/chat/completions") => { + let fully_qualified_url = format!("{}{}", llm_provider_url, req.uri().path()); + agent_chat(req, router_service, fully_qualified_url, agents_list, listeners) + .with_context(parent_cx) + .await + } (&Method::GET, "/v1/models") => Ok(list_models(llm_providers).await), (&Method::OPTIONS, "/v1/models") => { let mut response = Response::new(empty()); @@ -143,6 +156,7 @@ async fn main() -> Result<(), Box> { Ok(response) } _ => { + debug!("No route for {} {}", req.method(), req.uri().path()); let mut not_found = Response::new(empty()); *not_found.status_mut() = StatusCode::NOT_FOUND; Ok(not_found) diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index 93f4fd38..37d3d002 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -13,6 +13,28 @@ pub struct Routing { pub model: Option, } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Agent { + pub name: String, + pub kind: String, + pub endpoint: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AgentPipeline { + pub name: String, + pub description: Option, + pub filter_chain: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Listener { + pub name: String, + pub router: Option, + pub agents: Option>, + pub port: u16, +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Configuration { pub version: String, @@ -27,6 +49,8 @@ pub struct Configuration { pub tracing: Option, pub mode: Option, pub routing: Option, + pub agents: Option>, + pub listeners: Vec, } #[derive(Debug, Clone, Serialize, Deserialize, Default)] diff --git a/demos/use_cases/rag_agent/arch_config.yaml b/demos/use_cases/rag_agent/arch_config.yaml index c351fd9f..09b7915c 100644 --- a/demos/use_cases/rag_agent/arch_config.yaml +++ b/demos/use_cases/rag_agent/arch_config.yaml @@ -24,6 +24,11 @@ listeners: - query_rewriter - context_builder - response_generator + - name: research_agent + description: deep research agent that can perform searches and gather information. + filter_chain: + - research_agent + - response_generator port: 8001 - name: egress_traffic @@ -31,7 +36,7 @@ listeners: port: 12000 protocol: openai llm_providers: - - access_key: ${OPENAI_API_KEY} + - access_key: $OPENAI_API_KEY model: openai/gpt-4o - - access_key: ${OPENAI_API_KEY} + - access_key: $OPENAI_API_KEY model: openai/gpt-4o-mini diff --git a/demos/use_cases/rag_agent/src/rag_agent/__init__.py b/demos/use_cases/rag_agent/src/rag_agent/__init__.py index 429be7b0..464ec926 100644 --- a/demos/use_cases/rag_agent/src/rag_agent/__init__.py +++ b/demos/use_cases/rag_agent/src/rag_agent/__init__.py @@ -18,12 +18,26 @@ mcp = None @click.option("--rest-port", "rest_port", default=8000, help="Port for REST server") def main(host, port, agent, transport, rest_server, rest_port): if rest_server: - print(f"Starting REST server on {host}:{rest_port}") + print(f"Starting REST server on {host}:{rest_port} for agent: {agent}") - from rag_agent.query_parser import start_server + if agent == "query_parser": + from rag_agent.query_rewriter_agent import start_server - start_server(host=host, port=rest_port) - return + start_server(host=host, port=rest_port) + return + elif agent == "content_builder": + from rag_agent.content_builder_agent import start_server + + start_server(host=host, port=rest_port) + return + elif agent == "response_generator": + from rag_agent.response_generator_agent import start_server + + start_server(host=host, port=rest_port) + return + else: + print("Please specify an agent to start with --agent option.") + return print(f"Starting agent(s): {agent if agent else 'all'}") global mcp diff --git a/demos/use_cases/rag_agent/src/rag_agent/content_builder_agent.py b/demos/use_cases/rag_agent/src/rag_agent/content_builder_agent.py index 76998a7a..fc715314 100644 --- a/demos/use_cases/rag_agent/src/rag_agent/content_builder_agent.py +++ b/demos/use_cases/rag_agent/src/rag_agent/content_builder_agent.py @@ -1,3 +1,4 @@ +import json from pydantic import BaseModel from typing import List, Optional, Dict, Any from fastapi import FastAPI, HTTPException @@ -6,6 +7,7 @@ import os import logging import csv from pathlib import Path +import uvicorn from .api import ChatMessage, ChatCompletionRequest, ChatCompletionResponse @@ -37,6 +39,8 @@ def load_knowledge_base(): current_dir = Path(__file__).parent csv_path = current_dir / "basis_of_truth.csv" + print(f"Loading knowledge base from {csv_path}") + try: knowledge_base = [] with open(csv_path, "r", encoding="utf-8") as file: @@ -174,7 +178,7 @@ app = FastAPI(title="RAG Content Builder Agent", version="1.0.0") @app.post("/v1/chat/completions") -async def chat_completions(request: ChatCompletionRequest): +async def chat_completions(request: ChatCompletionRequest) -> ChatCompletionResponse: """Chat completions endpoint that augments user queries with relevant context from the knowledge base.""" import time import uuid @@ -185,6 +189,7 @@ async def chat_completions(request: ChatCompletionRequest): # Augment the user query with relevant context updated_messages = await augment_query_with_context(request.messages) + messages_history_json = json.dumps([msg.dict() for msg in updated_messages]) response = ChatCompletionResponse( id=f"chatcmpl-{uuid.uuid4().hex[:8]}", @@ -193,10 +198,7 @@ async def chat_completions(request: ChatCompletionRequest): choices=[ { "index": 0, - "messages": [ - {"role": msg.role, "content": msg.content} - for msg in updated_messages - ], + "message": {"role": "user", "content": messages_history_json}, "finish_reason": "stop", } ], @@ -214,10 +216,15 @@ async def chat_completions(request: ChatCompletionRequest): def main(): """Main function to initialize the knowledge base and start the server.""" load_knowledge_base() - import uvicorn uvicorn.run(app, host="0.0.0.0", port=8000) if __name__ == "__main__": main() + + +def start_server(host: str = "localhost", port: int = 8000): + """Start the REST server.""" + load_knowledge_base() + uvicorn.run(app, host=host, port=port) diff --git a/demos/use_cases/rag_agent/src/rag_agent/query_rewriter_agent.py b/demos/use_cases/rag_agent/src/rag_agent/query_rewriter_agent.py index eadc66b6..66eec6b8 100644 --- a/demos/use_cases/rag_agent/src/rag_agent/query_rewriter_agent.py +++ b/demos/use_cases/rag_agent/src/rag_agent/query_rewriter_agent.py @@ -1,9 +1,11 @@ +import json from pydantic import BaseModel from typing import List, Optional, Dict, Any from fastapi import FastAPI, HTTPException from openai import AsyncOpenAI import os import logging +import uvicorn from .api import ChatMessage, ChatCompletionRequest, ChatCompletionResponse @@ -104,6 +106,8 @@ async def chat_completions(request: ChatCompletionRequest): ) break + messages_history_json = json.dumps([msg.dict() for msg in updated_messages]) + response = ChatCompletionResponse( id=f"chatcmpl-{uuid.uuid4().hex[:8]}", created=int(time.time()), @@ -111,10 +115,7 @@ async def chat_completions(request: ChatCompletionRequest): choices=[ { "index": 0, - "messages": [ - {"role": msg.role, "content": msg.content} - for msg in updated_messages - ], + "message": {"role": "user", "content": messages_history_json}, "finish_reason": "stop", } ], @@ -138,3 +139,8 @@ async def health_check(): def parse_query(query): """Parse the user query and returns metadata extracted from query.""" return Response(query=query, metadata={"is_valid": True}) + + +def start_server(host: str = "localhost", port: int = 8000): + """Start the REST server.""" + uvicorn.run(app, host=host, port=port) diff --git a/demos/use_cases/rag_agent/src/rag_agent/response_generator_agent.py b/demos/use_cases/rag_agent/src/rag_agent/response_generator_agent.py index b3189ace..4e823edb 100644 --- a/demos/use_cases/rag_agent/src/rag_agent/response_generator_agent.py +++ b/demos/use_cases/rag_agent/src/rag_agent/response_generator_agent.py @@ -1,9 +1,11 @@ +import json from fastapi import FastAPI from openai import AsyncOpenAI import os import logging import time import uuid +import uvicorn from .api import ChatCompletionRequest, ChatCompletionResponse @@ -64,6 +66,8 @@ async def chat_completions(request: ChatCompletionRequest): generated_response = response.choices[0].message.content.strip() logger.info(f"Response generated successfully") + updated_history = [{"role": "assistant", "content": generated_response}] + return ChatCompletionResponse( id=f"chatcmpl-{uuid.uuid4().hex[:8]}", created=int(time.time()), @@ -71,7 +75,10 @@ async def chat_completions(request: ChatCompletionRequest): choices=[ { "index": 0, - "message": {"role": "assistant", "content": generated_response}, + "message": { + "role": "assistant", + "content": json.dumps(updated_history), + }, "finish_reason": "stop", } ], @@ -120,3 +127,8 @@ async def chat_completions(request: ChatCompletionRequest): async def health_check(): """Health check endpoint.""" return {"status": "healthy"} + + +def start_server(host: str = "localhost", port: int = 8000): + """Start the REST server.""" + uvicorn.run(app, host=host, port=port) diff --git a/demos/use_cases/rag_agent/uv.lock b/demos/use_cases/rag_agent/uv.lock index d17d0533..e90a6ad6 100644 --- a/demos/use_cases/rag_agent/uv.lock +++ b/demos/use_cases/rag_agent/uv.lock @@ -260,6 +260,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/63/67/ac57fbef5414ce84fe0bdeb497918ab2c781ff2cbf23c1bd91334b225669/cyclopts-3.23.1-py3-none-any.whl", hash = "sha256:8e57c6ea47d72b4b565c6a6c8a9fd56ed048ab4316627991230f4ad24ce2bc29", size = 85222 }, ] +[[package]] +name = "distro" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 }, +] + [[package]] name = "dnspython" version = "2.7.0" @@ -412,6 +421,78 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320 }, ] +[[package]] +name = "jiter" +version = "0.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/9d/ae7ddb4b8ab3fb1b51faf4deb36cb48a4fbbd7cb36bad6a5fca4741306f7/jiter-0.10.0.tar.gz", hash = "sha256:07a7142c38aacc85194391108dc91b5b57093c978a9932bd86a36862759d9500", size = 162759 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/7e/4011b5c77bec97cb2b572f566220364e3e21b51c48c5bd9c4a9c26b41b67/jiter-0.10.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:cd2fb72b02478f06a900a5782de2ef47e0396b3e1f7d5aba30daeb1fce66f303", size = 317215 }, + { url = "https://files.pythonhosted.org/packages/8a/4f/144c1b57c39692efc7ea7d8e247acf28e47d0912800b34d0ad815f6b2824/jiter-0.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:32bb468e3af278f095d3fa5b90314728a6916d89ba3d0ffb726dd9bf7367285e", size = 322814 }, + { url = "https://files.pythonhosted.org/packages/63/1f/db977336d332a9406c0b1f0b82be6f71f72526a806cbb2281baf201d38e3/jiter-0.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa8b3e0068c26ddedc7abc6fac37da2d0af16b921e288a5a613f4b86f050354f", size = 345237 }, + { url = "https://files.pythonhosted.org/packages/d7/1c/aa30a4a775e8a672ad7f21532bdbfb269f0706b39c6ff14e1f86bdd9e5ff/jiter-0.10.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:286299b74cc49e25cd42eea19b72aa82c515d2f2ee12d11392c56d8701f52224", size = 370999 }, + { url = "https://files.pythonhosted.org/packages/35/df/f8257abc4207830cb18880781b5f5b716bad5b2a22fb4330cfd357407c5b/jiter-0.10.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6ed5649ceeaeffc28d87fb012d25a4cd356dcd53eff5acff1f0466b831dda2a7", size = 491109 }, + { url = "https://files.pythonhosted.org/packages/06/76/9e1516fd7b4278aa13a2cc7f159e56befbea9aa65c71586305e7afa8b0b3/jiter-0.10.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2ab0051160cb758a70716448908ef14ad476c3774bd03ddce075f3c1f90a3d6", size = 388608 }, + { url = "https://files.pythonhosted.org/packages/6d/64/67750672b4354ca20ca18d3d1ccf2c62a072e8a2d452ac3cf8ced73571ef/jiter-0.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03997d2f37f6b67d2f5c475da4412be584e1cec273c1cfc03d642c46db43f8cf", size = 352454 }, + { url = "https://files.pythonhosted.org/packages/96/4d/5c4e36d48f169a54b53a305114be3efa2bbffd33b648cd1478a688f639c1/jiter-0.10.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c404a99352d839fed80d6afd6c1d66071f3bacaaa5c4268983fc10f769112e90", size = 391833 }, + { url = "https://files.pythonhosted.org/packages/0b/de/ce4a6166a78810bd83763d2fa13f85f73cbd3743a325469a4a9289af6dae/jiter-0.10.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:66e989410b6666d3ddb27a74c7e50d0829704ede652fd4c858e91f8d64b403d0", size = 523646 }, + { url = "https://files.pythonhosted.org/packages/a2/a6/3bc9acce53466972964cf4ad85efecb94f9244539ab6da1107f7aed82934/jiter-0.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b532d3af9ef4f6374609a3bcb5e05a1951d3bf6190dc6b176fdb277c9bbf15ee", size = 514735 }, + { url = "https://files.pythonhosted.org/packages/b4/d8/243c2ab8426a2a4dea85ba2a2ba43df379ccece2145320dfd4799b9633c5/jiter-0.10.0-cp310-cp310-win32.whl", hash = "sha256:da9be20b333970e28b72edc4dff63d4fec3398e05770fb3205f7fb460eb48dd4", size = 210747 }, + { url = "https://files.pythonhosted.org/packages/37/7a/8021bd615ef7788b98fc76ff533eaac846322c170e93cbffa01979197a45/jiter-0.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:f59e533afed0c5b0ac3eba20d2548c4a550336d8282ee69eb07b37ea526ee4e5", size = 207484 }, + { url = "https://files.pythonhosted.org/packages/1b/dd/6cefc6bd68b1c3c979cecfa7029ab582b57690a31cd2f346c4d0ce7951b6/jiter-0.10.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:3bebe0c558e19902c96e99217e0b8e8b17d570906e72ed8a87170bc290b1e978", size = 317473 }, + { url = "https://files.pythonhosted.org/packages/be/cf/fc33f5159ce132be1d8dd57251a1ec7a631c7df4bd11e1cd198308c6ae32/jiter-0.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:558cc7e44fd8e507a236bee6a02fa17199ba752874400a0ca6cd6e2196cdb7dc", size = 321971 }, + { url = "https://files.pythonhosted.org/packages/68/a4/da3f150cf1d51f6c472616fb7650429c7ce053e0c962b41b68557fdf6379/jiter-0.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d613e4b379a07d7c8453c5712ce7014e86c6ac93d990a0b8e7377e18505e98d", size = 345574 }, + { url = "https://files.pythonhosted.org/packages/84/34/6e8d412e60ff06b186040e77da5f83bc158e9735759fcae65b37d681f28b/jiter-0.10.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f62cf8ba0618eda841b9bf61797f21c5ebd15a7a1e19daab76e4e4b498d515b2", size = 371028 }, + { url = "https://files.pythonhosted.org/packages/fb/d9/9ee86173aae4576c35a2f50ae930d2ccb4c4c236f6cb9353267aa1d626b7/jiter-0.10.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:919d139cdfa8ae8945112398511cb7fca58a77382617d279556b344867a37e61", size = 491083 }, + { url = "https://files.pythonhosted.org/packages/d9/2c/f955de55e74771493ac9e188b0f731524c6a995dffdcb8c255b89c6fb74b/jiter-0.10.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13ddbc6ae311175a3b03bd8994881bc4635c923754932918e18da841632349db", size = 388821 }, + { url = "https://files.pythonhosted.org/packages/81/5a/0e73541b6edd3f4aada586c24e50626c7815c561a7ba337d6a7eb0a915b4/jiter-0.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c440ea003ad10927a30521a9062ce10b5479592e8a70da27f21eeb457b4a9c5", size = 352174 }, + { url = "https://files.pythonhosted.org/packages/1c/c0/61eeec33b8c75b31cae42be14d44f9e6fe3ac15a4e58010256ac3abf3638/jiter-0.10.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dc347c87944983481e138dea467c0551080c86b9d21de6ea9306efb12ca8f606", size = 391869 }, + { url = "https://files.pythonhosted.org/packages/41/22/5beb5ee4ad4ef7d86f5ea5b4509f680a20706c4a7659e74344777efb7739/jiter-0.10.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:13252b58c1f4d8c5b63ab103c03d909e8e1e7842d302473f482915d95fefd605", size = 523741 }, + { url = "https://files.pythonhosted.org/packages/ea/10/768e8818538e5817c637b0df52e54366ec4cebc3346108a4457ea7a98f32/jiter-0.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7d1bbf3c465de4a24ab12fb7766a0003f6f9bce48b8b6a886158c4d569452dc5", size = 514527 }, + { url = "https://files.pythonhosted.org/packages/73/6d/29b7c2dc76ce93cbedabfd842fc9096d01a0550c52692dfc33d3cc889815/jiter-0.10.0-cp311-cp311-win32.whl", hash = "sha256:db16e4848b7e826edca4ccdd5b145939758dadf0dc06e7007ad0e9cfb5928ae7", size = 210765 }, + { url = "https://files.pythonhosted.org/packages/c2/c9/d394706deb4c660137caf13e33d05a031d734eb99c051142e039d8ceb794/jiter-0.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:9c9c1d5f10e18909e993f9641f12fe1c77b3e9b533ee94ffa970acc14ded3812", size = 209234 }, + { url = "https://files.pythonhosted.org/packages/6d/b5/348b3313c58f5fbfb2194eb4d07e46a35748ba6e5b3b3046143f3040bafa/jiter-0.10.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:1e274728e4a5345a6dde2d343c8da018b9d4bd4350f5a472fa91f66fda44911b", size = 312262 }, + { url = "https://files.pythonhosted.org/packages/9c/4a/6a2397096162b21645162825f058d1709a02965606e537e3304b02742e9b/jiter-0.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7202ae396446c988cb2a5feb33a543ab2165b786ac97f53b59aafb803fef0744", size = 320124 }, + { url = "https://files.pythonhosted.org/packages/2a/85/1ce02cade7516b726dd88f59a4ee46914bf79d1676d1228ef2002ed2f1c9/jiter-0.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23ba7722d6748b6920ed02a8f1726fb4b33e0fd2f3f621816a8b486c66410ab2", size = 345330 }, + { url = "https://files.pythonhosted.org/packages/75/d0/bb6b4f209a77190ce10ea8d7e50bf3725fc16d3372d0a9f11985a2b23eff/jiter-0.10.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:371eab43c0a288537d30e1f0b193bc4eca90439fc08a022dd83e5e07500ed026", size = 369670 }, + { url = "https://files.pythonhosted.org/packages/a0/f5/a61787da9b8847a601e6827fbc42ecb12be2c925ced3252c8ffcb56afcaf/jiter-0.10.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6c675736059020365cebc845a820214765162728b51ab1e03a1b7b3abb70f74c", size = 489057 }, + { url = "https://files.pythonhosted.org/packages/12/e4/6f906272810a7b21406c760a53aadbe52e99ee070fc5c0cb191e316de30b/jiter-0.10.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0c5867d40ab716e4684858e4887489685968a47e3ba222e44cde6e4a2154f959", size = 389372 }, + { url = "https://files.pythonhosted.org/packages/e2/ba/77013b0b8ba904bf3762f11e0129b8928bff7f978a81838dfcc958ad5728/jiter-0.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:395bb9a26111b60141757d874d27fdea01b17e8fac958b91c20128ba8f4acc8a", size = 352038 }, + { url = "https://files.pythonhosted.org/packages/67/27/c62568e3ccb03368dbcc44a1ef3a423cb86778a4389e995125d3d1aaa0a4/jiter-0.10.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6842184aed5cdb07e0c7e20e5bdcfafe33515ee1741a6835353bb45fe5d1bd95", size = 391538 }, + { url = "https://files.pythonhosted.org/packages/c0/72/0d6b7e31fc17a8fdce76164884edef0698ba556b8eb0af9546ae1a06b91d/jiter-0.10.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:62755d1bcea9876770d4df713d82606c8c1a3dca88ff39046b85a048566d56ea", size = 523557 }, + { url = "https://files.pythonhosted.org/packages/2f/09/bc1661fbbcbeb6244bd2904ff3a06f340aa77a2b94e5a7373fd165960ea3/jiter-0.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:533efbce2cacec78d5ba73a41756beff8431dfa1694b6346ce7af3a12c42202b", size = 514202 }, + { url = "https://files.pythonhosted.org/packages/1b/84/5a5d5400e9d4d54b8004c9673bbe4403928a00d28529ff35b19e9d176b19/jiter-0.10.0-cp312-cp312-win32.whl", hash = "sha256:8be921f0cadd245e981b964dfbcd6fd4bc4e254cdc069490416dd7a2632ecc01", size = 211781 }, + { url = "https://files.pythonhosted.org/packages/9b/52/7ec47455e26f2d6e5f2ea4951a0652c06e5b995c291f723973ae9e724a65/jiter-0.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:a7c7d785ae9dda68c2678532a5a1581347e9c15362ae9f6e68f3fdbfb64f2e49", size = 206176 }, + { url = "https://files.pythonhosted.org/packages/2e/b0/279597e7a270e8d22623fea6c5d4eeac328e7d95c236ed51a2b884c54f70/jiter-0.10.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e0588107ec8e11b6f5ef0e0d656fb2803ac6cf94a96b2b9fc675c0e3ab5e8644", size = 311617 }, + { url = "https://files.pythonhosted.org/packages/91/e3/0916334936f356d605f54cc164af4060e3e7094364add445a3bc79335d46/jiter-0.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cafc4628b616dc32530c20ee53d71589816cf385dd9449633e910d596b1f5c8a", size = 318947 }, + { url = "https://files.pythonhosted.org/packages/6a/8e/fd94e8c02d0e94539b7d669a7ebbd2776e51f329bb2c84d4385e8063a2ad/jiter-0.10.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:520ef6d981172693786a49ff5b09eda72a42e539f14788124a07530f785c3ad6", size = 344618 }, + { url = "https://files.pythonhosted.org/packages/6f/b0/f9f0a2ec42c6e9c2e61c327824687f1e2415b767e1089c1d9135f43816bd/jiter-0.10.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:554dedfd05937f8fc45d17ebdf298fe7e0c77458232bcb73d9fbbf4c6455f5b3", size = 368829 }, + { url = "https://files.pythonhosted.org/packages/e8/57/5bbcd5331910595ad53b9fd0c610392ac68692176f05ae48d6ce5c852967/jiter-0.10.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5bc299da7789deacf95f64052d97f75c16d4fc8c4c214a22bf8d859a4288a1c2", size = 491034 }, + { url = "https://files.pythonhosted.org/packages/9b/be/c393df00e6e6e9e623a73551774449f2f23b6ec6a502a3297aeeece2c65a/jiter-0.10.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5161e201172de298a8a1baad95eb85db4fb90e902353b1f6a41d64ea64644e25", size = 388529 }, + { url = "https://files.pythonhosted.org/packages/42/3e/df2235c54d365434c7f150b986a6e35f41ebdc2f95acea3036d99613025d/jiter-0.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e2227db6ba93cb3e2bf67c87e594adde0609f146344e8207e8730364db27041", size = 350671 }, + { url = "https://files.pythonhosted.org/packages/c6/77/71b0b24cbcc28f55ab4dbfe029f9a5b73aeadaba677843fc6dc9ed2b1d0a/jiter-0.10.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:15acb267ea5e2c64515574b06a8bf393fbfee6a50eb1673614aa45f4613c0cca", size = 390864 }, + { url = "https://files.pythonhosted.org/packages/6a/d3/ef774b6969b9b6178e1d1e7a89a3bd37d241f3d3ec5f8deb37bbd203714a/jiter-0.10.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:901b92f2e2947dc6dfcb52fd624453862e16665ea909a08398dde19c0731b7f4", size = 522989 }, + { url = "https://files.pythonhosted.org/packages/0c/41/9becdb1d8dd5d854142f45a9d71949ed7e87a8e312b0bede2de849388cb9/jiter-0.10.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d0cb9a125d5a3ec971a094a845eadde2db0de85b33c9f13eb94a0c63d463879e", size = 513495 }, + { url = "https://files.pythonhosted.org/packages/9c/36/3468e5a18238bdedae7c4d19461265b5e9b8e288d3f86cd89d00cbb48686/jiter-0.10.0-cp313-cp313-win32.whl", hash = "sha256:48a403277ad1ee208fb930bdf91745e4d2d6e47253eedc96e2559d1e6527006d", size = 211289 }, + { url = "https://files.pythonhosted.org/packages/7e/07/1c96b623128bcb913706e294adb5f768fb7baf8db5e1338ce7b4ee8c78ef/jiter-0.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:75f9eb72ecb640619c29bf714e78c9c46c9c4eaafd644bf78577ede459f330d4", size = 205074 }, + { url = "https://files.pythonhosted.org/packages/54/46/caa2c1342655f57d8f0f2519774c6d67132205909c65e9aa8255e1d7b4f4/jiter-0.10.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:28ed2a4c05a1f32ef0e1d24c2611330219fed727dae01789f4a335617634b1ca", size = 318225 }, + { url = "https://files.pythonhosted.org/packages/43/84/c7d44c75767e18946219ba2d703a5a32ab37b0bc21886a97bc6062e4da42/jiter-0.10.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14a4c418b1ec86a195f1ca69da8b23e8926c752b685af665ce30777233dfe070", size = 350235 }, + { url = "https://files.pythonhosted.org/packages/01/16/f5a0135ccd968b480daad0e6ab34b0c7c5ba3bc447e5088152696140dcb3/jiter-0.10.0-cp313-cp313t-win_amd64.whl", hash = "sha256:d7bfed2fe1fe0e4dda6ef682cee888ba444b21e7a6553e03252e4feb6cf0adca", size = 207278 }, + { url = "https://files.pythonhosted.org/packages/1c/9b/1d646da42c3de6c2188fdaa15bce8ecb22b635904fc68be025e21249ba44/jiter-0.10.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:5e9251a5e83fab8d87799d3e1a46cb4b7f2919b895c6f4483629ed2446f66522", size = 310866 }, + { url = "https://files.pythonhosted.org/packages/ad/0e/26538b158e8a7c7987e94e7aeb2999e2e82b1f9d2e1f6e9874ddf71ebda0/jiter-0.10.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:023aa0204126fe5b87ccbcd75c8a0d0261b9abdbbf46d55e7ae9f8e22424eeb8", size = 318772 }, + { url = "https://files.pythonhosted.org/packages/7b/fb/d302893151caa1c2636d6574d213e4b34e31fd077af6050a9c5cbb42f6fb/jiter-0.10.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c189c4f1779c05f75fc17c0c1267594ed918996a231593a21a5ca5438445216", size = 344534 }, + { url = "https://files.pythonhosted.org/packages/01/d8/5780b64a149d74e347c5128d82176eb1e3241b1391ac07935693466d6219/jiter-0.10.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:15720084d90d1098ca0229352607cd68256c76991f6b374af96f36920eae13c4", size = 369087 }, + { url = "https://files.pythonhosted.org/packages/e8/5b/f235a1437445160e777544f3ade57544daf96ba7e96c1a5b24a6f7ac7004/jiter-0.10.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e4f2fb68e5f1cfee30e2b2a09549a00683e0fde4c6a2ab88c94072fc33cb7426", size = 490694 }, + { url = "https://files.pythonhosted.org/packages/85/a9/9c3d4617caa2ff89cf61b41e83820c27ebb3f7b5fae8a72901e8cd6ff9be/jiter-0.10.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ce541693355fc6da424c08b7edf39a2895f58d6ea17d92cc2b168d20907dee12", size = 388992 }, + { url = "https://files.pythonhosted.org/packages/68/b1/344fd14049ba5c94526540af7eb661871f9c54d5f5601ff41a959b9a0bbd/jiter-0.10.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31c50c40272e189d50006ad5c73883caabb73d4e9748a688b216e85a9a9ca3b9", size = 351723 }, + { url = "https://files.pythonhosted.org/packages/41/89/4c0e345041186f82a31aee7b9d4219a910df672b9fef26f129f0cda07a29/jiter-0.10.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fa3402a2ff9815960e0372a47b75c76979d74402448509ccd49a275fa983ef8a", size = 392215 }, + { url = "https://files.pythonhosted.org/packages/55/58/ee607863e18d3f895feb802154a2177d7e823a7103f000df182e0f718b38/jiter-0.10.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:1956f934dca32d7bb647ea21d06d93ca40868b505c228556d3373cbd255ce853", size = 522762 }, + { url = "https://files.pythonhosted.org/packages/15/d0/9123fb41825490d16929e73c212de9a42913d68324a8ce3c8476cae7ac9d/jiter-0.10.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:fcedb049bdfc555e261d6f65a6abe1d5ad68825b7202ccb9692636c70fcced86", size = 513427 }, + { url = "https://files.pythonhosted.org/packages/d8/b3/2bd02071c5a2430d0b70403a34411fc519c2f227da7b03da9ba6a956f931/jiter-0.10.0-cp314-cp314-win32.whl", hash = "sha256:ac509f7eccca54b2a29daeb516fb95b6f0bd0d0d8084efaf8ed5dfc7b9f0b357", size = 210127 }, + { url = "https://files.pythonhosted.org/packages/03/0c/5fe86614ea050c3ecd728ab4035534387cd41e7c1855ef6c031f1ca93e3f/jiter-0.10.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5ed975b83a2b8639356151cef5c0d597c68376fc4922b45d0eb384ac058cfa00", size = 318527 }, + { url = "https://files.pythonhosted.org/packages/b3/4a/4175a563579e884192ba6e81725fc0448b042024419be8d83aa8a80a3f44/jiter-0.10.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aa96f2abba33dc77f79b4cf791840230375f9534e5fac927ccceb58c5e604a5", size = 354213 }, +] + [[package]] name = "jsonschema" version = "4.25.1" @@ -609,6 +690,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a4/8e/469e5a4a2f5855992e425f3cb33804cc07bf18d48f2db061aec61ce50270/more_itertools-10.8.0-py3-none-any.whl", hash = "sha256:52d4362373dcf7c52546bc4af9a86ee7c4579df9a8dc268be0a2f949d376cc9b", size = 69667 }, ] +[[package]] +name = "openai" +version = "1.107.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f3/e0/a62daa7ff769df969cc1b782852cace79615039630b297005356f5fb46fb/openai-1.107.1.tar.gz", hash = "sha256:7c51b6b8adadfcf5cada08a613423575258b180af5ad4bc2954b36ebc0d3ad48", size = 563671 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/12/32c19999a58eec4a695e8ce334442b6135df949f0bb61b2ceaa4fa60d3a9/openai-1.107.1-py3-none-any.whl", hash = "sha256:168f9885b1b70d13ada0868a0d0adfd538c16a02f7fd9fe063851a2c9a025e72", size = 945177 }, +] + [[package]] name = "openapi-core" version = "0.19.5" @@ -926,6 +1026,7 @@ dependencies = [ { name = "fastapi" }, { name = "fastmcp" }, { name = "mcp" }, + { name = "openai" }, { name = "pydantic" }, { name = "uvicorn" }, ] @@ -936,6 +1037,7 @@ requires-dist = [ { name = "fastapi", specifier = ">=0.104.1" }, { name = "fastmcp", specifier = ">=2.12.2" }, { name = "mcp", specifier = ">=1.13.1" }, + { name = "openai", specifier = ">=1.0.0" }, { name = "pydantic", specifier = ">=2.11.7" }, { name = "uvicorn", specifier = ">=0.24.0" }, ] @@ -1185,6 +1287,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ce/fd/901cfa59aaa5b30a99e16876f11abe38b59a1a2c51ffb3d7142bb6089069/starlette-0.47.3-py3-none-any.whl", hash = "sha256:89c0778ca62a76b826101e7c709e70680a1699ca7da6b44d38eb0a7e61fe4b51", size = 72991 }, ] +[[package]] +name = "tqdm" +version = "4.67.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "platform_system == 'Windows'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540 }, +] + [[package]] name = "typing-extensions" version = "4.15.0"