pending tmo work

This commit is contained in:
Adil Hafeez 2025-09-07 23:26:00 -07:00
parent bb71d041a0
commit 22c84fb689
No known key found for this signature in database
GPG key ID: 9B18EF7691369645
17 changed files with 1626 additions and 94 deletions

View file

@ -7,6 +7,74 @@ properties:
- v0.1
- v0.1.0
- 0.1-beta
- v0.2.0
# sample provider
# llm_providers_v2:
# default:
# listener:
# port: 12000
# protocol: openai
# providers:
# - access_key: ${OPENAI_API_KEY}
# model: openai/gpt-4o
llm_providers_v2:
type: object
properties:
default:
type: object
properties:
listener:
type: object
properties:
port:
type: integer
protocol:
type: string
providers:
type: array
items:
type: object
properties:
name:
type: string
access_key:
type: string
model:
type: string
default:
type: boolean
base_url:
type: string
http_host:
type: string
provider_interface:
type: string
enum:
- arch
- claude
- deepseek
- groq
- mistral
- openai
- gemini
routing_preferences:
type: array
items:
type: object
properties:
name:
type: string
description:
type: string
additionalProperties: false
required:
- name
- description
additionalProperties: false
required:
- model
listeners:
type: object
additionalProperties: false

View file

@ -98,98 +98,122 @@ def validate_and_render_schema():
llms_with_usage = []
model_name_keys = set()
model_usage_name_keys = set()
for llm_provider in config_yaml["llm_providers"]:
if llm_provider.get("usage", None):
llms_with_usage.append(llm_provider["name"])
if llm_provider.get("name") in llm_provider_name_set:
raise Exception(
f"Duplicate llm_provider name {llm_provider.get('name')}, please provide unique name for each llm_provider"
)
llm_gateway_listener = config_yaml.get("listeners", {}).get("egress_traffic", {})
if llm_gateway_listener.get("port") == None:
llm_gateway_listener["port"] = 12000
if llm_gateway_listener and config_yaml["llm_providers_v2"]:
raise Exception("Please provide either egress_traffic or llm_providers_v2, not both")
model_name = llm_provider.get("model")
if model_name in model_name_keys:
raise Exception(
f"Duplicate model name {model_name}, please provide unique model name for each llm_provider"
)
model_name_keys.add(model_name)
if llm_provider.get("name") is None:
llm_provider["name"] = model_name
if config_yaml["llm_providers"]:
if config_yaml["llm_providers_v2"] is not None:
raise Exception("Please provide either llm_providers or llm_providers_v2, not both")
config_yaml["llm_providers_v2"] = {
"default": {
"listener": {
"port": llm_gateway_listener["port"],
"protocol": llm_gateway_listener.get("message_format", "openai")
},
"providers": config_yaml["llm_providers"]
}
}
model_name_tokens = model_name.split("/")
if len(model_name_tokens) < 2:
raise Exception(
f"Invalid model name {model_name}. Please provide model name in the format <provider>/<model_id>."
)
provider = model_name_tokens[0]
model_id = "/".join(model_name_tokens[1:])
if provider not in SUPPORTED_PROVIDERS:
if (
llm_provider.get("base_url", None) is None
or llm_provider.get("provider_interface", None) is None
):
raise Exception(
f"Must provide base_url and provider_interface for unsupported provider {provider} for model {model_name}. Supported providers are: {', '.join(SUPPORTED_PROVIDERS)}"
)
provider = llm_provider.get("provider_interface", None)
elif llm_provider.get("provider_interface", None) is not None:
raise Exception(
f"Please provide provider interface as part of model name {model_name} using the format <provider>/<model_id>. For example, use 'openai/gpt-3.5-turbo' instead of 'gpt-3.5-turbo' "
)
for llm_provider_name, provider_def in config_yaml["llm_providers_v2"].items():
provider_listener = provider_def["listener"]
for llm_provider in provider_def["providers"]:
if llm_provider.get("usage", None):
llms_with_usage.append(llm_provider["name"])
if llm_provider.get("name") in llm_provider_name_set:
raise Exception(
f"Duplicate llm_provider name {llm_provider.get('name')}, please provide unique name for each llm_provider"
)
if model_id in model_name_keys:
raise Exception(
f"Duplicate model_id {model_id}, please provide unique model_id for each llm_provider"
)
model_name_keys.add(model_id)
model_name = llm_provider.get("model")
if model_name in model_name_keys:
raise Exception(
f"Duplicate model name {model_name}, please provide unique model name for each llm_provider"
)
model_name_keys.add(model_name)
if llm_provider.get("name") is None:
llm_provider["name"] = model_name
for routing_preference in llm_provider.get("routing_preferences", []):
if routing_preference.get("name") in model_usage_name_keys:
raise Exception(
f"Duplicate routing preference name \"{routing_preference.get('name')}\", please provide unique name for each routing preference"
)
model_usage_name_keys.add(routing_preference.get("name"))
model_name_tokens = model_name.split("/")
if len(model_name_tokens) < 2:
raise Exception(
f"Invalid model name {model_name}. Please provide model name in the format <provider>/<model_id>."
)
provider = model_name_tokens[0]
model_id = "/".join(model_name_tokens[1:])
if provider not in SUPPORTED_PROVIDERS:
if (
llm_provider.get("base_url", None) is None
or llm_provider.get("provider_interface", None) is None
):
raise Exception(
f"Must provide base_url and provider_interface for unsupported provider {provider} for model {model_name}. Supported providers are: {', '.join(SUPPORTED_PROVIDERS)}"
)
provider = llm_provider.get("provider_interface", None)
elif llm_provider.get("provider_interface", None) is not None:
raise Exception(
f"Please provide provider interface as part of model name {model_name} using the format <provider>/<model_id>. For example, use 'openai/gpt-3.5-turbo' instead of 'gpt-3.5-turbo' "
)
llm_provider["model"] = model_id
llm_provider["provider_interface"] = provider
llm_provider_name_set.add(llm_provider.get("name"))
provider = None
if llm_provider.get("provider") and llm_provider.get("provider_interface"):
raise Exception(
"Please provide either provider or provider_interface, not both"
)
if llm_provider.get("provider"):
provider = llm_provider["provider"]
llm_provider["provider_interface"] = provider
del llm_provider["provider"]
updated_llm_providers.append(llm_provider)
if model_id in model_name_keys:
raise Exception(
f"Duplicate model_id {model_id}, please provide unique model_id for each llm_provider"
)
model_name_keys.add(model_id)
if llm_provider.get("base_url", None):
base_url = llm_provider["base_url"]
urlparse_result = urlparse(base_url)
url_path = urlparse_result.path
if url_path and url_path != "/":
raise Exception(
f"Please provide base_url without path, got {base_url}. Use base_url like 'http://example.com' instead of 'http://example.com/path'."
)
if urlparse_result.scheme == "" or urlparse_result.scheme not in [
"http",
"https",
]:
raise Exception(
"Please provide a valid URL with scheme (http/https) in base_url"
)
protocol = urlparse_result.scheme
port = urlparse_result.port
if port is None:
if protocol == "http":
port = 80
else:
port = 443
endpoint = urlparse_result.hostname
llm_provider["endpoint"] = endpoint
llm_provider["port"] = port
llm_provider["protocol"] = protocol
llms_with_endpoint.append(llm_provider)
for routing_preference in llm_provider.get("routing_preferences", []):
if routing_preference.get("name") in model_usage_name_keys:
raise Exception(
f"Duplicate routing preference name \"{routing_preference.get('name')}\", please provide unique name for each routing preference"
)
model_usage_name_keys.add(routing_preference.get("name"))
llm_provider["model"] = model_id
llm_provider["provider_interface"] = provider
llm_provider_name_set.add(llm_provider.get("name"))
provider = None
if llm_provider.get("provider") and llm_provider.get("provider_interface"):
raise Exception(
"Please provide either provider or provider_interface, not both"
)
if llm_provider.get("provider"):
provider = llm_provider["provider"]
llm_provider["provider_interface"] = provider
del llm_provider["provider"]
updated_llm_providers.append(llm_provider)
if llm_provider.get("base_url", None):
base_url = llm_provider["base_url"]
urlparse_result = urlparse(base_url)
url_path = urlparse_result.path
if url_path and url_path != "/":
raise Exception(
f"Please provide base_url without path, got {base_url}. Use base_url like 'http://example.com' instead of 'http://example.com/path'."
)
if urlparse_result.scheme == "" or urlparse_result.scheme not in [
"http",
"https",
]:
raise Exception(
"Please provide a valid URL with scheme (http/https) in base_url"
)
protocol = urlparse_result.scheme
port = urlparse_result.port
if port is None:
if protocol == "http":
port = 80
else:
port = 443
endpoint = urlparse_result.hostname
llm_provider["endpoint"] = endpoint
llm_provider["port"] = port
llm_provider["protocol"] = protocol
llms_with_endpoint.append(llm_provider)
if len(model_usage_name_keys) > 0:
routing_llm_provider = config_yaml.get("routing", {}).get("llm_provider", None)
@ -221,14 +245,6 @@ def validate_and_render_schema():
if prompt_gateway_listener.get("timeout") == None:
prompt_gateway_listener["timeout"] = "10s"
llm_gateway_listener = config_yaml.get("listeners", {}).get("egress_traffic", {})
if llm_gateway_listener.get("port") == None:
llm_gateway_listener["port"] = 12000 # default port for llm gateway
if llm_gateway_listener.get("address") == None:
llm_gateway_listener["address"] = "127.0.0.1"
if llm_gateway_listener.get("timeout") == None:
llm_gateway_listener["timeout"] = "10s"
use_agent_orchestrator = config_yaml.get("overrides", {}).get(
"use_agent_orchestrator", False
)

View file

@ -13,11 +13,42 @@ pub struct Routing {
pub model: Option<String>,
}
/// A tool that an [`Agent`] can call, as listed under the agent's `tools` key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tool {
    /// Identifier of the tool.
    pub name: String,
    /// Protocol used to reach the tool (the demo config uses `mcp`).
    pub protocol: String,
    /// Address of the tool; presumably a full URL such as
    /// `https://localhost:10500` — TODO confirm the accepted formats.
    pub endpoint: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Listener {
pub port: u16,
pub protocol: String,
pub path: String,
}
/// Definition of a single agent as it appears under the `agents` key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Agent {
    /// Identifier of the agent.
    pub name: String,
    /// Optional human-readable summary of what the agent is for.
    pub description: Option<String>,
    /// Optional instruction text for the agent; presumably used as its
    /// system prompt — TODO confirm against the consumer of this struct.
    pub instructions: Option<String>,
    /// Tools the agent may use. Required: an agent entry without a `tools`
    /// key fails to deserialize.
    pub tools: Vec<Tool>,
    /// Listener on which the agent is exposed.
    pub listener: Listener,
    /// Model used by the agent (the demo config uses `openai/gpt-4o`).
    pub model: String,
}
/// One named group of LLM providers together with the listener it is bound to.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmProviderV2 {
    /// Listener settings for this provider group.
    pub listener: Listener,
    /// Providers that belong to this group.
    pub providers: Vec<LlmProvider>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Configuration {
pub version: String,
pub endpoints: Option<HashMap<String, Endpoint>>,
pub llm_providers: Vec<LlmProvider>,
pub llm_providers_v2: Option<HashMap<String, LlmProviderV2>>,
pub overrides: Option<Overrides>,
pub system_prompt: Option<String>,
pub prompt_guards: Option<PromptGuards>,
@ -27,6 +58,7 @@ pub struct Configuration {
pub tracing: Option<Tracing>,
pub mode: Option<GatewayMode>,
pub routing: Option<Routing>,
pub agents: Option<Agent>,
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]

View file

View file

@ -0,0 +1,38 @@
version: v0.1.0
agents:
- name: rag_assistant
description: t-mobile virtual assistant for device contracts.
instructions: |
You are a virtual assistant, here to help users answer questions from device contracts team.
Use following instructions to process the user request,
1. Use query_processor_agent to understand user queries
2. Use search_documents to fetch relevant information
3. Use response_generator_agent to formulate clear responses
model: openai/gpt-4o
tools:
- name: query_processor_agent
# Parses user queries and extracts metadata, also handles clarification workflow
protocol: mcp
endpoint: https://localhost:10500
- name: search_documents
# Searches the document store for relevant information
protocol: mcp
endpoint: https://localhost:10501
- name: response_generator_agent
# Generates a final response based on user query and retrieved context
protocol: mcp
endpoint: https://localhost:10502
listener:
port: 8000
protocol: openai
path: /v1/chat/completions
llm_providers_v2:
default:
listener:
port: 12000
protocol: openai
providers:
- access_key: ${OPENAI_API_KEY}
model: openai/gpt-4o

View file

@ -0,0 +1,4 @@
# Netscape HTTP Cookie File
# https://curl.se/docs/http-cookies.html
# This file was generated by libcurl! Edit at your own risk.

View file

@ -0,0 +1,19 @@
[project]
name = "rag_agent"
version = "0.1.0"
description = "RAG Agent"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"click>=8.2.1",
"mcp>=1.13.1",
"fastmcp>=2.12.2",
"pydantic>=2.11.7",
]
[project.scripts]
rag_agent = "rag_agent:main"
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

View file

@ -0,0 +1,30 @@
import click
from mcp.server.fastmcp import FastMCP
mcp = None
@click.command()
@click.option('--transport', 'transport', default='stdio')
@click.option('--host', 'host', default='localhost')
@click.option('--port', 'port', default=10101)
@click.option(
    '--agent',
    'agent',
    # Restrict the value to the known agent modules so that a typo is
    # reported as a usage error instead of silently loading every agent.
    type=click.Choice(['query_parser', 'document_store', 'response_generator']),
    default=None,
)
def main(host, port, agent, transport):
    """Start the RAG Agent Demo MCP server with one agent or all of them."""
    # Parameters (all supplied by click from the command line):
    #   host, port -- forwarded to the FastMCP constructor.
    #   agent      -- name of the single agent module to load; None loads all.
    #   transport  -- forwarded to FastMCP.run().
    print(f"Starting agent(s): {agent if agent else 'all'}")

    # The server instance is published as a module-level global *before* the
    # agent modules are imported; presumably they pick it up via
    # `from . import mcp` and register their tools on it at import time --
    # verify against the agent modules.
    global mcp
    mcp = FastMCP("RAG Agent Demo", host=host, port=port)

    if agent == "query_parser":
        import rag_agent.query_parser
    elif agent == "document_store":
        import rag_agent.document_store
    elif agent == "response_generator":
        import rag_agent.response_generator
    else:
        # Only reachable when --agent was omitted (agent is None).
        import rag_agent.query_parser
        import rag_agent.document_store
        import rag_agent.response_generator
        print("All agents loaded.")

    mcp.run(transport=transport)
if __name__ == '__main__':
main()

View file

@ -0,0 +1,16 @@
from pydantic import BaseModel
from . import mcp
# Input model for the document-store query tool.
# (Documented with comments rather than a class docstring so the schema that
# pydantic generates for this model is left unchanged.)
class QueryRequest(BaseModel):
    # The query text.
    query: str
    # Optional extra information accompanying the query; may be omitted.
    metadata: dict | None = None
# Result model pairing a query with its result list.
# NOTE(review): the tool in this module currently returns a plain dict with
# the same two keys instead of this model -- confirm whether that is intended.
class QueryResponse(BaseModel):
    # The query text the results belong to.
    query: str
    # The results for the query; element type is not constrained.
    results: list
@mcp.tool()
def query_rag_store(request: QueryRequest):
    """Query the RAG document store."""
    # Stub behaviour: echo the query back together with an empty result list.
    matches = []
    return {"query": request.query, "results": matches}

View file

@ -0,0 +1,13 @@
from pydantic import BaseModel
from . import mcp
# Result model of the query parser: the query plus the metadata derived from it.
# (Documented with comments rather than a class docstring so the schema that
# pydantic generates for this model is left unchanged.)
class Response(BaseModel):
    # The query text that was parsed.
    query: str
    # Metadata extracted from the query.
    metadata: dict
@mcp.tool()
def parse_query(query: str):
    """Parse the user query and returns metadata extracted from query."""
    # `query` is annotated as `str` so the tool declares a string input,
    # matching the `str` field of `Response` that it is stored in.
    #
    # Stub behaviour: every query is reported as valid and no further
    # metadata is extracted.
    return Response(query=query, metadata={"is_valid": True})

View file

@ -0,0 +1,6 @@
from . import mcp
@mcp.tool()
def generate_response(query, context):
    """Generate a response based on the user query and context."""
    # Stub behaviour: echo both inputs back alongside a fixed response text.
    reply = {"query": query, "context": context}
    reply["response"] = "This is a generated response."
    return reply

View file

@ -0,0 +1,35 @@
# Step 1: Initialize session
POST http://localhost:10101/mcp
Content-Type: application/json
Accept: application/json, text/event-stream
{
  "jsonrpc": "2.0",
  "id": 1,
  "method": "initialize",
  "params": {
    "protocolVersion": "2024-11-05",
    "capabilities": {},
    "clientInfo": {
      "name": "ExampleClient",
      "version": "1.0.0"
    }
  }
}
HTTP 200
[Captures]
session_id: header "mcp-session-id"

# Step 2: List tools, reusing the session ID captured from the step 1 response
# (a hard-coded ID only matches the server run it was copied from).
POST http://localhost:10101/mcp
Content-Type: application/json
Accept: application/json, text/event-stream
mcp-session-id: {{session_id}}
{
  "jsonrpc": "2.0",
  "id": 2,
  "method": "tools/list",
  "params": {}
}

View file

@ -0,0 +1,31 @@
### Step 1: Initialize session
POST http://localhost:10101/mcp
Content-Type: application/json
Accept: application/json, text/event-stream
{
"jsonrpc": "2.0",
"id": 1,
"method": "initialize",
"params": {
"protocolVersion": "2024-11-05",
"capabilities": {},
"clientInfo": {
"name": "ExampleClient",
"version": "1.0.0"
}
}
}
### Step 2: List tools (use session ID from previous response)
POST http://localhost:10101/mcp
Content-Type: application/json
Accept: application/json, text/event-stream
mcp-session-id: af2e2dace64c48f99ac3536faeaa3c68
{
"jsonrpc": "2.0",
"id": 2,
"method": "tools/list",
"params": {}
}

1224
demos/use_cases/rag_agent/uv.lock generated Normal file

File diff suppressed because it is too large Load diff