2024-12-20 13:25:01 -08:00
import json
2024-10-03 18:21:27 -07:00
import os
from jinja2 import Environment , FileSystemLoader
import yaml
from jsonschema import validate
2025-03-05 17:20:04 -08:00
from urllib . parse import urlparse
2025-09-10 23:42:23 -07:00
from copy import deepcopy
2024-10-03 18:21:27 -07:00
2025-07-11 16:42:16 -07:00
# Provider prefixes accepted as the `<provider>` part of a
# `<provider>/<model_id>` model name. A model whose prefix is not listed here
# must supply both `base_url` and `provider_interface` (see the llm_provider
# checks in validate_and_render_schema).
SUPPORTED_PROVIDERS = [
    "arch",
    "deepseek",
    "groq",
    "mistral",
    "openai",
    "gemini",
    "anthropic",
]
2024-10-09 11:25:07 -07:00
2025-02-03 14:51:59 -08:00
def get_endpoint_and_port(endpoint, protocol):
    """Split a ``host[:port]`` string into its host and numeric port.

    Args:
        endpoint: Endpoint string, either ``"host"`` or ``"host:port"``.
        protocol: Scheme used to pick a default port when ``endpoint`` carries
            none: ``"http"`` maps to 80, any other value maps to 443.

    Returns:
        A ``(host, port)`` tuple where ``port`` is an ``int``. When no colon
        is present, ``endpoint`` is returned unchanged as the host.

    Raises:
        ValueError: If the text after the first colon is not an integer.
    """
    host, *rest = endpoint.split(":")
    if not rest:
        return endpoint, 80 if protocol == "http" else 443

    # Only the token right after the first colon is considered the port;
    # any further colon-separated tokens are ignored.
    port_text = rest[0]
    try:
        port = int(port_text)
    except ValueError:
        # Same exception type as a bare int() failure, but the message names
        # the offending endpoint so the config error is easy to locate.
        raise ValueError(
            f"Invalid port {port_text!r} in endpoint {endpoint!r}; "
            "expected <host>:<port> with a numeric port"
        ) from None
    return host, port
2024-10-03 18:21:27 -07:00
def _default_port(protocol):
    """Return the default port for *protocol*: 80 for ``http``, 443 otherwise."""
    return 80 if protocol == "http" else 443


def _collect_clusters(config_yaml):
    """Merge the ``endpoints`` and ``agents`` sections into one cluster map.

    Returns:
        ``(endpoints, inferred_clusters)``. Both map a name to the same detail
        dicts; every detail dict ends up with ``endpoint`` and ``port`` keys.
    """
    # A key that is present but empty parses as None; treat it like a missing
    # section. An existing mapping is reused as-is so agent entries added
    # below remain part of config_yaml.
    endpoints = config_yaml.get("endpoints")
    if endpoints is None:
        endpoints = {}

    # Convert each agent that has a name and a full URL into an endpoint.
    for agent in config_yaml.get("agents") or []:
        agent_name = agent.get("name")
        agent_endpoint = agent.get("endpoint")
        if not (agent_name and agent_endpoint):
            continue
        parsed = urlparse(agent_endpoint)
        if not (parsed.scheme and parsed.hostname):
            continue
        port = parsed.port
        if port is None:
            port = _default_port(parsed.scheme)
        endpoints[agent_name] = {
            "endpoint": parsed.hostname,
            "port": port,
            "protocol": parsed.scheme,
        }

    inferred_clusters = {}
    for name, endpoint_details in endpoints.items():
        inferred_clusters[name] = endpoint_details
        # Agent-derived entries already carry a port; only manually defined
        # endpoints may still have it embedded as "host:port".
        if "port" not in endpoint_details:
            protocol = endpoint_details.get("protocol", "http")
            (
                endpoint_details["endpoint"],
                endpoint_details["port"],
            ) = get_endpoint_and_port(endpoint_details["endpoint"], protocol)
    return endpoints, inferred_clusters


def _normalize_listeners(config_yaml, prompt_gateway_listener, llm_gateway_listener):
    """Convert a legacy mapping-style ``listeners`` section into a list, in place.

    When ``listeners`` is already a list it is left untouched. Otherwise the
    ``egress_traffic`` / ``ingress_traffic`` settings override the defaults in
    the two listener dicts passed in, top-level ``llm_providers`` move under
    the egress listener, and ``config_yaml["listeners"]`` becomes a list.
    """
    listeners = config_yaml.get("listeners")
    if listeners is None:
        # Missing or empty section: behave like an empty legacy mapping so the
        # default egress listener is still created.
        listeners = {}
    if not isinstance(listeners, dict):
        return

    ingress_traffic = listeners.get("ingress_traffic", None)
    egress_traffic = listeners.get("egress_traffic") or {}

    config_yaml["listeners"] = []

    llm_providers = []
    if config_yaml.get("llm_providers"):
        llm_providers = config_yaml["llm_providers"]
        del config_yaml["llm_providers"]

    for key in ("port", "address", "timeout"):
        llm_gateway_listener[key] = egress_traffic.get(key, llm_gateway_listener[key])
    llm_gateway_listener["llm_providers"] = llm_providers
    config_yaml["listeners"].append(llm_gateway_listener)

    if ingress_traffic:
        for key in ("port", "address", "timeout"):
            prompt_gateway_listener[key] = ingress_traffic.get(
                key, prompt_gateway_listener[key]
            )
        config_yaml["listeners"].append(prompt_gateway_listener)


def _validate_llm_providers(listeners):
    """Validate and normalize the llm_providers of the ``egress_traffic`` listener.

    Each provider dict is modified in place: ``name`` defaults to the model
    name, ``model`` is reduced to the part after ``<provider>/``,
    ``provider_interface`` is filled in, and ``endpoint``/``port``/``protocol``
    are derived from ``base_url`` when one is given.

    Returns:
        ``(updated_llm_providers, llms_with_endpoint, llm_provider_name_set,
        model_usage_name_keys)``.

    Raises:
        Exception: On duplicate names/models/routing preferences, a missing or
            malformed model name, an unsupported provider without ``base_url``
            and ``provider_interface``, or an invalid ``base_url``.
    """
    updated_llm_providers = []
    llms_with_endpoint = []
    llm_provider_name_set = set()
    model_name_keys = set()
    model_usage_name_keys = set()

    for listener in listeners:
        print("Processing listener: ", listener)

        # TODO: for now we only support llm_providers under egress_traffic listener
        if listener.get("name", None) != "egress_traffic":
            continue

        for llm_provider in listener.get("llm_providers") or []:
            if llm_provider.get("name") in llm_provider_name_set:
                raise Exception(
                    f"Duplicate llm_provider name {llm_provider.get('name')}, please provide unique name for each llm_provider"
                )

            model_name = llm_provider.get("model")
            if model_name is None:
                # Fail with a readable message instead of an AttributeError
                # from splitting None below.
                raise Exception(
                    f"Missing model for llm_provider {llm_provider.get('name')}. Please provide model name in the format <provider>/<model_id>."
                )
            if model_name in model_name_keys:
                raise Exception(
                    f"Duplicate model name {model_name}, please provide unique model name for each llm_provider"
                )
            model_name_keys.add(model_name)
            if llm_provider.get("name") is None:
                llm_provider["name"] = model_name

            # "<provider>/<model_id>": everything after the first slash is the
            # model id, which may itself contain slashes.
            provider, separator, model_id = model_name.partition("/")
            if not separator:
                raise Exception(
                    f"Invalid model name {model_name}. Please provide model name in the format <provider>/<model_id>."
                )

            if provider not in SUPPORTED_PROVIDERS:
                if (
                    llm_provider.get("base_url", None) is None
                    or llm_provider.get("provider_interface", None) is None
                ):
                    raise Exception(
                        f"Must provide base_url and provider_interface for unsupported provider {provider} for model {model_name}. Supported providers are: {', '.join(SUPPORTED_PROVIDERS)}"
                    )
                provider = llm_provider.get("provider_interface", None)
            elif llm_provider.get("provider_interface", None) is not None:
                raise Exception(
                    f"Please provide provider interface as part of model name {model_name} using the format <provider>/<model_id>. For example, use 'openai/gpt-3.5-turbo' instead of 'gpt-3.5-turbo'"
                )

            # Full model names and bare model ids share one uniqueness set.
            if model_id in model_name_keys:
                raise Exception(
                    f"Duplicate model_id {model_id}, please provide unique model_id for each llm_provider"
                )
            model_name_keys.add(model_id)

            for routing_preference in llm_provider.get("routing_preferences", []):
                if routing_preference.get("name") in model_usage_name_keys:
                    raise Exception(
                        f"Duplicate routing preference name \"{routing_preference.get('name')}\", please provide unique name for each routing preference"
                    )
                model_usage_name_keys.add(routing_preference.get("name"))

            llm_provider["model"] = model_id
            llm_provider["provider_interface"] = provider
            llm_provider_name_set.add(llm_provider.get("name"))

            # NOTE(review): provider_interface was just assigned above, so any
            # entry carrying a `provider` key is rejected here and the
            # following branch looks unreachable - confirm the intent.
            provider = None
            if llm_provider.get("provider") and llm_provider.get("provider_interface"):
                raise Exception(
                    "Please provide either provider or provider_interface, not both"
                )
            if llm_provider.get("provider"):
                provider = llm_provider["provider"]
                llm_provider["provider_interface"] = provider
                del llm_provider["provider"]
            updated_llm_providers.append(llm_provider)

            if llm_provider.get("base_url", None):
                base_url = llm_provider["base_url"]
                parsed = urlparse(base_url)
                if parsed.path and parsed.path != "/":
                    raise Exception(
                        f"Please provide base_url without path, got {base_url}. Use base_url like 'http://example.com' instead of 'http://example.com/path'."
                    )
                if parsed.scheme not in ("http", "https"):
                    raise Exception(
                        "Please provide a valid URL with scheme (http/https) in base_url"
                    )
                port = parsed.port
                if port is None:
                    port = _default_port(parsed.scheme)
                llm_provider["endpoint"] = parsed.hostname
                llm_provider["port"] = port
                llm_provider["protocol"] = parsed.scheme
                llms_with_endpoint.append(llm_provider)

    return (
        updated_llm_providers,
        llms_with_endpoint,
        llm_provider_name_set,
        model_usage_name_keys,
    )


def validate_and_render_schema():
    """Validate the Arch config and render the Envoy and Arch config files.

    File locations come from environment variables, each with a default:
    ``ENVOY_CONFIG_TEMPLATE_FILE``, ``ARCH_CONFIG_FILE``,
    ``ARCH_CONFIG_FILE_RENDERED``, ``ENVOY_CONFIG_FILE_RENDERED``,
    ``ARCH_CONFIG_SCHEMA_FILE`` and ``TEMPLATE_ROOT`` (template search path).

    Steps: validate the config against the schema, build the cluster map from
    ``endpoints``/``agents``, check that prompt targets reference known
    endpoints, normalize legacy listeners, validate llm_providers, render the
    Envoy template, and write both the rendered Envoy config and the
    normalized Arch config to disk.

    Raises:
        SystemExit: With status 1 when schema validation fails.
        Exception: When the configuration is inconsistent (unknown endpoint,
            duplicate or malformed llm_provider, bad routing provider, or an
            invalid agent-orchestrator setup).
    """
    ENVOY_CONFIG_TEMPLATE_FILE = os.getenv(
        "ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml"
    )
    ARCH_CONFIG_FILE = os.getenv("ARCH_CONFIG_FILE", "/app/arch_config.yaml")
    ARCH_CONFIG_FILE_RENDERED = os.getenv(
        "ARCH_CONFIG_FILE_RENDERED", "/app/arch_config_rendered.yaml"
    )
    ENVOY_CONFIG_FILE_RENDERED = os.getenv(
        "ENVOY_CONFIG_FILE_RENDERED", "/etc/envoy/envoy.yaml"
    )
    ARCH_CONFIG_SCHEMA_FILE = os.getenv(
        "ARCH_CONFIG_SCHEMA_FILE", "arch_config_schema.yaml"
    )

    env = Environment(loader=FileSystemLoader(os.getenv("TEMPLATE_ROOT", "./")))
    template = env.get_template(ENVOY_CONFIG_TEMPLATE_FILE)

    try:
        validate_prompt_config(ARCH_CONFIG_FILE, ARCH_CONFIG_SCHEMA_FILE)
    except Exception as e:
        print(str(e))
        # Equivalent to exit(1), without relying on the builtin that the
        # `site` module injects for interactive use.
        raise SystemExit(1)

    # The schema was already parsed during validation; only the config itself
    # is needed from here on.
    with open(ARCH_CONFIG_FILE, "r") as file:
        config_yaml = yaml.safe_load(file)

    endpoints, inferred_clusters = _collect_clusters(config_yaml)
    print("defined clusters from arch_config.yaml: ", json.dumps(inferred_clusters))

    # Every prompt target that names an endpoint must refer to a known cluster.
    for prompt_target in config_yaml.get("prompt_targets") or []:
        name = prompt_target.get("endpoint", {}).get("name", None)
        if not name:
            continue
        if name not in inferred_clusters:
            raise Exception(
                f"Unknown endpoint {name}, please add it in endpoints section in your arch_config.yaml file"
            )

    arch_tracing = config_yaml.get("tracing", {})

    # Defaults for the two gateway listeners; a legacy mapping-style
    # `listeners` section overrides port/address/timeout below.
    prompt_gateway_listener = {
        "name": "ingress_traffic",
        "port": 10000,
        "address": "0.0.0.0",
        "timeout": "30s",
        "protocol": "openai",
    }
    llm_gateway_listener = {
        "name": "egress_traffic",
        "port": 12000,
        "address": "0.0.0.0",
        "timeout": "30s",
        "llm_providers": [],
        "protocol": "openai",
    }
    _normalize_listeners(config_yaml, prompt_gateway_listener, llm_gateway_listener)

    (
        updated_llm_providers,
        llms_with_endpoint,
        llm_provider_name_set,
        model_usage_name_keys,
    ) = _validate_llm_providers(config_yaml["listeners"])

    # Routing preferences need a routing model: either the configured
    # provider (which must exist) or an implicit "arch-router" entry.
    if model_usage_name_keys:
        routing_llm_provider = config_yaml.get("routing", {}).get("llm_provider", None)
        if routing_llm_provider and routing_llm_provider not in llm_provider_name_set:
            raise Exception(
                f"Routing llm_provider {routing_llm_provider} is not defined in llm_providers"
            )
        if routing_llm_provider is None and "arch-router" not in llm_provider_name_set:
            updated_llm_providers.append(
                {
                    "name": "arch-router",
                    "provider_interface": "arch",
                    "model": config_yaml.get("routing", {}).get("model", "Arch-Router"),
                }
            )

    for listener in config_yaml["listeners"]:
        print("Processing listener: ", listener)
        if listener.get("name") == "egress_traffic":
            print("processing egress traffic listener")
            print("updated_llm_providers: ", updated_llm_providers)
            # The listener gets its own copy so it does not share objects with
            # the top-level llm_providers list assigned below.
            listener["llm_providers"] = deepcopy(updated_llm_providers)
    config_yaml["llm_providers"] = updated_llm_providers

    # The same serialized config feeds both template variables.
    arch_config_string = yaml.dump(config_yaml)
    arch_llm_config_string = arch_config_string

    use_agent_orchestrator = config_yaml.get("overrides", {}).get(
        "use_agent_orchestrator", False
    )
    agent_orchestrator = None
    if use_agent_orchestrator:
        print("Using agent orchestrator")
        # Exactly one endpoint must be defined; it becomes the orchestrator.
        if len(endpoints) == 0:
            raise Exception(
                "Please provide agent orchestrator in the endpoints section in your arch_config.yaml file"
            )
        elif len(endpoints) > 1:
            raise Exception(
                "Please provide single agent orchestrator in the endpoints section in your arch_config.yaml file"
            )
        else:
            agent_orchestrator = list(endpoints.keys())[0]
    print("agent_orchestrator: ", agent_orchestrator)

    data = {
        "prompt_gateway_listener": prompt_gateway_listener,
        "llm_gateway_listener": llm_gateway_listener,
        "arch_config": arch_config_string,
        "arch_llm_config": arch_llm_config_string,
        "arch_clusters": inferred_clusters,
        "arch_llm_providers": updated_llm_providers,
        "arch_tracing": arch_tracing,
        "local_llms": llms_with_endpoint,
        "agent_orchestrator": agent_orchestrator,
        "listeners": config_yaml["listeners"].copy(),
    }

    rendered = template.render(data)
    print(ENVOY_CONFIG_FILE_RENDERED)
    print(rendered)
    with open(ENVOY_CONFIG_FILE_RENDERED, "w") as file:
        file.write(rendered)

    with open(ARCH_CONFIG_FILE_RENDERED, "w") as file:
        file.write(arch_config_string)
2024-10-09 11:25:07 -07:00
2024-10-03 18:21:27 -07:00
def validate_prompt_config(arch_config_file, arch_config_schema_file):
    """Check an Arch config file against its schema.

    Both files are read in full and parsed as YAML, then the parsed config is
    validated against the parsed schema.

    Args:
        arch_config_file: Path of the YAML config to validate.
        arch_config_schema_file: Path of the YAML schema to validate against.

    Raises:
        Exception: Whatever the validation call raises; the error is printed
            together with both file paths and then re-raised.
    """
    # Read both files before parsing either one.
    raw_texts = []
    for path in (arch_config_file, arch_config_schema_file):
        with open(path, "r") as handle:
            raw_texts.append(handle.read())

    config_document = yaml.safe_load(raw_texts[0])
    schema_document = yaml.safe_load(raw_texts[1])

    try:
        validate(config_document, schema_document)
    except Exception as err:
        print(
            f"Error validating arch_config file: {arch_config_file}, schema file: {arch_config_schema_file}, error: {err}"
        )
        raise err
2024-10-09 11:25:07 -07:00
# Script entry point: validate the Arch config and render the output files
# only when this module is executed directly, not when it is imported.
if __name__ == "__main__":
    validate_and_render_schema()