import json
import os
from urllib.parse import urlparse

import yaml
from jinja2 import Environment, FileSystemLoader
from jsonschema import validate
# Providers with a built-in interface. Any provider not in this list must
# supply both `base_url` and `provider_interface` in its llm_provider entry.
SUPPORTED_PROVIDERS = [
    "arch",
    "deepseek",
    "groq",
    "mistral",
    "openai",
    "gemini",
    "anthropic",
    "together_ai",
    "azure_openai",
    "xai",
    "ollama",
]
def get_endpoint_and_port(endpoint, protocol):
    """Split a "host[:port]" endpoint string into a (host, port) tuple.

    Args:
        endpoint: hostname, optionally suffixed with ":<port>".
        protocol: "http" or "https"; used only to pick a default port.

    Returns:
        (host, port) where port is an int.

    Raises:
        ValueError: if the text after ":" is not a valid integer.
    """
    host, sep, port_str = endpoint.partition(":")
    if sep:
        return host, int(port_str)
    # No explicit port: fall back to the protocol's well-known default.
    return host, 80 if protocol == "http" else 443
def _apply_listener_defaults(listener, default_port, default_timeout):
    """Fill in missing port/address/timeout on a listener config, in place."""
    if listener.get("port") is None:
        listener["port"] = default_port
    if listener.get("address") is None:
        listener["address"] = "127.0.0.1"
    if listener.get("timeout") is None:
        listener["timeout"] = default_timeout


def validate_and_render_schema():
    """Validate arch_config.yaml and render the Envoy config from its template.

    Reads the arch config and schema (paths overridable via environment
    variables), validates the config against the schema, normalizes the
    `endpoints` and `llm_providers` sections (resolving host/port, provider
    interface and model id), then renders the Jinja2 Envoy template and
    writes both the rendered Envoy config and the normalized arch config.

    Exits the process with status 1 when schema validation fails; raises
    Exception for semantic errors (duplicate names, unknown endpoints, ...).
    """
    ENVOY_CONFIG_TEMPLATE_FILE = os.getenv(
        "ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml"
    )
    ARCH_CONFIG_FILE = os.getenv("ARCH_CONFIG_FILE", "/app/arch_config.yaml")
    ARCH_CONFIG_FILE_RENDERED = os.getenv(
        "ARCH_CONFIG_FILE_RENDERED", "/app/arch_config_rendered.yaml"
    )
    ENVOY_CONFIG_FILE_RENDERED = os.getenv(
        "ENVOY_CONFIG_FILE_RENDERED", "/etc/envoy/envoy.yaml"
    )
    ARCH_CONFIG_SCHEMA_FILE = os.getenv(
        "ARCH_CONFIG_SCHEMA_FILE", "arch_config_schema.yaml"
    )
    env = Environment(loader=FileSystemLoader(os.getenv("TEMPLATE_ROOT", "./")))
    template = env.get_template(ENVOY_CONFIG_TEMPLATE_FILE)

    try:
        validate_prompt_config(ARCH_CONFIG_FILE, ARCH_CONFIG_SCHEMA_FILE)
    except Exception as e:
        print(str(e))
        raise SystemExit(1)  # validate_prompt_config failed. Exit

    with open(ARCH_CONFIG_FILE, "r") as file:
        arch_config = file.read()
    with open(ARCH_CONFIG_SCHEMA_FILE, "r") as file:
        arch_config_schema = file.read()
    config_yaml = yaml.safe_load(arch_config)
    # Parsed only to confirm the schema file itself is valid YAML.
    _ = yaml.safe_load(arch_config_schema)

    inferred_clusters = {}
    endpoints = config_yaml.get("endpoints", {})
    # override the inferred clusters with the ones defined in the config
    for name, endpoint_details in endpoints.items():
        inferred_clusters[name] = endpoint_details
        endpoint = inferred_clusters[name]["endpoint"]
        protocol = inferred_clusters[name].get("protocol", "http")
        (
            inferred_clusters[name]["endpoint"],
            inferred_clusters[name]["port"],
        ) = get_endpoint_and_port(endpoint, protocol)

    print("defined clusters from arch_config.yaml: ", json.dumps(inferred_clusters))

    # Every prompt_target endpoint must refer to a declared endpoint.
    if "prompt_targets" in config_yaml:
        for prompt_target in config_yaml["prompt_targets"]:
            name = prompt_target.get("endpoint", {}).get("name", None)
            if not name:
                continue
            if name not in inferred_clusters:
                raise Exception(
                    f"Unknown endpoint {name}, please add it in endpoints section in your arch_config.yaml file"
                )

    arch_tracing = config_yaml.get("tracing", {})

    llms_with_endpoint = []
    updated_llm_providers = []
    llm_provider_name_set = set()
    model_name_keys = set()       # full model names AND derived model_ids
    model_usage_name_keys = set() # routing preference names (must be unique globally)
    for llm_provider in config_yaml["llm_providers"]:
        if llm_provider.get("name") in llm_provider_name_set:
            raise Exception(
                f"Duplicate llm_provider name {llm_provider.get('name')}, please provide unique name for each llm_provider"
            )
        model_name = llm_provider.get("model")
        if model_name in model_name_keys:
            raise Exception(
                f"Duplicate model name {model_name}, please provide unique model name for each llm_provider"
            )
        model_name_keys.add(model_name)
        if llm_provider.get("name") is None:
            # Default the provider name to the (still fully-qualified) model name.
            llm_provider["name"] = model_name
        llm_provider_name_set.add(llm_provider.get("name"))

        model_name_tokens = model_name.split("/")
        if len(model_name_tokens) < 2:
            raise Exception(
                f"Invalid model name {model_name}. Please provide model name in the format <provider>/<model_id>."
            )
        provider = model_name_tokens[0]
        # azure_openai and ollama have no fixed public endpoint, so the user
        # must supply base_url to tell us where to reach them.
        if (provider == "azure_openai" or provider == "ollama") and llm_provider.get(
            "base_url"
        ) is None:
            raise Exception(
                f"Provider '{provider}' requires 'base_url' to be set for model {model_name}"
            )
        model_id = "/".join(model_name_tokens[1:])
        if provider not in SUPPORTED_PROVIDERS:
            if (
                llm_provider.get("base_url", None) is None
                or llm_provider.get("provider_interface", None) is None
            ):
                raise Exception(
                    f"Must provide base_url and provider_interface for unsupported provider {provider} for model {model_name}. Supported providers are: {', '.join(SUPPORTED_PROVIDERS)}"
                )
            provider = llm_provider.get("provider_interface", None)
        elif llm_provider.get("provider_interface", None) is not None:
            raise Exception(
                f"Please provide provider interface as part of model name {model_name} using the format <provider>/<model_id>. For example, use 'openai/gpt-3.5-turbo' instead of 'gpt-3.5-turbo'"
            )
        if model_id in model_name_keys:
            raise Exception(
                f"Duplicate model_id {model_id}, please provide unique model_id for each llm_provider"
            )
        model_name_keys.add(model_id)
        for routing_preference in llm_provider.get("routing_preferences", []):
            if routing_preference.get("name") in model_usage_name_keys:
                raise Exception(
                    f"Duplicate routing preference name \"{routing_preference.get('name')}\", please provide unique name for each routing preference"
                )
            model_usage_name_keys.add(routing_preference.get("name"))
        # Normalize: store the bare model id and the resolved interface.
        llm_provider["model"] = model_id
        llm_provider["provider_interface"] = provider
        updated_llm_providers.append(llm_provider)

        if llm_provider.get("base_url", None):
            base_url = llm_provider["base_url"]
            urlparse_result = urlparse(base_url)
            url_path = urlparse_result.path
            if url_path and url_path != "/":
                raise Exception(
                    f"Please provide base_url without path, got {base_url}. Use base_url like 'http://example.com' instead of 'http://example.com/path'."
                )
            if urlparse_result.scheme == "" or urlparse_result.scheme not in [
                "http",
                "https",
            ]:
                raise Exception(
                    "Please provide a valid URL with scheme (http/https) in base_url"
                )
            protocol = urlparse_result.scheme
            port = urlparse_result.port
            if port is None:
                port = 80 if protocol == "http" else 443
            endpoint = urlparse_result.hostname
            llm_provider["endpoint"] = endpoint
            llm_provider["port"] = port
            llm_provider["protocol"] = protocol
            llm_provider["cluster_name"] = (
                provider + "_" + endpoint
            )  # make name unique by appending endpoint
            llms_with_endpoint.append(llm_provider)

    if len(model_usage_name_keys) > 0:
        routing_llm_provider = config_yaml.get("routing", {}).get("llm_provider", None)
        if routing_llm_provider and routing_llm_provider not in llm_provider_name_set:
            raise Exception(
                f"Routing llm_provider {routing_llm_provider} is not defined in llm_providers"
            )
        if routing_llm_provider is None and "arch-router" not in llm_provider_name_set:
            # Routing preferences exist but no router is configured: inject
            # the default arch router provider.
            updated_llm_providers.append(
                {
                    "name": "arch-router",
                    "provider_interface": "arch",
                    "model": config_yaml.get("routing", {}).get("model", "Arch-Router"),
                }
            )

    config_yaml["llm_providers"] = updated_llm_providers

    # Validate model aliases if present
    if "model_aliases" in config_yaml:
        model_aliases = config_yaml["model_aliases"]
        for alias_name, alias_config in model_aliases.items():
            target = alias_config.get("target")
            if target not in model_name_keys:
                raise Exception(
                    f"Model alias '{alias_name}' targets '{target}' which is not defined as a model. Available models: {', '.join(sorted(model_name_keys))}"
                )

    arch_config_string = yaml.dump(config_yaml)
    arch_llm_config_string = yaml.dump(config_yaml)

    prompt_gateway_listener = config_yaml.get("listeners", {}).get(
        "ingress_traffic", {}
    )
    _apply_listener_defaults(prompt_gateway_listener, 10000, "10s")  # prompt gateway
    llm_gateway_listener = config_yaml.get("listeners", {}).get("egress_traffic", {})
    _apply_listener_defaults(llm_gateway_listener, 12000, "300s")  # llm gateway

    use_agent_orchestrator = config_yaml.get("overrides", {}).get(
        "use_agent_orchestrator", False
    )
    agent_orchestrator = None
    if use_agent_orchestrator:
        print("Using agent orchestrator")
        if len(endpoints) == 0:
            raise Exception(
                "Please provide agent orchestrator in the endpoints section in your arch_config.yaml file"
            )
        elif len(endpoints) > 1:
            raise Exception(
                "Please provide single agent orchestrator in the endpoints section in your arch_config.yaml file"
            )
        else:
            agent_orchestrator = list(endpoints.keys())[0]
            print("agent_orchestrator: ", agent_orchestrator)

    data = {
        "prompt_gateway_listener": prompt_gateway_listener,
        "llm_gateway_listener": llm_gateway_listener,
        "arch_config": arch_config_string,
        "arch_llm_config": arch_llm_config_string,
        "arch_clusters": inferred_clusters,
        "arch_llm_providers": config_yaml["llm_providers"],
        "arch_tracing": arch_tracing,
        "local_llms": llms_with_endpoint,
        "agent_orchestrator": agent_orchestrator,
    }
    rendered = template.render(data)
    print(ENVOY_CONFIG_FILE_RENDERED)
    print(rendered)
    with open(ENVOY_CONFIG_FILE_RENDERED, "w") as file:
        file.write(rendered)
    with open(ARCH_CONFIG_FILE_RENDERED, "w") as file:
        file.write(arch_config_string)
def validate_prompt_config(arch_config_file, arch_config_schema_file):
    """Validate the arch config YAML against the JSON-schema YAML.

    Args:
        arch_config_file: path to the user's arch config YAML.
        arch_config_schema_file: path to the schema YAML.

    Raises:
        Whatever jsonschema.validate raises (re-raised after printing a
        diagnostic that includes both file paths).
    """
    with open(arch_config_file, "r") as file:
        arch_config = file.read()
    with open(arch_config_schema_file, "r") as file:
        arch_config_schema = file.read()
    config_yaml = yaml.safe_load(arch_config)
    config_schema_yaml = yaml.safe_load(arch_config_schema)
    try:
        validate(config_yaml, config_schema_yaml)
    except Exception as e:
        print(
            f"Error validating arch_config file: {arch_config_file}, schema file: {arch_config_schema_file}, error: {e}"
        )
        # Bare raise preserves the original traceback (raise e would reset it).
        raise
# Script entry point: validate the config and render the Envoy template.
if __name__ == "__main__":
    validate_and_render_schema()