plano/cli/planoai/config_generator.py
Musa 897fda2deb
fix(routing): auto-migrate v0.3.0 inline routing_preferences to v0.4.0 top-level (#912)
* fix(routing): auto-migrate v0.3.0 inline routing_preferences to v0.4.0 top-level

Lift inline routing_preferences under each model_provider into the
top-level routing_preferences list with merged models[] and bump
version to v0.4.0, with a deprecation warning. Existing v0.3.0
demo configs (Claude Code, Codex, preference_based_routing, etc.)
keep working unchanged. Schema flags the inline shape as deprecated
but still accepts it. Docs and skills updated to canonical top-level
multi-model form.

* test(common): bump reference config assertion to v0.4.0

The rendered reference config was bumped to v0.4.0 when its inline
routing_preferences were lifted to the top level; align the
configuration deserialization test with that change.

* fix(config_generator): bump version to v0.4.0 up front in migration

Move the v0.3.0 -> v0.4.0 version bump to the top of
migrate_inline_routing_preferences so it runs unconditionally,
including for configs that already declare top-level
routing_preferences at v0.3.0. Previously the bump only fired
when inline migration produced entries, leaving top-level v0.3.0
configs rejected by brightstaff's v0.4.0 gate. Tests updated to
cover the new behavior and to confirm we never downgrade newer
versions.

* fix(config_generator): gate routing_preferences migration on version < v0.4.0

Short-circuit the migration when the config already declares v0.4.0
or newer. Anything at v0.4.0+ is assumed to be on the canonical
top-level shape and is passed through untouched, including stray
inline preferences (which are the author's bug to fix). Only v0.3.0
and older configs are rewritten and bumped.
2026-04-24 12:31:44 -07:00

664 lines
26 KiB
Python

import json
import os
import uuid
from planoai.utils import convert_legacy_listeners
from jinja2 import Environment, FileSystemLoader
import yaml
from jsonschema import validate, ValidationError
from urllib.parse import urlparse
from copy import deepcopy
from planoai.consts import DEFAULT_OTEL_TRACING_GRPC_ENDPOINT
# Providers that must supply an explicit base_url in the config; the
# validation pass rejects these providers when base_url is missing.
SUPPORTED_PROVIDERS_WITH_BASE_URL = [
    "azure_openai",
    "ollama",
    "qwen",
    "amazon_bedrock",
    "plano",
]
# Hosted providers with well-known endpoints; base_url is optional for these.
SUPPORTED_PROVIDERS_WITHOUT_BASE_URL = [
    "deepseek",
    "groq",
    "mistral",
    "openai",
    "xiaomi",
    "gemini",
    "anthropic",
    "together_ai",
    "xai",
    "moonshotai",
    "zhipu",
    "chatgpt",
    "digitalocean",
    "vercel",
    "openrouter",
]
# Defaults injected when auto-wiring the "chatgpt" provider (base_url,
# originator and user-agent headers) during config rendering.
CHATGPT_API_BASE = "https://chatgpt.com/backend-api/codex"
CHATGPT_DEFAULT_ORIGINATOR = "codex_cli_rs"
CHATGPT_DEFAULT_USER_AGENT = "codex_cli_rs/0.0.0 (Unknown 0; unknown) unknown"
# Union of every provider interface the generator recognizes out of the box.
SUPPORTED_PROVIDERS = (
    SUPPORTED_PROVIDERS_WITHOUT_BASE_URL + SUPPORTED_PROVIDERS_WITH_BASE_URL
)
def get_endpoint_and_port(endpoint, protocol):
    """Split an endpoint string into a ``(host, port)`` pair.

    An explicit ``host:port`` form wins; when no port is present the
    port is inferred from ``protocol`` (80 for http, 443 otherwise).
    """
    parts = endpoint.split(":")
    if len(parts) > 1:
        # Explicit port in the string — use it verbatim.
        return parts[0], int(parts[1])
    # No port given: fall back to the scheme's conventional default.
    return endpoint, 80 if protocol == "http" else 443
def migrate_inline_routing_preferences(config_yaml):
    """Migrate v0.3.0 inline ``routing_preferences`` to the v0.4.0 shape.

    Configs already at version v0.4.0 or newer pass through untouched.
    Older configs are bumped to v0.4.0 unconditionally, then any
    ``routing_preferences`` nested under ``model_providers`` entries are
    lifted into the top-level ``routing_preferences`` list. Preferences
    sharing a ``name`` across providers merge into one entry whose
    ``models`` list accumulates each provider's full ``<provider>/<model>``
    string in declaration order; the first ``description`` seen wins and
    conflicts only warn. User-defined top-level preferences are kept as-is.
    Raises if a provider with inline preferences uses a wildcard model.
    """
    if _version_tuple(str(config_yaml.get("version", ""))) >= (0, 4, 0):
        return
    # Bump up front so even configs with only top-level preferences at
    # v0.3.0 pass the downstream v0.4.0 gate.
    config_yaml["version"] = "v0.4.0"
    providers = config_yaml.get("model_providers") or []
    if not providers:
        return
    lifted = {}
    for provider in providers:
        prefs = provider.get("routing_preferences")
        if not prefs:
            continue
        model = provider.get("model")
        if not model:
            continue
        if "/" in model and model.split("/")[-1].strip() == "*":
            raise Exception(
                f"Model {model} has routing_preferences but uses wildcard (*). Models with routing preferences cannot be wildcards."
            )
        for pref in prefs:
            pref_name = pref.get("name")
            if not pref_name:
                continue
            pref_desc = pref.get("description", "")
            existing = lifted.get(pref_name)
            if existing is None:
                lifted[pref_name] = {
                    "name": pref_name,
                    "description": pref_desc,
                    "models": [model],
                }
                continue
            # Same preference declared by another provider: merge models,
            # warn (but don't fail) on a differing description.
            if pref_desc and pref_desc != existing["description"]:
                print(
                    f"WARNING: routing preference '{pref_name}' has conflicting descriptions across providers; keeping the first one."
                )
            if model not in existing["models"]:
                existing["models"].append(model)
    if not lifted:
        return
    # Strip the deprecated inline form from every provider entry.
    for provider in providers:
        provider.pop("routing_preferences", None)
    merged = list(config_yaml.get("routing_preferences") or [])
    taken_names = {entry.get("name") for entry in merged}
    for pref_name, entry in lifted.items():
        if pref_name not in taken_names:
            merged.append(entry)
    config_yaml["routing_preferences"] = merged
    print(
        "WARNING: inline routing_preferences under model_providers is deprecated "
        "and has been auto-migrated to top-level routing_preferences. Update your "
        "config to v0.4.0 top-level form. See docs/routing-api.md"
    )
def _version_tuple(version_string):
stripped = version_string.strip().lstrip("vV")
if not stripped:
return (0, 0, 0)
parts = stripped.split("-", 1)[0].split(".")
out = []
for part in parts[:3]:
try:
out.append(int(part))
except ValueError:
out.append(0)
while len(out) < 3:
out.append(0)
return tuple(out)
def validate_and_render_schema():
    """Load, validate, migrate, and render the plano/Envoy configuration.

    Reads the plano config and its schema (paths overridable via env vars),
    runs legacy migrations (llm_providers -> model_providers, inline
    routing_preferences -> top-level), normalizes endpoints and model
    providers, then renders the Envoy Jinja2 template and writes both the
    rendered Envoy config and the rendered plano config to disk. Exits the
    process with status 1 if schema validation fails.
    """
    # All file locations are env-var driven so container and local runs differ
    # only by environment.
    ENVOY_CONFIG_TEMPLATE_FILE = os.getenv(
        "ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml"
    )
    PLANO_CONFIG_FILE = os.getenv("PLANO_CONFIG_FILE", "/app/plano_config.yaml")
    PLANO_CONFIG_FILE_RENDERED = os.getenv(
        "PLANO_CONFIG_FILE_RENDERED", "/app/plano_config_rendered.yaml"
    )
    ENVOY_CONFIG_FILE_RENDERED = os.getenv(
        "ENVOY_CONFIG_FILE_RENDERED", "/etc/envoy/envoy.yaml"
    )
    PLANO_CONFIG_SCHEMA_FILE = os.getenv(
        "PLANO_CONFIG_SCHEMA_FILE", "plano_config_schema.yaml"
    )
    env = Environment(loader=FileSystemLoader(os.getenv("TEMPLATE_ROOT", "./")))
    template = env.get_template(ENVOY_CONFIG_TEMPLATE_FILE)
    try:
        validate_prompt_config(PLANO_CONFIG_FILE, PLANO_CONFIG_SCHEMA_FILE)
    except Exception as e:
        print(str(e))
        exit(1)  # validate_prompt_config failed. Exit
    with open(PLANO_CONFIG_FILE, "r") as file:
        plano_config = file.read()
    with open(PLANO_CONFIG_SCHEMA_FILE, "r") as file:
        plano_config_schema = file.read()
    config_yaml = yaml.safe_load(plano_config)
    # Parsed only to confirm the schema file is well-formed YAML; the value
    # itself is unused here (schema validation already ran above).
    _ = yaml.safe_load(plano_config_schema)
    inferred_clusters = {}
    # Convert legacy llm_providers to model_providers
    if "llm_providers" in config_yaml:
        if "model_providers" in config_yaml:
            raise Exception(
                "Please provide either llm_providers or model_providers, not both. llm_providers is deprecated, please use model_providers instead"
            )
        config_yaml["model_providers"] = config_yaml["llm_providers"]
        del config_yaml["llm_providers"]
    # Lift v0.3.0 inline routing_preferences to the v0.4.0 top-level shape
    # (mutates config_yaml in place, bumping its version as needed).
    migrate_inline_routing_preferences(config_yaml)
    listeners, llm_gateway, prompt_gateway = convert_legacy_listeners(
        config_yaml.get("listeners"), config_yaml.get("model_providers")
    )
    config_yaml["listeners"] = listeners
    endpoints = config_yaml.get("endpoints", {})
    # Process agents section and convert to endpoints
    agents = config_yaml.get("agents", [])
    filters = config_yaml.get("filters", [])
    agents_combined = agents + filters
    agent_id_keys = set()
    for agent in agents_combined:
        agent_id = agent.get("id")
        if agent_id in agent_id_keys:
            raise Exception(
                f"Duplicate agent id {agent_id}, please provide unique id for each agent"
            )
        agent_id_keys.add(agent_id)
        agent_endpoint = agent.get("url")
        if agent_id and agent_endpoint:
            urlparse_result = urlparse(agent_endpoint)
            if urlparse_result.scheme and urlparse_result.hostname:
                protocol = urlparse_result.scheme
                port = urlparse_result.port
                if port is None:
                    # URL omitted the port: default by scheme.
                    if protocol == "http":
                        port = 80
                    else:
                        port = 443
                endpoints[agent_id] = {
                    "endpoint": urlparse_result.hostname,
                    "port": port,
                    "protocol": protocol,
                }
    # override the inferred clusters with the ones defined in the config
    for name, endpoint_details in endpoints.items():
        inferred_clusters[name] = endpoint_details
        # Only call get_endpoint_and_port for manually defined endpoints, not agent-derived ones
        if "port" not in endpoint_details:
            endpoint = inferred_clusters[name]["endpoint"]
            protocol = inferred_clusters[name].get("protocol", "http")
            (
                inferred_clusters[name]["endpoint"],
                inferred_clusters[name]["port"],
            ) = get_endpoint_and_port(endpoint, protocol)
    print("defined clusters from plano_config.yaml: ", json.dumps(inferred_clusters))
    # Every prompt_target endpoint reference must resolve to a known cluster.
    if "prompt_targets" in config_yaml:
        for prompt_target in config_yaml["prompt_targets"]:
            name = prompt_target.get("endpoint", {}).get("name", None)
            if not name:
                continue
            if name not in inferred_clusters:
                raise Exception(
                    f"Unknown endpoint {name}, please add it in endpoints section in your plano_config.yaml file"
                )
    plano_tracing = config_yaml.get("tracing", {})
    # Resolution order: config yaml > OTEL_TRACING_GRPC_ENDPOINT env var > hardcoded default
    opentracing_grpc_endpoint = plano_tracing.get(
        "opentracing_grpc_endpoint",
        os.environ.get(
            "OTEL_TRACING_GRPC_ENDPOINT", DEFAULT_OTEL_TRACING_GRPC_ENDPOINT
        ),
    )
    # resolve env vars in opentracing_grpc_endpoint if present
    if opentracing_grpc_endpoint and "$" in opentracing_grpc_endpoint:
        opentracing_grpc_endpoint = os.path.expandvars(opentracing_grpc_endpoint)
        print(
            f"Resolved opentracing_grpc_endpoint to {opentracing_grpc_endpoint} after expanding environment variables"
        )
    plano_tracing["opentracing_grpc_endpoint"] = opentracing_grpc_endpoint
    # ensure that opentracing_grpc_endpoint is a valid URL if present and start with http and must not have any path
    if opentracing_grpc_endpoint:
        urlparse_result = urlparse(opentracing_grpc_endpoint)
        if urlparse_result.scheme != "http":
            raise Exception(
                f"Invalid opentracing_grpc_endpoint {opentracing_grpc_endpoint}, scheme must be http"
            )
        if urlparse_result.path and urlparse_result.path != "/":
            raise Exception(
                f"Invalid opentracing_grpc_endpoint {opentracing_grpc_endpoint}, path must be empty"
            )
    # Accumulators for the provider-normalization pass below.
    llms_with_endpoint = []  # providers with an explicit base_url (rendered as clusters)
    llms_with_endpoint_cluster_names = set()
    updated_model_providers = []
    model_provider_name_set = set()
    llms_with_usage = []
    model_name_keys = set()
    # Top-level routing preference names must be unique.
    top_level_preferences = config_yaml.get("routing_preferences") or []
    seen_pref_names = set()
    for pref in top_level_preferences:
        pref_name = pref.get("name")
        if pref_name in seen_pref_names:
            raise Exception(
                f'Duplicate routing preference name "{pref_name}", please provide unique name for each routing preference'
            )
        seen_pref_names.add(pref_name)
    print("listeners: ", listeners)
    for listener in listeners:
        if (
            listener.get("model_providers") is None
            or listener.get("model_providers") == []
        ):
            continue
        print("Processing listener with model_providers: ", listener)
        name = listener.get("name", None)
        for model_provider in listener.get("model_providers", []):
            if model_provider.get("usage", None):
                # NOTE(review): reads provider["name"] before the name-defaulting
                # below — assumes providers with "usage" always set "name"; confirm.
                llms_with_usage.append(model_provider["name"])
            if model_provider.get("name") in model_provider_name_set:
                raise Exception(
                    f"Duplicate model_provider name {model_provider.get('name')}, please provide unique name for each model_provider"
                )
            model_name = model_provider.get("model")
            print("Processing model_provider: ", model_provider)
            # Check if this is a wildcard model (provider/*)
            is_wildcard = False
            if "/" in model_name:
                model_name_tokens = model_name.split("/")
                if len(model_name_tokens) >= 2 and model_name_tokens[-1] == "*":
                    is_wildcard = True
            if model_name in model_name_keys and not is_wildcard:
                raise Exception(
                    f"Duplicate model name {model_name}, please provide unique model name for each model_provider"
                )
            if not is_wildcard:
                model_name_keys.add(model_name)
            if model_provider.get("name") is None:
                # Default the provider's display name to the full model string.
                model_provider["name"] = model_name
            model_provider_name_set.add(model_provider.get("name"))
            model_name_tokens = model_name.split("/")
            if len(model_name_tokens) < 2:
                raise Exception(
                    f"Invalid model name {model_name}. Please provide model name in the format <provider>/<model_id> or <provider>/* for wildcards."
                )
            provider = model_name_tokens[0].strip()
            # Check if this is a wildcard (provider/*)
            is_wildcard = model_name_tokens[-1].strip() == "*"
            # Validate wildcard constraints
            if is_wildcard:
                if model_provider.get("default", False):
                    raise Exception(
                        f"Model {model_name} is configured as default but uses wildcard (*). Default models cannot be wildcards."
                    )
            # Validate azure_openai and ollama provider requires base_url
            if (provider in SUPPORTED_PROVIDERS_WITH_BASE_URL) and model_provider.get(
                "base_url"
            ) is None:
                raise Exception(
                    f"Provider '{provider}' requires 'base_url' to be set for model {model_name}"
                )
            model_id = "/".join(model_name_tokens[1:])
            # For wildcard providers, allow any provider name
            if not is_wildcard and provider not in SUPPORTED_PROVIDERS:
                if (
                    model_provider.get("base_url", None) is None
                    or model_provider.get("provider_interface", None) is None
                ):
                    raise Exception(
                        f"Must provide base_url and provider_interface for unsupported provider {provider} for model {model_name}. Supported providers are: {', '.join(SUPPORTED_PROVIDERS)}"
                    )
                provider = model_provider.get("provider_interface", None)
            elif is_wildcard and provider not in SUPPORTED_PROVIDERS:
                # Wildcard models with unsupported providers require base_url and provider_interface
                if (
                    model_provider.get("base_url", None) is None
                    or model_provider.get("provider_interface", None) is None
                ):
                    raise Exception(
                        f"Must provide base_url and provider_interface for unsupported provider {provider} for wildcard model {model_name}. Supported providers are: {', '.join(SUPPORTED_PROVIDERS)}"
                    )
                provider = model_provider.get("provider_interface", None)
            elif (
                provider in SUPPORTED_PROVIDERS
                and model_provider.get("provider_interface", None) is not None
            ):
                # For supported providers, provider_interface should not be manually set
                raise Exception(
                    f"Please provide provider interface as part of model name {model_name} using the format <provider>/<model_id>. For example, use 'openai/gpt-3.5-turbo' instead of 'gpt-3.5-turbo' "
                )
            # For wildcard models, don't add model_id to the keys since it's "*"
            if not is_wildcard:
                if model_id in model_name_keys:
                    raise Exception(
                        f"Duplicate model_id {model_id}, please provide unique model_id for each model_provider"
                    )
                model_name_keys.add(model_id)
            # Warn if both passthrough_auth and access_key are configured
            if model_provider.get("passthrough_auth") and model_provider.get(
                "access_key"
            ):
                print(
                    f"WARNING: Model provider '{model_provider.get('name')}' has both 'passthrough_auth: true' and 'access_key' configured. "
                    f"The access_key will be ignored and the client's Authorization header will be forwarded instead."
                )
            # Normalize: model holds the bare model id, provider_interface the scheme.
            model_provider["model"] = model_id
            model_provider["provider_interface"] = provider
            # NOTE(review): name was already added to model_provider_name_set above;
            # this second add is a redundant no-op on a set.
            model_provider_name_set.add(model_provider.get("name"))
            if model_provider.get("provider") and model_provider.get(
                "provider_interface"
            ):
                # NOTE(review): provider_interface was just assigned unconditionally
                # above, so any config that sets "provider" always hits this raise;
                # the "provider" fallback branch below looks unreachable — confirm.
                raise Exception(
                    "Please provide either provider or provider_interface, not both"
                )
            if model_provider.get("provider"):
                provider = model_provider["provider"]
                model_provider["provider_interface"] = provider
                del model_provider["provider"]
            # Auto-wire ChatGPT provider: inject base_url, passthrough_auth, and extra headers
            if provider == "chatgpt":
                if not model_provider.get("base_url"):
                    model_provider["base_url"] = CHATGPT_API_BASE
                if not model_provider.get("access_key") and not model_provider.get(
                    "passthrough_auth"
                ):
                    model_provider["passthrough_auth"] = True
                headers = model_provider.get("headers", {})
                headers.setdefault(
                    "ChatGPT-Account-Id",
                    os.environ.get("CHATGPT_ACCOUNT_ID", ""),
                )
                headers.setdefault("originator", CHATGPT_DEFAULT_ORIGINATOR)
                headers.setdefault("user-agent", CHATGPT_DEFAULT_USER_AGENT)
                headers.setdefault("session_id", str(uuid.uuid4()))
                model_provider["headers"] = headers
            updated_model_providers.append(model_provider)
            # Providers with a base_url become dedicated upstream clusters.
            if model_provider.get("base_url", None):
                base_url = model_provider["base_url"]
                urlparse_result = urlparse(base_url)
                base_url_path_prefix = urlparse_result.path
                if base_url_path_prefix and base_url_path_prefix != "/":
                    # we will now support base_url_path_prefix. This means that the user can provide base_url like http://example.com/path and we will extract /path as base_url_path_prefix
                    model_provider["base_url_path_prefix"] = base_url_path_prefix
                if urlparse_result.scheme == "" or urlparse_result.scheme not in [
                    "http",
                    "https",
                ]:
                    raise Exception(
                        "Please provide a valid URL with scheme (http/https) in base_url"
                    )
                protocol = urlparse_result.scheme
                port = urlparse_result.port
                if port is None:
                    if protocol == "http":
                        port = 80
                    else:
                        port = 443
                endpoint = urlparse_result.hostname
                model_provider["endpoint"] = endpoint
                model_provider["port"] = port
                model_provider["protocol"] = protocol
                cluster_name = (
                    provider + "_" + endpoint
                )  # make name unique by appending endpoint
                model_provider["cluster_name"] = cluster_name
                # Only add if cluster_name is not already present to avoid duplicates
                if cluster_name not in llms_with_endpoint_cluster_names:
                    llms_with_endpoint.append(model_provider)
                    llms_with_endpoint_cluster_names.add(cluster_name)
    overrides_config = config_yaml.get("overrides", {})
    # Build lookup of model names (already prefix-stripped by config processing)
    model_name_set = {mp.get("model") for mp in updated_model_providers}
    # Auto-add plano-orchestrator provider if routing preferences exist and no provider matches the routing model
    router_model = overrides_config.get("llm_routing_model", "Plano-Orchestrator")
    router_model_id = (
        router_model.split("/", 1)[1] if "/" in router_model else router_model
    )
    if len(seen_pref_names) > 0 and router_model_id not in model_name_set:
        updated_model_providers.append(
            {
                "name": "plano-orchestrator",
                "provider_interface": "plano",
                "model": router_model_id,
                "internal": True,
            }
        )
    # Always add arch-function model provider if not already defined
    if "arch-function" not in model_provider_name_set:
        updated_model_providers.append(
            {
                "name": "arch-function",
                "provider_interface": "plano",
                "model": "Arch-Function",
                "internal": True,
            }
        )
    # Auto-add plano-orchestrator provider if no provider matches the orchestrator model
    orchestrator_model = overrides_config.get(
        "agent_orchestration_model", "Plano-Orchestrator"
    )
    orchestrator_model_id = (
        orchestrator_model.split("/", 1)[1]
        if "/" in orchestrator_model
        else orchestrator_model
    )
    if orchestrator_model_id not in model_name_set:
        updated_model_providers.append(
            {
                "name": "plano/orchestrator",
                "provider_interface": "plano",
                "model": orchestrator_model_id,
                "internal": True,
            }
        )
    # Deep copy so later template-data mutations can't alias back into config_yaml.
    config_yaml["model_providers"] = deepcopy(updated_model_providers)
    # At most one listener may carry model_providers.
    listeners_with_provider = 0
    for listener in listeners:
        print("Processing listener: ", listener)
        model_providers = listener.get("model_providers", None)
        if model_providers is not None:
            listeners_with_provider += 1
        if listeners_with_provider > 1:
            raise Exception(
                "Please provide model_providers either under listeners or at root level, not both. Currently we don't support multiple listeners with model_providers"
            )
    # Validate input_filters IDs on listeners reference valid agent/filter IDs
    for listener in listeners:
        listener_input_filters = listener.get("input_filters", [])
        for fc_id in listener_input_filters:
            if fc_id not in agent_id_keys:
                raise Exception(
                    f"Listener '{listener.get('name', 'unknown')}' references input_filters id '{fc_id}' "
                    f"which is not defined in agents or filters. Available ids: {', '.join(sorted(agent_id_keys))}"
                )
    # Validate model aliases if present
    if "model_aliases" in config_yaml:
        model_aliases = config_yaml["model_aliases"]
        for alias_name, alias_config in model_aliases.items():
            target = alias_config.get("target")
            if target not in model_name_keys:
                # NOTE(review): the "2 - " in this message looks like leftover
                # debug text — confirm and clean up the wording.
                raise Exception(
                    f"Model alias 2 - '{alias_name}' targets '{target}' which is not defined as a model. Available models: {', '.join(sorted(model_name_keys))}"
                )
    # NOTE(review): both strings are dumps of the same (already-mutated)
    # config_yaml; presumably kept separate for template clarity — confirm.
    plano_config_string = yaml.dump(config_yaml)
    plano_llm_config_string = yaml.dump(config_yaml)
    use_agent_orchestrator = config_yaml.get("overrides", {}).get(
        "use_agent_orchestrator", False
    )
    agent_orchestrator = None
    if use_agent_orchestrator:
        print("Using agent orchestrator")
        # Exactly one endpoint must be configured to act as the orchestrator.
        if len(endpoints) == 0:
            raise Exception(
                "Please provide agent orchestrator in the endpoints section in your plano_config.yaml file"
            )
        elif len(endpoints) > 1:
            raise Exception(
                "Please provide single agent orchestrator in the endpoints section in your plano_config.yaml file"
            )
        else:
            agent_orchestrator = list(endpoints.keys())[0]
    print("agent_orchestrator: ", agent_orchestrator)
    overrides = config_yaml.get("overrides", {})
    upstream_connect_timeout = overrides.get("upstream_connect_timeout", "5s")
    upstream_tls_ca_path = overrides.get(
        "upstream_tls_ca_path", "/etc/ssl/certs/ca-certificates.crt"
    )
    # Context handed to the Envoy Jinja2 template.
    data = {
        "prompt_gateway_listener": prompt_gateway,
        "llm_gateway_listener": llm_gateway,
        "plano_config": plano_config_string,
        "plano_llm_config": plano_llm_config_string,
        "plano_clusters": inferred_clusters,
        "plano_model_providers": updated_model_providers,
        "plano_tracing": plano_tracing,
        "local_llms": llms_with_endpoint,
        "agent_orchestrator": agent_orchestrator,
        "listeners": listeners,
        "upstream_connect_timeout": upstream_connect_timeout,
        "upstream_tls_ca_path": upstream_tls_ca_path,
    }
    rendered = template.render(data)
    print(ENVOY_CONFIG_FILE_RENDERED)
    print(rendered)
    # Write both rendered artifacts: the Envoy config and the plano config.
    with open(ENVOY_CONFIG_FILE_RENDERED, "w") as file:
        file.write(rendered)
    with open(PLANO_CONFIG_FILE_RENDERED, "w") as file:
        file.write(plano_config_string)
def validate_prompt_config(plano_config_file, plano_config_schema_file):
    """Validate a plano config file against its YAML-encoded JSON schema.

    Args:
        plano_config_file: Path to the plano config YAML file.
        plano_config_schema_file: Path to the schema YAML file.

    Raises:
        ValidationError: When the config violates the schema, re-raised with
            a readable message that includes the failing location and value.
    """
    with open(plano_config_file, "r") as file:
        plano_config = file.read()
    with open(plano_config_schema_file, "r") as file:
        plano_config_schema = file.read()
    config_yaml = yaml.safe_load(plano_config)
    config_schema_yaml = yaml.safe_load(plano_config_schema)
    try:
        validate(config_yaml, config_schema_yaml)
    except ValidationError as e:
        # Join path segments with "." so nested locations read as
        # e.g. "model_providers.0.model" instead of the segments being
        # concatenated with no separator ("model_providers0model").
        path = (
            ".".join(str(p) for p in e.absolute_path) if e.absolute_path else "root"
        )
        # "from None" suppresses the noisy original traceback context;
        # callers see only the summarized error.
        raise ValidationError(
            f"{e.message}\n Location: {path}\n Value: {e.instance}"
        ) from None
if __name__ == "__main__":
    # Script entry point: validate the plano config and render the
    # Envoy and plano output files.
    validate_and_render_schema()