mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
add preliminary support for llm agents (#432)
This commit is contained in:
parent
8d66fefded
commit
84cd1df7bf
29 changed files with 1388 additions and 121 deletions
|
|
@ -28,4 +28,5 @@ COPY arch/arch_config_schema.yaml .
|
|||
RUN pip install requests
|
||||
RUN touch /var/log/envoy.log
|
||||
|
||||
# ENTRYPOINT ["sh","-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --log-level trace 2>&1 | tee /var/log/envoy.log"]
|
||||
ENTRYPOINT ["sh","-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug 2>&1 | tee /var/log/envoy.log"]
|
||||
|
|
|
|||
|
|
@ -93,7 +93,6 @@ properties:
|
|||
additionalProperties: false
|
||||
required:
|
||||
- name
|
||||
- model
|
||||
overrides:
|
||||
type: object
|
||||
properties:
|
||||
|
|
@ -101,6 +100,8 @@ properties:
|
|||
type: number
|
||||
optimize_context_window:
|
||||
type: boolean
|
||||
use_agent_orchestrator:
|
||||
type: boolean
|
||||
system_prompt:
|
||||
type: string
|
||||
prompt_targets:
|
||||
|
|
|
|||
|
|
@ -142,6 +142,19 @@ static_resources:
|
|||
cluster: {{ llm_cluster_name }}
|
||||
timeout: 60s
|
||||
{% endfor %}
|
||||
|
||||
{% if agent_orchestrator %}
|
||||
- match:
|
||||
prefix: "/"
|
||||
headers:
|
||||
- name: "x-arch-llm-provider"
|
||||
string_match:
|
||||
exact: {{ agent_orchestrator }}
|
||||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: {{ agent_orchestrator }}
|
||||
timeout: 60s
|
||||
{% endif %}
|
||||
http_filters:
|
||||
- name: envoy.filters.http.compressor
|
||||
typed_config:
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ def validate_and_render_schema():
|
|||
arch_config_schema = file.read()
|
||||
|
||||
config_yaml = yaml.safe_load(arch_config)
|
||||
config_schema_yaml = yaml.safe_load(arch_config_schema)
|
||||
_ = yaml.safe_load(arch_config_schema)
|
||||
inferred_clusters = {}
|
||||
|
||||
endpoints = config_yaml.get("endpoints", {})
|
||||
|
|
@ -150,6 +150,26 @@ def validate_and_render_schema():
|
|||
if llm_gateway_listener.get("timeout") == None:
|
||||
llm_gateway_listener["timeout"] = "10s"
|
||||
|
||||
use_agent_orchestrator = config_yaml.get("overrides", {}).get(
|
||||
"use_agent_orchestrator", False
|
||||
)
|
||||
|
||||
agent_orchestrator = None
|
||||
if use_agent_orchestrator:
|
||||
print("Using agent orchestrator")
|
||||
|
||||
if len(endpoints) == 0:
|
||||
raise Exception(
|
||||
"Please provide agent orchestrator in the endpoints section in your arch_config.yaml file"
|
||||
)
|
||||
elif len(endpoints) > 1:
|
||||
raise Exception(
|
||||
"Please provide single agent orchestrator in the endpoints section in your arch_config.yaml file"
|
||||
)
|
||||
else:
|
||||
agent_orchestrator = list(endpoints.keys())[0]
|
||||
|
||||
print("agent_orchestrator: ", agent_orchestrator)
|
||||
data = {
|
||||
"prompt_gateway_listener": prompt_gateway_listener,
|
||||
"llm_gateway_listener": llm_gateway_listener,
|
||||
|
|
@ -159,6 +179,7 @@ def validate_and_render_schema():
|
|||
"arch_llm_providers": config_yaml["llm_providers"],
|
||||
"arch_tracing": arch_tracing,
|
||||
"local_llms": llms_with_endpoint,
|
||||
"agent_orchestrator": agent_orchestrator,
|
||||
}
|
||||
|
||||
rendered = template.render(data)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue