mirror of
https://github.com/katanemo/plano.git
synced 2026-06-20 15:28:07 +02:00
more changes
This commit is contained in:
parent
a016212588
commit
e57000000d
7 changed files with 168 additions and 12 deletions
|
|
@ -29,6 +29,7 @@ stats_config:
|
||||||
- 180000
|
- 180000
|
||||||
static_resources:
|
static_resources:
|
||||||
listeners:
|
listeners:
|
||||||
|
## begin - legacy listeners
|
||||||
- name: ingress_traffic
|
- name: ingress_traffic
|
||||||
address:
|
address:
|
||||||
socket_address:
|
socket_address:
|
||||||
|
|
@ -214,7 +215,10 @@ static_resources:
|
||||||
typed_config:
|
typed_config:
|
||||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||||
|
|
||||||
- name: egress_api_traffic
|
## end - legacy listeners
|
||||||
|
|
||||||
|
# Listener for outbound API traffic to services and clusters
|
||||||
|
- name: outbound_api_traffic
|
||||||
address:
|
address:
|
||||||
socket_address:
|
socket_address:
|
||||||
address: 0.0.0.0
|
address: 0.0.0.0
|
||||||
|
|
@ -236,11 +240,11 @@ static_resources:
|
||||||
envoy_grpc:
|
envoy_grpc:
|
||||||
cluster_name: opentelemetry_collector
|
cluster_name: opentelemetry_collector
|
||||||
timeout: 0.250s
|
timeout: 0.250s
|
||||||
service_name: egress_api_traffic
|
service_name: outbound_api_traffic
|
||||||
random_sampling:
|
random_sampling:
|
||||||
value: {{ arch_tracing.random_sampling }}
|
value: {{ arch_tracing.random_sampling }}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
stat_prefix: egress_api_traffic
|
stat_prefix: outbound_api_traffic
|
||||||
codec_type: AUTO
|
codec_type: AUTO
|
||||||
scheme_header_transformation:
|
scheme_header_transformation:
|
||||||
scheme_to_overwrite: https
|
scheme_to_overwrite: https
|
||||||
|
|
@ -288,12 +292,16 @@ static_resources:
|
||||||
typed_config:
|
typed_config:
|
||||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||||
|
|
||||||
- name: agents_traffic
|
# Listeners for LLM agents
|
||||||
|
{% for listener in listeners %}
|
||||||
|
|
||||||
|
{% if listener.agents %}
|
||||||
|
|
||||||
|
- name: {{ listener.name | replace(" ", "_") }}
|
||||||
address:
|
address:
|
||||||
socket_address:
|
socket_address:
|
||||||
address: 0.0.0.0
|
address: 0.0.0.0
|
||||||
port_value: 8001
|
port_value: {{ listener.port }}
|
||||||
traffic_direction: OUTBOUND
|
|
||||||
filter_chains:
|
filter_chains:
|
||||||
- filters:
|
- filters:
|
||||||
- name: envoy.filters.network.http_connection_manager
|
- name: envoy.filters.network.http_connection_manager
|
||||||
|
|
@ -325,6 +333,10 @@ static_resources:
|
||||||
path: "/var/log/access_llm.log"
|
path: "/var/log/access_llm.log"
|
||||||
route_config:
|
route_config:
|
||||||
name: local_routes
|
name: local_routes
|
||||||
|
request_headers_to_add:
|
||||||
|
- header:
|
||||||
|
key: "x-arch-agent-listener-name"
|
||||||
|
value: "{{ listener.name }}"
|
||||||
virtual_hosts:
|
virtual_hosts:
|
||||||
- name: local_service
|
- name: local_service
|
||||||
domains:
|
domains:
|
||||||
|
|
@ -380,7 +392,141 @@ static_resources:
|
||||||
typed_config:
|
typed_config:
|
||||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||||
|
|
||||||
|
{% endif %}
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
|
# Listeners for LLMs
|
||||||
|
{% for listener in listeners %}
|
||||||
|
|
||||||
|
{% if listener.llm_providers %}
|
||||||
|
|
||||||
|
- name: {{ listener.name | replace(" ", "_") }}
|
||||||
|
address:
|
||||||
|
socket_address:
|
||||||
|
address: {{ listener.address }}
|
||||||
|
port_value: {{ listener.port }}
|
||||||
|
filter_chains:
|
||||||
|
- filters:
|
||||||
|
- name: envoy.filters.network.http_connection_manager
|
||||||
|
typed_config:
|
||||||
|
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
|
||||||
|
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
|
||||||
|
generate_request_id: true
|
||||||
|
tracing:
|
||||||
|
provider:
|
||||||
|
name: envoy.tracers.opentelemetry
|
||||||
|
typed_config:
|
||||||
|
"@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
|
||||||
|
grpc_service:
|
||||||
|
envoy_grpc:
|
||||||
|
cluster_name: opentelemetry_collector
|
||||||
|
timeout: 0.250s
|
||||||
|
service_name: egress_traffic_llm
|
||||||
|
random_sampling:
|
||||||
|
value: {{ arch_tracing.random_sampling }}
|
||||||
|
{% endif %}
|
||||||
|
stat_prefix: egress_traffic
|
||||||
|
codec_type: AUTO
|
||||||
|
scheme_header_transformation:
|
||||||
|
scheme_to_overwrite: https
|
||||||
|
access_log:
|
||||||
|
- name: envoy.access_loggers.file
|
||||||
|
typed_config:
|
||||||
|
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
|
||||||
|
path: "/var/log/access_llm.log"
|
||||||
|
route_config:
|
||||||
|
name: local_routes
|
||||||
|
virtual_hosts:
|
||||||
|
- name: local_service
|
||||||
|
domains:
|
||||||
|
- "*"
|
||||||
|
routes:
|
||||||
|
{% for provider in listener.llm_providers %}
|
||||||
|
# if endpoint is set then use custom cluster for upstream llm
|
||||||
|
{% if provider.endpoint %}
|
||||||
|
{% set llm_cluster_name = provider.name %}
|
||||||
|
{% else %}
|
||||||
|
{% set llm_cluster_name = provider.provider_interface %}
|
||||||
|
{% endif %}
|
||||||
|
- match:
|
||||||
|
prefix: "/"
|
||||||
|
headers:
|
||||||
|
- name: "x-arch-llm-provider"
|
||||||
|
string_match:
|
||||||
|
exact: {{ llm_cluster_name }}
|
||||||
|
route:
|
||||||
|
auto_host_rewrite: true
|
||||||
|
cluster: {{ llm_cluster_name }}
|
||||||
|
timeout: 60s
|
||||||
|
{% endfor %}
|
||||||
|
- match:
|
||||||
|
prefix: "/"
|
||||||
|
direct_response:
|
||||||
|
status: 400
|
||||||
|
body:
|
||||||
|
inline_string: "x-arch-llm-provider header not set, llm gateway cannot perform routing\n"
|
||||||
|
http_filters:
|
||||||
|
- name: envoy.filters.http.compressor
|
||||||
|
typed_config:
|
||||||
|
"@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
|
||||||
|
compressor_library:
|
||||||
|
name: envoy.compression.brotli.compressor
|
||||||
|
typed_config:
|
||||||
|
"@type": type.googleapis.com/envoy.extensions.compression.brotli.compressor.v3.Brotli
|
||||||
|
chunk_size: 8192
|
||||||
|
- name: envoy.filters.http.compressor
|
||||||
|
typed_config:
|
||||||
|
"@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
|
||||||
|
compressor_library:
|
||||||
|
name: compress
|
||||||
|
typed_config:
|
||||||
|
"@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
|
||||||
|
memory_level: 3
|
||||||
|
window_bits: 10
|
||||||
|
- name: envoy.filters.http.wasm
|
||||||
|
typed_config:
|
||||||
|
"@type": type.googleapis.com/udpa.type.v1.TypedStruct
|
||||||
|
type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
|
||||||
|
value:
|
||||||
|
config:
|
||||||
|
name: "http_config"
|
||||||
|
root_id: llm_gateway
|
||||||
|
configuration:
|
||||||
|
"@type": "type.googleapis.com/google.protobuf.StringValue"
|
||||||
|
value: |
|
||||||
|
{{ arch_llm_config | indent(32) }}
|
||||||
|
vm_config:
|
||||||
|
runtime: "envoy.wasm.runtime.v8"
|
||||||
|
code:
|
||||||
|
local:
|
||||||
|
filename: "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm"
|
||||||
|
- name: envoy.filters.http.decompressor
|
||||||
|
typed_config:
|
||||||
|
"@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
|
||||||
|
decompressor_library:
|
||||||
|
name: decompress
|
||||||
|
typed_config:
|
||||||
|
"@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
|
||||||
|
chunk_size: 8192
|
||||||
|
# If this ratio is set too low, then body data will not be decompressed completely.
|
||||||
|
max_inflate_ratio: 1000
|
||||||
|
- name: envoy.filters.http.decompressor
|
||||||
|
typed_config:
|
||||||
|
"@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
|
||||||
|
decompressor_library:
|
||||||
|
name: envoy.compression.brotli.decompressor
|
||||||
|
typed_config:
|
||||||
|
"@type": type.googleapis.com/envoy.extensions.compression.brotli.decompressor.v3.Brotli
|
||||||
|
chunk_size: 8192
|
||||||
|
- name: envoy.filters.http.router
|
||||||
|
typed_config:
|
||||||
|
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||||
|
|
||||||
|
|
||||||
|
{% endif %}
|
||||||
|
{% endfor %}
|
||||||
|
|
||||||
|
# begin - legacy llm listeners
|
||||||
|
|
||||||
- name: egress_traffic
|
- name: egress_traffic
|
||||||
address:
|
address:
|
||||||
|
|
@ -595,6 +741,7 @@ static_resources:
|
||||||
typed_config:
|
typed_config:
|
||||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||||
|
|
||||||
|
# end - legacy llm listeners
|
||||||
clusters:
|
clusters:
|
||||||
|
|
||||||
- name: arch
|
- name: arch
|
||||||
|
|
|
||||||
|
|
@ -126,6 +126,7 @@ def validate_and_render_schema():
|
||||||
model_name_keys = set()
|
model_name_keys = set()
|
||||||
model_usage_name_keys = set()
|
model_usage_name_keys = set()
|
||||||
|
|
||||||
|
# legacy listeners
|
||||||
# check if type is array or object
|
# check if type is array or object
|
||||||
# if its dict its legacy format let's convert it to array
|
# if its dict its legacy format let's convert it to array
|
||||||
prompt_gateway_listener = {
|
prompt_gateway_listener = {
|
||||||
|
|
@ -344,6 +345,7 @@ def validate_and_render_schema():
|
||||||
"arch_tracing": arch_tracing,
|
"arch_tracing": arch_tracing,
|
||||||
"local_llms": llms_with_endpoint,
|
"local_llms": llms_with_endpoint,
|
||||||
"agent_orchestrator": agent_orchestrator,
|
"agent_orchestrator": agent_orchestrator,
|
||||||
|
"listeners": config_yaml["listeners"].copy(),
|
||||||
}
|
}
|
||||||
|
|
||||||
rendered = template.render(data)
|
rendered = template.render(data)
|
||||||
|
|
|
||||||
|
|
@ -35,12 +35,20 @@ pub async fn agent_chat(
|
||||||
listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
|
listeners: Arc<tokio::sync::RwLock<Vec<common::configuration::Listener>>>,
|
||||||
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
) -> Result<Response<BoxBody<Bytes, hyper::Error>>, hyper::Error> {
|
||||||
// find listener that is running at port 8001 for agents
|
// find listener that is running at port 8001 for agents
|
||||||
|
let listener_name = request.headers().get("x-arch-agent-listener-name");
|
||||||
let listener = {
|
let listener = {
|
||||||
let listeners = listeners.read().await;
|
let listeners = listeners.read().await;
|
||||||
listeners.iter().find(|l| l.port == 8001).cloned()
|
listeners.iter().find(|l| {
|
||||||
|
listener_name
|
||||||
|
.and_then(|name| name.to_str().ok())
|
||||||
|
.map(|name| l.name == name)
|
||||||
|
.unwrap_or(false)
|
||||||
|
}).cloned()
|
||||||
}
|
}
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
info!("Handling request for listener: {}", listener.name);
|
||||||
|
|
||||||
let request_path = request.uri().path().to_string();
|
let request_path = request.uri().path().to_string();
|
||||||
let mut request_headers = request.headers().clone();
|
let mut request_headers = request.headers().clone();
|
||||||
let chat_request_bytes = request.collect().await?.to_bytes();
|
let chat_request_bytes = request.collect().await?.to_bytes();
|
||||||
|
|
|
||||||
|
|
@ -42,8 +42,7 @@ listeners:
|
||||||
- access_key: $OPENAI_API_KEY
|
- access_key: $OPENAI_API_KEY
|
||||||
model: openai/gpt-4o-mini
|
model: openai/gpt-4o-mini
|
||||||
address: 0.0.0.0
|
address: 0.0.0.0
|
||||||
port: 12000
|
port: 9000
|
||||||
|
|
||||||
tracing:
|
tracing:
|
||||||
random_sampling: 100
|
random_sampling: 100
|
||||||
trace_arch_internal: true
|
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,7 @@ logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
# Configuration for archgw LLM gateway
|
# Configuration for archgw LLM gateway
|
||||||
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1")
|
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:9000/v1")
|
||||||
RAG_MODEL = "gpt-4o-mini"
|
RAG_MODEL = "gpt-4o-mini"
|
||||||
|
|
||||||
# Initialize OpenAI client for archgw
|
# Initialize OpenAI client for archgw
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
# Configuration for archgw LLM gateway
|
# Configuration for archgw LLM gateway
|
||||||
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1")
|
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:9000/v1")
|
||||||
QUERY_REWRITE_MODEL = "gpt-4o-mini"
|
QUERY_REWRITE_MODEL = "gpt-4o-mini"
|
||||||
|
|
||||||
# Initialize OpenAI client for archgw
|
# Initialize OpenAI client for archgw
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ logging.basicConfig(level=logging.INFO)
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Configuration for archgw LLM gateway
|
# Configuration for archgw LLM gateway
|
||||||
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1")
|
LLM_GATEWAY_ENDPOINT = os.getenv("LLM_GATEWAY_ENDPOINT", "http://localhost:9000/v1")
|
||||||
RESPONSE_MODEL = "gpt-4o"
|
RESPONSE_MODEL = "gpt-4o"
|
||||||
|
|
||||||
# Initialize OpenAI client for archgw
|
# Initialize OpenAI client for archgw
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue