mirror of
https://github.com/katanemo/plano.git
synced 2026-06-26 15:39:40 +02:00
fix more
This commit is contained in:
parent
32838584cf
commit
093834bb05
14 changed files with 623 additions and 48 deletions
|
|
@ -286,6 +286,100 @@ static_resources:
|
|||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
||||
- name: agents_traffic
|
||||
address:
|
||||
socket_address:
|
||||
address: 0.0.0.0
|
||||
port_value: 8001
|
||||
traffic_direction: OUTBOUND
|
||||
filter_chains:
|
||||
- filters:
|
||||
- name: envoy.filters.network.http_connection_manager
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
|
||||
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
|
||||
generate_request_id: true
|
||||
tracing:
|
||||
provider:
|
||||
name: envoy.tracers.opentelemetry
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
|
||||
grpc_service:
|
||||
envoy_grpc:
|
||||
cluster_name: opentelemetry_collector
|
||||
timeout: 0.250s
|
||||
service_name: arch_gateway
|
||||
random_sampling:
|
||||
value: {{ arch_tracing.random_sampling }}
|
||||
{% endif %}
|
||||
stat_prefix: agents_traffic
|
||||
codec_type: AUTO
|
||||
scheme_header_transformation:
|
||||
scheme_to_overwrite: https
|
||||
access_log:
|
||||
- name: envoy.access_loggers.file
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
|
||||
path: "/var/log/access_llm.log"
|
||||
route_config:
|
||||
name: local_routes
|
||||
virtual_hosts:
|
||||
- name: local_service
|
||||
domains:
|
||||
- "*"
|
||||
routes:
|
||||
- match:
|
||||
prefix: "/healthz"
|
||||
direct_response:
|
||||
status: 200
|
||||
- match:
|
||||
prefix: "/"
|
||||
route:
|
||||
auto_host_rewrite: true
|
||||
prefix_rewrite: "/agents/"
|
||||
cluster: bright_staff
|
||||
timeout: {{ llm_gateway_listener.timeout }}
|
||||
http_filters:
|
||||
- name: envoy.filters.http.compressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
|
||||
compressor_library:
|
||||
name: envoy.compression.brotli.compressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.compression.brotli.compressor.v3.Brotli
|
||||
- name: envoy.filters.http.compressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
|
||||
compressor_library:
|
||||
name: compress
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
|
||||
memory_level: 3
|
||||
window_bits: 10
|
||||
- name: envoy.filters.http.decompressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
|
||||
decompressor_library:
|
||||
name: decompress
|
||||
typed_config:
|
||||
"@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
|
||||
window_bits: 9
|
||||
chunk_size: 8192
|
||||
# If this ratio is set too low, then body data will not be decompressed completely.
|
||||
max_inflate_ratio: 1000
|
||||
- name: envoy.filters.http.decompressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
|
||||
decompressor_library:
|
||||
name: envoy.compression.brotli.decompressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.compression.brotli.decompressor.v3.Brotli
|
||||
- name: envoy.filters.http.router
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
||||
|
||||
|
||||
- name: egress_traffic
|
||||
address:
|
||||
socket_address:
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import json
|
||||
import subprocess
|
||||
import os
|
||||
import time
|
||||
|
|
@ -25,7 +26,7 @@ from cli.docker_cli import (
|
|||
log = getLogger(__name__)
|
||||
|
||||
|
||||
def _get_gateway_ports(arch_config_file: str) -> tuple:
|
||||
def _get_gateway_ports(arch_config_file: str) -> list[int]:
|
||||
PROMPT_GATEWAY_DEFAULT_PORT = 10000
|
||||
LLM_GATEWAY_DEFAULT_PORT = 12000
|
||||
|
||||
|
|
@ -34,18 +35,13 @@ def _get_gateway_ports(arch_config_file: str) -> tuple:
|
|||
with open(arch_config_file) as f:
|
||||
arch_config_dict = yaml.safe_load(f)
|
||||
|
||||
prompt_gateway_port = (
|
||||
arch_config_dict.get("listeners", {})
|
||||
.get("ingress_traffic", {})
|
||||
.get("port", PROMPT_GATEWAY_DEFAULT_PORT)
|
||||
)
|
||||
llm_gateway_port = (
|
||||
arch_config_dict.get("listeners", {})
|
||||
.get("egress_traffic", {})
|
||||
.get("port", LLM_GATEWAY_DEFAULT_PORT)
|
||||
)
|
||||
print("arch config dict json string: ", json.dumps(arch_config_dict))
|
||||
|
||||
return prompt_gateway_port, llm_gateway_port
|
||||
all_ports = [
|
||||
listener.get("port") for listener in arch_config_dict.get("listeners", [])
|
||||
]
|
||||
|
||||
return all_ports
|
||||
|
||||
|
||||
def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
|
||||
|
|
@ -67,14 +63,13 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
|
|||
docker_stop_container(ARCHGW_DOCKER_NAME)
|
||||
docker_remove_container(ARCHGW_DOCKER_NAME)
|
||||
|
||||
prompt_gateway_port, llm_gateway_port = _get_gateway_ports(arch_config_file)
|
||||
gateway_ports = _get_gateway_ports(arch_config_file)
|
||||
|
||||
return_code, _, archgw_stderr = docker_start_archgw_detached(
|
||||
arch_config_file,
|
||||
os.path.expanduser("~/archgw_logs"),
|
||||
env,
|
||||
prompt_gateway_port,
|
||||
llm_gateway_port,
|
||||
gateway_ports,
|
||||
)
|
||||
if return_code != 0:
|
||||
log.info("Failed to start arch gateway: " + str(return_code))
|
||||
|
|
@ -83,13 +78,17 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
|
|||
|
||||
start_time = time.time()
|
||||
while True:
|
||||
prompt_gateway_health_check_status = health_check_endpoint(
|
||||
f"http://localhost:{prompt_gateway_port}/healthz"
|
||||
)
|
||||
|
||||
llm_gateway_health_check_status = health_check_endpoint(
|
||||
f"http://localhost:{llm_gateway_port}/healthz"
|
||||
)
|
||||
all_listeners_healthy = True
|
||||
for port in gateway_ports:
|
||||
log.info(f"Checking health endpoint on port {port}")
|
||||
health_check_status = health_check_endpoint(
|
||||
f"http://localhost:{port}/healthz"
|
||||
)
|
||||
if health_check_status:
|
||||
log.info(f"Gateway on port {port} is healthy!")
|
||||
else:
|
||||
all_listeners_healthy = False
|
||||
log.info(f"Gateway on port {port} is not healthy yet.")
|
||||
|
||||
archgw_status = docker_container_status(ARCHGW_DOCKER_NAME)
|
||||
current_time = time.time()
|
||||
|
|
@ -106,7 +105,7 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
|
|||
stream_gateway_logs(follow=False)
|
||||
sys.exit(1)
|
||||
|
||||
if prompt_gateway_health_check_status or llm_gateway_health_check_status:
|
||||
if all_listeners_healthy:
|
||||
log.info("archgw is running and is healthy!")
|
||||
break
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -44,17 +44,18 @@ def docker_start_archgw_detached(
|
|||
arch_config_file: str,
|
||||
logs_path_abs: str,
|
||||
env: dict,
|
||||
prompt_gateway_port,
|
||||
llm_gateway_port,
|
||||
gateway_ports: list[int],
|
||||
) -> str:
|
||||
env_args = [item for key, value in env.items() for item in ["-e", f"{key}={value}"]]
|
||||
|
||||
port_mappings = [
|
||||
f"{prompt_gateway_port}:{prompt_gateway_port}",
|
||||
f"{llm_gateway_port}:{llm_gateway_port}",
|
||||
f"{llm_gateway_port+1}:{llm_gateway_port+1}",
|
||||
f"{12001}:{12001}",
|
||||
"19901:9901",
|
||||
]
|
||||
|
||||
for port in gateway_ports:
|
||||
port_mappings.append(f"{port}:{port}")
|
||||
|
||||
port_mappings_args = [item for port in port_mappings for item in ("-p", port)]
|
||||
|
||||
volume_mappings = [
|
||||
|
|
|
|||
|
|
@ -28,9 +28,9 @@ def get_llm_provider_access_keys(arch_config_file):
|
|||
|
||||
access_key_list = []
|
||||
for llm_provider in arch_config_yaml.get("llm_providers", []):
|
||||
acess_key = llm_provider.get("access_key")
|
||||
if acess_key is not None:
|
||||
access_key_list.append(acess_key)
|
||||
access_key = llm_provider.get("access_key")
|
||||
if access_key is not None:
|
||||
access_key_list.append(access_key)
|
||||
|
||||
for prompt_target in arch_config_yaml.get("prompt_targets", []):
|
||||
for k, v in prompt_target.get("endpoint", {}).get("http_headers", {}).items():
|
||||
|
|
@ -44,6 +44,12 @@ def get_llm_provider_access_keys(arch_config_file):
|
|||
else:
|
||||
access_key_list.append(v)
|
||||
|
||||
for listener in arch_config_yaml.get("listeners", []):
|
||||
for llm_provider in listener.get("llm_providers", []):
|
||||
access_key = llm_provider.get("access_key")
|
||||
if access_key is not None:
|
||||
access_key_list.append(access_key)
|
||||
|
||||
return access_key_list
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue