mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
use ingress/egress
This commit is contained in:
parent
4a957f2b86
commit
136daa2d3c
18 changed files with 31 additions and 27 deletions
|
|
@ -5,8 +5,9 @@ properties:
|
|||
type: string
|
||||
listeners:
|
||||
type: object
|
||||
additionalProperties: false
|
||||
properties:
|
||||
prompt_gateway:
|
||||
ingress_traffic:
|
||||
type: object
|
||||
properties:
|
||||
address:
|
||||
|
|
@ -20,7 +21,7 @@ properties:
|
|||
timeout:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
llm_gateway:
|
||||
egress_traffic:
|
||||
type: object
|
||||
properties:
|
||||
address:
|
||||
|
|
@ -31,7 +32,6 @@ properties:
|
|||
type: string
|
||||
enum:
|
||||
- openai
|
||||
- huggingface
|
||||
timeout:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
|
|
@ -247,5 +247,4 @@ properties:
|
|||
additionalProperties: false
|
||||
required:
|
||||
- version
|
||||
- listeners
|
||||
- llm_providers
|
||||
|
|
|
|||
|
|
@ -104,7 +104,9 @@ def validate_and_render_schema():
|
|||
arch_config_string = yaml.dump(config_yaml)
|
||||
arch_llm_config_string = yaml.dump(config_yaml)
|
||||
|
||||
prompt_gateway_listener = config_yaml.get("listeners", {}).get("prompt_gateway", {})
|
||||
prompt_gateway_listener = config_yaml.get("listeners", {}).get(
|
||||
"ingress_traffic", {}
|
||||
)
|
||||
if prompt_gateway_listener.get("port") == None:
|
||||
prompt_gateway_listener["port"] = 10000 # default port for prompt gateway
|
||||
if prompt_gateway_listener.get("address") == None:
|
||||
|
|
@ -112,7 +114,7 @@ def validate_and_render_schema():
|
|||
if prompt_gateway_listener.get("timeout") == None:
|
||||
prompt_gateway_listener["timeout"] = "10s"
|
||||
|
||||
llm_gateway_listener = config_yaml.get("listeners", {}).get("llm_gateway", {})
|
||||
llm_gateway_listener = config_yaml.get("listeners", {}).get("egress_traffic", {})
|
||||
if llm_gateway_listener.get("port") == None:
|
||||
llm_gateway_listener["port"] = 12000 # default port for llm gateway
|
||||
if llm_gateway_listener.get("address") == None:
|
||||
|
|
|
|||
|
|
@ -143,12 +143,12 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
|
|||
|
||||
prompt_gateway_port = (
|
||||
arch_config_dict.get("listeners", {})
|
||||
.get("prompt_gateway", {})
|
||||
.get("ingress_traffic", {})
|
||||
.get("port", 10000)
|
||||
)
|
||||
llm_gateway_port = (
|
||||
arch_config_dict.get("listeners", {})
|
||||
.get("llm_gateway", {})
|
||||
.get("egress_traffic", {})
|
||||
.get("port", 12000)
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
version: v0.1
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
version: v0.1
|
||||
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
version: v0.1
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
version: v0.1
|
||||
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
version: v0.1
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
|
|
|
|||
|
|
@ -1,11 +1,12 @@
|
|||
version: v0.1
|
||||
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
llm_providers:
|
||||
- name: gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
version: "0.1-beta"
|
||||
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
version: "0.1-beta"
|
||||
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
egress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
port: 12000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
version: v0.1
|
||||
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
version: v0.1
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
version: v0.1
|
||||
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ Create ``arch_config.yaml`` file with the following content:
|
|||
version: v0.1
|
||||
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
|
|
@ -146,9 +146,9 @@ Create ``arch_config.yaml`` file with the following content:
|
|||
version: v0.1
|
||||
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
egress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
port: 12000
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
version: v0.1
|
||||
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
|
|
|
|||
|
|
@ -1,14 +1,16 @@
|
|||
version: v0.1
|
||||
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 5s
|
||||
llm_gateway:
|
||||
egress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 12000
|
||||
message_format: openai
|
||||
timeout: 5s
|
||||
|
||||
# Arch load-balances requests round-robin across the different endpoints, managed via the cluster subsystem.
|
||||
endpoints:
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
version: "0.1-beta"
|
||||
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
ingress_traffic:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue