mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
fix more
This commit is contained in:
parent
d2ad943f63
commit
9cb04756c5
13 changed files with 181 additions and 90 deletions
|
|
@ -3,21 +3,38 @@ type: object
|
|||
properties:
|
||||
version:
|
||||
type: string
|
||||
listener:
|
||||
listeners:
|
||||
type: object
|
||||
properties:
|
||||
address:
|
||||
type: string
|
||||
port:
|
||||
type: integer
|
||||
message_format:
|
||||
type: string
|
||||
connect_timeout:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- address
|
||||
- port
|
||||
prompt_gateway:
|
||||
type: object
|
||||
properties:
|
||||
address:
|
||||
type: string
|
||||
port:
|
||||
type: integer
|
||||
message_format:
|
||||
type: string
|
||||
enum:
|
||||
- openai
|
||||
timeout:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
llm_gateway:
|
||||
type: object
|
||||
properties:
|
||||
address:
|
||||
type: string
|
||||
port:
|
||||
type: integer
|
||||
message_format:
|
||||
type: string
|
||||
enum:
|
||||
- openai
|
||||
- huggingface
|
||||
timeout:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
endpoints:
|
||||
type: object
|
||||
patternProperties:
|
||||
|
|
@ -224,5 +241,5 @@ properties:
|
|||
additionalProperties: false
|
||||
required:
|
||||
- version
|
||||
- listener
|
||||
- listeners
|
||||
- llm_providers
|
||||
|
|
|
|||
|
|
@ -32,8 +32,8 @@ static_resources:
|
|||
- name: arch_listener_http
|
||||
address:
|
||||
socket_address:
|
||||
address: 0.0.0.0
|
||||
port_value: 10000
|
||||
address: {{ prompt_gateway_listener.address }}
|
||||
port_value: {{ prompt_gateway_listener.port }}
|
||||
traffic_direction: INBOUND
|
||||
filter_chains:
|
||||
- filters:
|
||||
|
|
@ -76,7 +76,7 @@ static_resources:
|
|||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: arch_prompt_gateway_listener
|
||||
timeout: 60s
|
||||
timeout: {{ prompt_gateway_listener.timeout }}
|
||||
http_filters:
|
||||
- name: envoy.filters.http.router
|
||||
typed_config:
|
||||
|
|
@ -273,12 +273,11 @@ static_resources:
|
|||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
||||
|
||||
- name: arch_listener_http_llm
|
||||
address:
|
||||
socket_address:
|
||||
address: 0.0.0.0
|
||||
port_value: 12000
|
||||
address: {{ llm_gateway_listener.address }}
|
||||
port_value: {{ llm_gateway_listener.port }}
|
||||
traffic_direction: INBOUND
|
||||
filter_chains:
|
||||
- filters:
|
||||
|
|
@ -321,13 +320,12 @@ static_resources:
|
|||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: arch_listener_llm
|
||||
timeout: 60s
|
||||
timeout: {{ llm_gateway_listener.timeout }}
|
||||
http_filters:
|
||||
- name: envoy.filters.http.router
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
||||
|
||||
- name: arch_listener_llm
|
||||
address:
|
||||
socket_address:
|
||||
|
|
@ -443,7 +441,7 @@ static_resources:
|
|||
|
||||
clusters:
|
||||
- name: openai
|
||||
connect_timeout: 5s
|
||||
connect_timeout: 0.5s
|
||||
type: LOGICAL_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
|
|
@ -467,7 +465,7 @@ static_resources:
|
|||
tls_minimum_protocol_version: TLSv1_2
|
||||
tls_maximum_protocol_version: TLSv1_3
|
||||
- name: mistral
|
||||
connect_timeout: 5s
|
||||
connect_timeout: 0.5s
|
||||
type: LOGICAL_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
|
|
@ -488,7 +486,7 @@ static_resources:
|
|||
sni: api.mistral.ai
|
||||
{% for internal_cluster in ["arch_fc", "model_server"] %}
|
||||
- name: {{ internal_cluster }}
|
||||
connect_timeout: 5s
|
||||
connect_timeout: 0.5s
|
||||
type: STRICT_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
|
|
@ -504,7 +502,7 @@ static_resources:
|
|||
hostname: {{ internal_cluster }}
|
||||
{% endfor %}
|
||||
- name: mistral_7b_instruct
|
||||
connect_timeout: 5s
|
||||
connect_timeout: 0.5s
|
||||
type: STRICT_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
|
|
@ -523,7 +521,7 @@ static_resources:
|
|||
{% if cluster.connect_timeout -%}
|
||||
connect_timeout: {{ cluster.connect_timeout }}
|
||||
{% else -%}
|
||||
connect_timeout: 5s
|
||||
connect_timeout: 0.5s
|
||||
{% endif -%}
|
||||
type: LOGICAL_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
|
|
@ -557,7 +555,7 @@ static_resources:
|
|||
|
||||
{% for local_llm_provider in local_llms %}
|
||||
- name: {{ local_llm_provider.name }}
|
||||
connect_timeout: 5s
|
||||
connect_timeout: 0.5s
|
||||
type: LOGICAL_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
|
|
@ -589,7 +587,7 @@ static_resources:
|
|||
|
||||
{% endfor %}
|
||||
- name: arch_internal
|
||||
connect_timeout: 5s
|
||||
connect_timeout: 0.5s
|
||||
type: LOGICAL_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
|
|
@ -605,7 +603,7 @@ static_resources:
|
|||
hostname: arch_internal
|
||||
|
||||
- name: arch_prompt_gateway_listener
|
||||
connect_timeout: 5s
|
||||
connect_timeout: 0.5s
|
||||
type: LOGICAL_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
|
|
@ -621,7 +619,7 @@ static_resources:
|
|||
hostname: arch_prompt_gateway_listener
|
||||
|
||||
- name: arch_listener_llm
|
||||
connect_timeout: 5s
|
||||
connect_timeout: 0.5s
|
||||
type: LOGICAL_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
|
|
|
|||
|
|
@ -104,7 +104,25 @@ def validate_and_render_schema():
|
|||
arch_config_string = yaml.dump(config_yaml)
|
||||
arch_llm_config_string = yaml.dump(config_yaml)
|
||||
|
||||
prompt_gateway_listener = config_yaml.get("listeners", {}).get("prompt_gateway", {})
|
||||
if prompt_gateway_listener.get("port") == None:
|
||||
prompt_gateway_listener["port"] = 10000 # default port for prompt gateway
|
||||
if prompt_gateway_listener.get("address") == None:
|
||||
prompt_gateway_listener["address"] = "127.0.0.1"
|
||||
if prompt_gateway_listener.get("timeout") == None:
|
||||
prompt_gateway_listener["timeout"] = "10s"
|
||||
|
||||
llm_gateway_listener = config_yaml.get("listeners", {}).get("llm_gateway", {})
|
||||
if llm_gateway_listener.get("port") == None:
|
||||
llm_gateway_listener["port"] = 12000 # default port for llm gateway
|
||||
if llm_gateway_listener.get("address") == None:
|
||||
llm_gateway_listener["address"] = "127.0.0.1"
|
||||
if llm_gateway_listener.get("timeout") == None:
|
||||
llm_gateway_listener["timeout"] = "10s"
|
||||
|
||||
data = {
|
||||
"prompt_gateway_listener": prompt_gateway_listener,
|
||||
"llm_gateway_listener": llm_gateway_listener,
|
||||
"arch_config": arch_config_string,
|
||||
"arch_llm_config": arch_llm_config_string,
|
||||
"arch_clusters": inferred_clusters,
|
||||
|
|
|
|||
|
|
@ -15,12 +15,15 @@ from cli.consts import (
|
|||
)
|
||||
from huggingface_hub import snapshot_download
|
||||
from dotenv import dotenv_values
|
||||
import yaml
|
||||
|
||||
|
||||
log = getLogger(__name__)
|
||||
|
||||
|
||||
def start_archgw_docker(client, arch_config_file, env):
|
||||
def start_archgw_docker(
|
||||
client, arch_config_file, env, prompt_gateway_port, llm_gateway_port
|
||||
):
|
||||
logs_path = "~/archgw_logs"
|
||||
logs_path_abs = os.path.expanduser(logs_path)
|
||||
|
||||
|
|
@ -29,10 +32,10 @@ def start_archgw_docker(client, arch_config_file, env):
|
|||
image=ARCHGW_DOCKER_IMAGE,
|
||||
detach=True, # Run in detached mode
|
||||
ports={
|
||||
"10000/tcp": 10000,
|
||||
f"{prompt_gateway_port}/tcp": prompt_gateway_port,
|
||||
"10001/tcp": 10001,
|
||||
"11000/tcp": 11000,
|
||||
"12000/tcp": 12000,
|
||||
f"{llm_gateway_port}/tcp": llm_gateway_port,
|
||||
"9901/tcp": 19901,
|
||||
},
|
||||
volumes={
|
||||
|
|
@ -50,7 +53,12 @@ def start_archgw_docker(client, arch_config_file, env):
|
|||
},
|
||||
extra_hosts={"host.docker.internal": "host-gateway"},
|
||||
healthcheck={
|
||||
"test": ["CMD", "curl", "-f", "http://localhost:10000/healthz"],
|
||||
"test": [
|
||||
"CMD",
|
||||
"curl",
|
||||
"-f",
|
||||
f"http://localhost:{prompt_gateway_port}/healthz",
|
||||
],
|
||||
"interval": 5000000000, # 5 seconds
|
||||
"timeout": 1000000000, # 1 seconds
|
||||
"retries": 3,
|
||||
|
|
@ -128,7 +136,25 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
|
|||
except docker.errors.NotFound as e:
|
||||
pass
|
||||
|
||||
container = start_archgw_docker(client, arch_config_file, env)
|
||||
# parse arch_config_file yaml file and get prompt_gateway_port
|
||||
arch_config_dict = {}
|
||||
with open(arch_config_file) as f:
|
||||
arch_config_dict = yaml.safe_load(f)
|
||||
|
||||
prompt_gateway_port = (
|
||||
arch_config_dict.get("listeners", {})
|
||||
.get("prompt_gateway", {})
|
||||
.get("port", 10000)
|
||||
)
|
||||
llm_gateway_port = (
|
||||
arch_config_dict.get("listeners", {})
|
||||
.get("llm_gateway", {})
|
||||
.get("port", 12000)
|
||||
)
|
||||
|
||||
container = start_archgw_docker(
|
||||
client, arch_config_file, env, prompt_gateway_port, llm_gateway_port
|
||||
)
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
|
|
|
|||
64
arch/tools/poetry.lock
generated
64
arch/tools/poetry.lock
generated
|
|
@ -368,6 +368,68 @@ files = [
|
|||
{file = "pywin32-308-cp39-cp39-win_amd64.whl", hash = "sha256:71b3322d949b4cc20776436a9c9ba0eeedcbc9c650daa536df63f0ff111bb920"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pyyaml"
|
||||
version = "6.0.2"
|
||||
description = "YAML parser and emitter for Python"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"},
|
||||
{file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"},
|
||||
{file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"},
|
||||
{file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"},
|
||||
{file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"},
|
||||
{file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"},
|
||||
{file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"},
|
||||
{file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"},
|
||||
{file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"},
|
||||
{file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"},
|
||||
{file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"},
|
||||
{file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"},
|
||||
{file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"},
|
||||
{file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"},
|
||||
{file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"},
|
||||
{file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"},
|
||||
{file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"},
|
||||
{file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"},
|
||||
{file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"},
|
||||
{file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"},
|
||||
{file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"},
|
||||
{file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"},
|
||||
{file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"},
|
||||
{file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"},
|
||||
{file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"},
|
||||
{file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"},
|
||||
{file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"},
|
||||
{file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"},
|
||||
{file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"},
|
||||
{file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"},
|
||||
{file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"},
|
||||
{file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"},
|
||||
{file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"},
|
||||
{file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"},
|
||||
{file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"},
|
||||
{file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"},
|
||||
{file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"},
|
||||
{file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"},
|
||||
{file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"},
|
||||
{file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"},
|
||||
{file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"},
|
||||
{file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"},
|
||||
{file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"},
|
||||
{file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"},
|
||||
{file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"},
|
||||
{file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"},
|
||||
{file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"},
|
||||
{file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"},
|
||||
{file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"},
|
||||
{file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"},
|
||||
{file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"},
|
||||
{file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"},
|
||||
{file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "referencing"
|
||||
version = "0.36.2"
|
||||
|
|
@ -568,4 +630,4 @@ zstd = ["zstandard (>=0.18.0)"]
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "6b29791896ec1680e2c841ac42e835c1bada672b056d8208ab24388f70f9badb"
|
||||
content-hash = "d02e43f0884294d48736e1b8df248f47af480baffcbb7a0194da4e16cc1ea502"
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ jsonschema = "^4.23.0"
|
|||
setuptools = "75.5.0"
|
||||
docker = "^7.1.0"
|
||||
python-dotenv = "^1.0.1"
|
||||
pyyaml = "^6.0.2"
|
||||
|
||||
[tool.poetry.scripts]
|
||||
archgw = "cli.main:main"
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ use crate::api::open_ai::{
|
|||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Configuration {
|
||||
pub version: String,
|
||||
pub listener: Listener,
|
||||
pub endpoints: Option<HashMap<String, Endpoint>>,
|
||||
pub llm_providers: Vec<LlmProvider>,
|
||||
pub overrides: Option<Overrides>,
|
||||
|
|
@ -48,32 +47,6 @@ pub struct ErrorTargetDetail {
|
|||
pub endpoint: Option<EndpointDetails>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Listener {
|
||||
pub address: String,
|
||||
pub port: u16,
|
||||
pub message_format: MessageFormat,
|
||||
// pub connect_timeout: Option<DurationString>,
|
||||
}
|
||||
|
||||
impl Default for Listener {
|
||||
fn default() -> Self {
|
||||
Listener {
|
||||
address: "".to_string(),
|
||||
port: 0,
|
||||
message_format: MessageFormat::default(),
|
||||
// connect_timeout: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
pub enum MessageFormat {
|
||||
#[serde(rename = "huggingface")]
|
||||
#[default]
|
||||
Huggingface,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
pub struct PromptGuards {
|
||||
pub input_guards: HashMap<GuardType, GuardOptions>,
|
||||
|
|
|
|||
|
|
@ -3,10 +3,10 @@ pub const SYSTEM_ROLE: &str = "system";
|
|||
pub const USER_ROLE: &str = "user";
|
||||
pub const TOOL_ROLE: &str = "tool";
|
||||
pub const ASSISTANT_ROLE: &str = "assistant";
|
||||
pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
|
||||
pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
|
||||
pub const API_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
|
||||
pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
|
||||
pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
|
||||
pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
|
||||
pub const API_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
|
||||
pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
|
||||
pub const MODEL_SERVER_NAME: &str = "model_server";
|
||||
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
|
||||
pub const MESSAGES_KEY: &str = "messages";
|
||||
|
|
|
|||
|
|
@ -1,23 +1,24 @@
|
|||
version: "0.1-beta"
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: huggingface
|
||||
connect_timeout: 0.005s
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
address: 0.0.0.0
|
||||
message_format: openai
|
||||
timeout: 30s
|
||||
|
||||
overrides:
|
||||
# confidence threshold for prompt target intent matching
|
||||
prompt_target_intent_matching_threshold: 0.6
|
||||
optimize_context_window: true
|
||||
|
||||
endpoints:
|
||||
acm_service:
|
||||
endpoint: host.docker.internal:8001
|
||||
connect_timeout: 0.005s
|
||||
connect_timeout: 0.25s
|
||||
http_host: localhost
|
||||
local_proxy_service:
|
||||
endpoint: host.docker.internal:8002
|
||||
connect_timeout: 0.005s
|
||||
connect_timeout: 0.25s
|
||||
http_host: localhost
|
||||
|
||||
llm_providers:
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM jaegertracing/all-in-one:1.62.0
|
||||
FROM jaegertracing/jaeger:2.3.0
|
||||
HEALTHCHECK \
|
||||
--interval=1s \
|
||||
--timeout=1s \
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ llm_providers:
|
|||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o
|
||||
default: true
|
||||
stream: true
|
||||
|
||||
# default system prompt used by all prompt targets
|
||||
system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ llm_providers:
|
|||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o
|
||||
default: true
|
||||
stream: true
|
||||
|
||||
# default system prompt used by all prompt targets
|
||||
system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.
|
||||
|
|
|
|||
|
|
@ -1,16 +1,14 @@
|
|||
version: v0.1
|
||||
|
||||
listener:
|
||||
address: 0.0.0.0 # or 127.0.0.1
|
||||
port: 10000
|
||||
# Defines how Arch should parse the content from application/json or text/pain Content-type in the http request
|
||||
message_format: huggingface
|
||||
common_tls_context: # If you configure port 443, you'll need to update the listener with your TLS certificates
|
||||
tls_certificates:
|
||||
- certificate_chain:
|
||||
filename: /etc/certs/cert.pem
|
||||
private_key:
|
||||
filename: /etc/certs/key.pem
|
||||
listeners:
|
||||
prompt_gateway:
|
||||
address: 0.0.0.0
|
||||
port: 10000
|
||||
message_format: openai
|
||||
timeout: 5s
|
||||
llm_gateway:
|
||||
address: 0.0.0.0
|
||||
port: 12000
|
||||
|
||||
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
|
||||
endpoints:
|
||||
|
|
@ -35,7 +33,6 @@ llm_providers:
|
|||
access_key: $OPENAI_API_KEY
|
||||
model: gpt-4o
|
||||
default: true
|
||||
stream: true
|
||||
rate_limits:
|
||||
selector: #optional headers, to add rate limiting based on http headers like JWT tokens or API keys
|
||||
http_header:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue