From 9cb04756c5127dec074e5f93d8dae37515f35aef Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Wed, 12 Feb 2025 14:48:23 -0800 Subject: [PATCH] fix more --- arch/arch_config_schema.yaml | 45 +++++++++---- arch/envoy.template.yaml | 32 +++++----- arch/tools/cli/config_generator.py | 18 ++++++ arch/tools/cli/core.py | 36 +++++++++-- arch/tools/poetry.lock | 64 ++++++++++++++++++- arch/tools/pyproject.toml | 1 + crates/common/src/configuration.rs | 27 -------- crates/common/src/consts.rs | 8 +-- demos/acm_k8s/arch_config_openshift.yaml | 15 +++-- demos/shared/jaeger/Dockerfile | 2 +- .../source/concepts/includes/arch_config.yaml | 1 - docs/source/guides/includes/arch_config.yaml | 1 - .../includes/arch_config_full_reference.yaml | 21 +++--- 13 files changed, 181 insertions(+), 90 deletions(-) diff --git a/arch/arch_config_schema.yaml b/arch/arch_config_schema.yaml index 1b32b730..d9c9349e 100644 --- a/arch/arch_config_schema.yaml +++ b/arch/arch_config_schema.yaml @@ -3,21 +3,38 @@ type: object properties: version: type: string - listener: + listeners: type: object properties: - address: - type: string - port: - type: integer - message_format: - type: string - connect_timeout: - type: string - additionalProperties: false - required: - - address - - port + prompt_gateway: + type: object + properties: + address: + type: string + port: + type: integer + message_format: + type: string + enum: + - openai + timeout: + type: string + additionalProperties: false + llm_gateway: + type: object + properties: + address: + type: string + port: + type: integer + message_format: + type: string + enum: + - openai + - huggingface + timeout: + type: string + additionalProperties: false endpoints: type: object patternProperties: @@ -224,5 +241,5 @@ properties: additionalProperties: false required: - version - - listener + - listeners - llm_providers diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml index 0040b57b..313ece97 100644 --- a/arch/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -32,8 +32,8 @@ static_resources: - name: arch_listener_http address: socket_address: - address: 0.0.0.0 - port_value: 10000 + address: {{ prompt_gateway_listener.address }} + port_value: {{ prompt_gateway_listener.port }} traffic_direction: INBOUND filter_chains: - filters: @@ -76,7 +76,7 @@ static_resources: route: auto_host_rewrite: true cluster: arch_prompt_gateway_listener - timeout: 60s + timeout: {{ prompt_gateway_listener.timeout }} http_filters: - name: envoy.filters.http.router typed_config: @@ -273,12 +273,11 @@ static_resources: typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router - - name: arch_listener_http_llm address: socket_address: - address: 0.0.0.0 - port_value: 12000 + address: {{ llm_gateway_listener.address }} + port_value: {{ llm_gateway_listener.port }} traffic_direction: INBOUND filter_chains: - filters: @@ -321,13 +320,12 @@ static_resources: route: auto_host_rewrite: true cluster: arch_listener_llm - timeout: 60s + timeout: {{ llm_gateway_listener.timeout }} http_filters: - name: envoy.filters.http.router typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router - - name: arch_listener_llm address: socket_address: @@ -443,7 +441,7 @@ static_resources: clusters: - name: openai - connect_timeout: 5s + connect_timeout: 0.5s type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -467,7 +465,7 @@ static_resources: tls_minimum_protocol_version: TLSv1_2 tls_maximum_protocol_version: TLSv1_3 - name: mistral - connect_timeout: 5s + connect_timeout: 0.5s type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -488,7 +486,7 @@ static_resources: sni: api.mistral.ai {% for internal_cluster in ["arch_fc", "model_server"] %} - name: {{ internal_cluster }} - connect_timeout: 5s + connect_timeout: 0.5s type: STRICT_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -504,7 +502,7 @@ static_resources: hostname: {{ internal_cluster }} {% endfor %} - name: mistral_7b_instruct - connect_timeout: 5s + connect_timeout: 0.5s type: STRICT_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -523,7 +521,7 @@ static_resources: {% if cluster.connect_timeout -%} connect_timeout: {{ cluster.connect_timeout }} {% else -%} - connect_timeout: 5s + connect_timeout: 0.5s {% endif -%} type: LOGICAL_DNS dns_lookup_family: V4_ONLY @@ -557,7 +555,7 @@ static_resources: {% for local_llm_provider in local_llms %} - name: {{ local_llm_provider.name }} - connect_timeout: 5s + connect_timeout: 0.5s type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -589,7 +587,7 @@ static_resources: {% endfor %} - name: arch_internal - connect_timeout: 5s + connect_timeout: 0.5s type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -605,7 +603,7 @@ static_resources: hostname: arch_internal - name: arch_prompt_gateway_listener - connect_timeout: 5s + connect_timeout: 0.5s type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN @@ -621,7 +619,7 @@ static_resources: hostname: arch_prompt_gateway_listener - name: arch_listener_llm - connect_timeout: 5s + connect_timeout: 0.5s type: LOGICAL_DNS dns_lookup_family: V4_ONLY lb_policy: ROUND_ROBIN diff --git a/arch/tools/cli/config_generator.py b/arch/tools/cli/config_generator.py index 447585fb..33ea3c3b 100644 --- a/arch/tools/cli/config_generator.py +++ b/arch/tools/cli/config_generator.py @@ -104,7 +104,25 @@ def validate_and_render_schema(): arch_config_string = yaml.dump(config_yaml) arch_llm_config_string = yaml.dump(config_yaml) + prompt_gateway_listener = config_yaml.get("listeners", {}).get("prompt_gateway", {}) + if prompt_gateway_listener.get("port") == None: + prompt_gateway_listener["port"] = 10000 # default port for prompt gateway + if prompt_gateway_listener.get("address") == None: + prompt_gateway_listener["address"] = "127.0.0.1" + if prompt_gateway_listener.get("timeout") == None: + prompt_gateway_listener["timeout"] = "10s" + + llm_gateway_listener = config_yaml.get("listeners", {}).get("llm_gateway", {}) + if llm_gateway_listener.get("port") == None: + llm_gateway_listener["port"] = 12000 # default port for llm gateway + if llm_gateway_listener.get("address") == None: + llm_gateway_listener["address"] = "127.0.0.1" + if llm_gateway_listener.get("timeout") == None: + llm_gateway_listener["timeout"] = "10s" + data = { + "prompt_gateway_listener": prompt_gateway_listener, + "llm_gateway_listener": llm_gateway_listener, "arch_config": arch_config_string, "arch_llm_config": arch_llm_config_string, "arch_clusters": inferred_clusters, diff --git a/arch/tools/cli/core.py b/arch/tools/cli/core.py index c91120df..4ffbd873 100644 --- a/arch/tools/cli/core.py +++ b/arch/tools/cli/core.py @@ -15,12 +15,15 @@ from cli.consts import ( ) from huggingface_hub import snapshot_download from dotenv import dotenv_values +import yaml log = getLogger(__name__) -def start_archgw_docker(client, arch_config_file, env): +def start_archgw_docker( + client, arch_config_file, env, prompt_gateway_port, llm_gateway_port +): logs_path = "~/archgw_logs" logs_path_abs = os.path.expanduser(logs_path) @@ -29,10 +32,10 @@ def start_archgw_docker(client, arch_config_file, env): image=ARCHGW_DOCKER_IMAGE, detach=True, # Run in detached mode ports={ - "10000/tcp": 10000, + f"{prompt_gateway_port}/tcp": prompt_gateway_port, "10001/tcp": 10001, "11000/tcp": 11000, - "12000/tcp": 12000, + f"{llm_gateway_port}/tcp": llm_gateway_port, "9901/tcp": 19901, }, volumes={ @@ -50,7 +53,12 @@ def start_archgw_docker(client, arch_config_file, env): }, extra_hosts={"host.docker.internal": "host-gateway"}, healthcheck={ - "test": ["CMD", "curl", "-f", "http://localhost:10000/healthz"], + "test": [ + "CMD", + "curl", + "-f", + f"http://localhost:{prompt_gateway_port}/healthz", + ], "interval": 5000000000, # 5 seconds "timeout": 1000000000, # 1 seconds "retries": 3, @@ -128,7 +136,25 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False): except docker.errors.NotFound as e: pass - container = start_archgw_docker(client, arch_config_file, env) + # parse arch_config_file yaml file and get prompt_gateway_port + arch_config_dict = {} + with open(arch_config_file) as f: + arch_config_dict = yaml.safe_load(f) + + prompt_gateway_port = ( + arch_config_dict.get("listeners", {}) + .get("prompt_gateway", {}) + .get("port", 10000) + ) + llm_gateway_port = ( + arch_config_dict.get("listeners", {}) + .get("llm_gateway", {}) + .get("port", 12000) + ) + + container = start_archgw_docker( + client, arch_config_file, env, prompt_gateway_port, llm_gateway_port + ) start_time = time.time() diff --git a/arch/tools/poetry.lock b/arch/tools/poetry.lock index d5a45050..bcb89dae 100644 --- a/arch/tools/poetry.lock +++ b/arch/tools/poetry.lock @@ -368,6 +368,68 @@ files = [ {file = "pywin32-308-cp39-cp39-win_amd64.whl", hash = "sha256:71b3322d949b4cc20776436a9c9ba0eeedcbc9c650daa536df63f0ff111bb920"}, ] +[[package]] +name = "pyyaml" +version = "6.0.2" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, + {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"}, + {file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"}, + {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"}, + {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"}, + {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"}, + {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"}, + {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"}, + {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"}, + {file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"}, + {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"}, + {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"}, + {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"}, + {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"}, + {file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"}, + {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"}, + {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"}, + {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"}, + {file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"}, + {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"}, + {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, +] + [[package]] name = "referencing" version = "0.36.2" @@ -568,4 +630,4 @@ zstd = ["zstandard (>=0.18.0)"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "6b29791896ec1680e2c841ac42e835c1bada672b056d8208ab24388f70f9badb" +content-hash = "d02e43f0884294d48736e1b8df248f47af480baffcbb7a0194da4e16cc1ea502" diff --git a/arch/tools/pyproject.toml b/arch/tools/pyproject.toml index 7ed79a36..8ce28cbc 100644 --- a/arch/tools/pyproject.toml +++ b/arch/tools/pyproject.toml @@ -17,6 +17,7 @@ jsonschema = "^4.23.0" setuptools = "75.5.0" docker = "^7.1.0" python-dotenv = "^1.0.1" +pyyaml = "^6.0.2" [tool.poetry.scripts] archgw = "cli.main:main" diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index 069695ba..acd97fc3 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -9,7 +9,6 @@ use crate::api::open_ai::{ #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Configuration { pub version: String, - pub listener: Listener, pub endpoints: Option>, pub llm_providers: Vec, pub overrides: Option, @@ -48,32 +47,6 @@ pub struct ErrorTargetDetail { pub endpoint: Option, } -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Listener { - pub address: String, - pub port: u16, - pub message_format: MessageFormat, - // pub connect_timeout: Option, -} - -impl Default for Listener { - fn default() -> Self { - Listener { - address: "".to_string(), - port: 0, - message_format: MessageFormat::default(), - // connect_timeout: None, - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize, Default)] -pub enum MessageFormat { - #[serde(rename = "huggingface")] - #[default] - Huggingface, -} - #[derive(Debug, Clone, Serialize, Deserialize, Default)] pub struct PromptGuards { pub input_guards: HashMap, diff --git a/crates/common/src/consts.rs b/crates/common/src/consts.rs index 630ed6d5..cd52220e 100644 --- a/crates/common/src/consts.rs +++ b/crates/common/src/consts.rs @@ -3,10 +3,10 @@ pub const SYSTEM_ROLE: &str = "system"; pub const USER_ROLE: &str = "user"; pub const TOOL_ROLE: &str = "tool"; pub const ASSISTANT_ROLE: &str = "assistant"; -pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes -pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes -pub const API_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes -pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes +pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds +pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds +pub const API_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds +pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds pub const MODEL_SERVER_NAME: &str = "model_server"; pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider"; pub const MESSAGES_KEY: &str = "messages"; diff --git a/demos/acm_k8s/arch_config_openshift.yaml b/demos/acm_k8s/arch_config_openshift.yaml index 53352db4..d151a95e 100644 --- a/demos/acm_k8s/arch_config_openshift.yaml +++ b/demos/acm_k8s/arch_config_openshift.yaml @@ -1,23 +1,24 @@ version: "0.1-beta" -listener: - address: 0.0.0.0 - port: 10000 - message_format: huggingface - connect_timeout: 0.005s +listeners: + prompt_gateway: + address: 0.0.0.0 + message_format: openai + timeout: 30s overrides: # confidence threshold for prompt target intent matching prompt_target_intent_matching_threshold: 0.6 + optimize_context_window: true endpoints: acm_service: endpoint: host.docker.internal:8001 - connect_timeout: 0.005s + connect_timeout: 0.25s http_host: localhost local_proxy_service: endpoint: host.docker.internal:8002 - connect_timeout: 0.005s + connect_timeout: 0.25s http_host: localhost llm_providers: diff --git a/demos/shared/jaeger/Dockerfile b/demos/shared/jaeger/Dockerfile index 64fd1404..ee50d82e 100644 --- a/demos/shared/jaeger/Dockerfile +++ b/demos/shared/jaeger/Dockerfile @@ -1,4 +1,4 @@ -FROM jaegertracing/all-in-one:1.62.0 +FROM jaegertracing/jaeger:2.3.0 HEALTHCHECK \ --interval=1s \ --timeout=1s \ diff --git a/docs/source/concepts/includes/arch_config.yaml b/docs/source/concepts/includes/arch_config.yaml index c78f35f7..93164401 100644 --- a/docs/source/concepts/includes/arch_config.yaml +++ b/docs/source/concepts/includes/arch_config.yaml @@ -13,7 +13,6 @@ llm_providers: access_key: $OPENAI_API_KEY model: gpt-4o default: true - stream: true # default system prompt used by all prompt targets system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions. diff --git a/docs/source/guides/includes/arch_config.yaml b/docs/source/guides/includes/arch_config.yaml index 415c74aa..33c1748c 100644 --- a/docs/source/guides/includes/arch_config.yaml +++ b/docs/source/guides/includes/arch_config.yaml @@ -13,7 +13,6 @@ llm_providers: access_key: $OPENAI_API_KEY model: gpt-4o default: true - stream: true # default system prompt used by all prompt targets system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions. diff --git a/docs/source/resources/includes/arch_config_full_reference.yaml b/docs/source/resources/includes/arch_config_full_reference.yaml index f21fc1f5..2389389c 100644 --- a/docs/source/resources/includes/arch_config_full_reference.yaml +++ b/docs/source/resources/includes/arch_config_full_reference.yaml @@ -1,16 +1,14 @@ version: v0.1 -listener: - address: 0.0.0.0 # or 127.0.0.1 - port: 10000 - # Defines how Arch should parse the content from application/json or text/pain Content-type in the http request - message_format: huggingface - common_tls_context: # If you configure port 443, you'll need to update the listener with your TLS certificates - tls_certificates: - - certificate_chain: - filename: /etc/certs/cert.pem - private_key: - filename: /etc/certs/key.pem +listeners: + prompt_gateway: + address: 0.0.0.0 + port: 10000 + message_format: openai + timeout: 5s + llm_gateway: + address: 0.0.0.0 + port: 12000 # Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem. endpoints: @@ -35,7 +33,6 @@ llm_providers: access_key: $OPENAI_API_KEY model: gpt-4o default: true - stream: true rate_limits: selector: #optional headers, to add rate limiting based on http headers like JWT tokens or API keys http_header: