This commit is contained in:
Adil Hafeez 2025-02-12 14:48:23 -08:00
parent d2ad943f63
commit 9cb04756c5
No known key found for this signature in database
GPG key ID: 9B18EF7691369645
13 changed files with 181 additions and 90 deletions

View file

@ -3,21 +3,38 @@ type: object
properties:
version:
type: string
listener:
listeners:
type: object
properties:
address:
type: string
port:
type: integer
message_format:
type: string
connect_timeout:
type: string
additionalProperties: false
required:
- address
- port
prompt_gateway:
type: object
properties:
address:
type: string
port:
type: integer
message_format:
type: string
enum:
- openai
timeout:
type: string
additionalProperties: false
llm_gateway:
type: object
properties:
address:
type: string
port:
type: integer
message_format:
type: string
enum:
- openai
- huggingface
timeout:
type: string
additionalProperties: false
endpoints:
type: object
patternProperties:
@ -224,5 +241,5 @@ properties:
additionalProperties: false
required:
- version
- listener
- listeners
- llm_providers

View file

@ -32,8 +32,8 @@ static_resources:
- name: arch_listener_http
address:
socket_address:
address: 0.0.0.0
port_value: 10000
address: {{ prompt_gateway_listener.address }}
port_value: {{ prompt_gateway_listener.port }}
traffic_direction: INBOUND
filter_chains:
- filters:
@ -76,7 +76,7 @@ static_resources:
route:
auto_host_rewrite: true
cluster: arch_prompt_gateway_listener
timeout: 60s
timeout: {{ prompt_gateway_listener.timeout }}
http_filters:
- name: envoy.filters.http.router
typed_config:
@ -273,12 +273,11 @@ static_resources:
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
- name: arch_listener_http_llm
address:
socket_address:
address: 0.0.0.0
port_value: 12000
address: {{ llm_gateway_listener.address }}
port_value: {{ llm_gateway_listener.port }}
traffic_direction: INBOUND
filter_chains:
- filters:
@ -321,13 +320,12 @@ static_resources:
route:
auto_host_rewrite: true
cluster: arch_listener_llm
timeout: 60s
timeout: {{ llm_gateway_listener.timeout }}
http_filters:
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
- name: arch_listener_llm
address:
socket_address:
@ -443,7 +441,7 @@ static_resources:
clusters:
- name: openai
connect_timeout: 5s
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -467,7 +465,7 @@ static_resources:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
- name: mistral
connect_timeout: 5s
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -488,7 +486,7 @@ static_resources:
sni: api.mistral.ai
{% for internal_cluster in ["arch_fc", "model_server"] %}
- name: {{ internal_cluster }}
connect_timeout: 5s
connect_timeout: 0.5s
type: STRICT_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -504,7 +502,7 @@ static_resources:
hostname: {{ internal_cluster }}
{% endfor %}
- name: mistral_7b_instruct
connect_timeout: 5s
connect_timeout: 0.5s
type: STRICT_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -523,7 +521,7 @@ static_resources:
{% if cluster.connect_timeout -%}
connect_timeout: {{ cluster.connect_timeout }}
{% else -%}
connect_timeout: 5s
connect_timeout: 0.5s
{% endif -%}
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
@ -557,7 +555,7 @@ static_resources:
{% for local_llm_provider in local_llms %}
- name: {{ local_llm_provider.name }}
connect_timeout: 5s
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -589,7 +587,7 @@ static_resources:
{% endfor %}
- name: arch_internal
connect_timeout: 5s
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -605,7 +603,7 @@ static_resources:
hostname: arch_internal
- name: arch_prompt_gateway_listener
connect_timeout: 5s
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
@ -621,7 +619,7 @@ static_resources:
hostname: arch_prompt_gateway_listener
- name: arch_listener_llm
connect_timeout: 5s
connect_timeout: 0.5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN

View file

@ -104,7 +104,25 @@ def validate_and_render_schema():
arch_config_string = yaml.dump(config_yaml)
arch_llm_config_string = yaml.dump(config_yaml)
prompt_gateway_listener = config_yaml.get("listeners", {}).get("prompt_gateway", {})
if prompt_gateway_listener.get("port") == None:
prompt_gateway_listener["port"] = 10000 # default port for prompt gateway
if prompt_gateway_listener.get("address") == None:
prompt_gateway_listener["address"] = "127.0.0.1"
if prompt_gateway_listener.get("timeout") == None:
prompt_gateway_listener["timeout"] = "10s"
llm_gateway_listener = config_yaml.get("listeners", {}).get("llm_gateway", {})
if llm_gateway_listener.get("port") == None:
llm_gateway_listener["port"] = 12000 # default port for llm gateway
if llm_gateway_listener.get("address") == None:
llm_gateway_listener["address"] = "127.0.0.1"
if llm_gateway_listener.get("timeout") == None:
llm_gateway_listener["timeout"] = "10s"
data = {
"prompt_gateway_listener": prompt_gateway_listener,
"llm_gateway_listener": llm_gateway_listener,
"arch_config": arch_config_string,
"arch_llm_config": arch_llm_config_string,
"arch_clusters": inferred_clusters,

View file

@ -15,12 +15,15 @@ from cli.consts import (
)
from huggingface_hub import snapshot_download
from dotenv import dotenv_values
import yaml
log = getLogger(__name__)
def start_archgw_docker(client, arch_config_file, env):
def start_archgw_docker(
client, arch_config_file, env, prompt_gateway_port, llm_gateway_port
):
logs_path = "~/archgw_logs"
logs_path_abs = os.path.expanduser(logs_path)
@ -29,10 +32,10 @@ def start_archgw_docker(client, arch_config_file, env):
image=ARCHGW_DOCKER_IMAGE,
detach=True, # Run in detached mode
ports={
"10000/tcp": 10000,
f"{prompt_gateway_port}/tcp": prompt_gateway_port,
"10001/tcp": 10001,
"11000/tcp": 11000,
"12000/tcp": 12000,
f"{llm_gateway_port}/tcp": llm_gateway_port,
"9901/tcp": 19901,
},
volumes={
@ -50,7 +53,12 @@ def start_archgw_docker(client, arch_config_file, env):
},
extra_hosts={"host.docker.internal": "host-gateway"},
healthcheck={
"test": ["CMD", "curl", "-f", "http://localhost:10000/healthz"],
"test": [
"CMD",
"curl",
"-f",
f"http://localhost:{prompt_gateway_port}/healthz",
],
"interval": 5000000000, # 5 seconds
"timeout": 1000000000, # 1 seconds
"retries": 3,
@ -128,7 +136,25 @@ def start_arch(arch_config_file, env, log_timeout=120, foreground=False):
except docker.errors.NotFound as e:
pass
container = start_archgw_docker(client, arch_config_file, env)
# parse arch_config_file yaml file and get prompt_gateway_port
arch_config_dict = {}
with open(arch_config_file) as f:
arch_config_dict = yaml.safe_load(f)
prompt_gateway_port = (
arch_config_dict.get("listeners", {})
.get("prompt_gateway", {})
.get("port", 10000)
)
llm_gateway_port = (
arch_config_dict.get("listeners", {})
.get("llm_gateway", {})
.get("port", 12000)
)
container = start_archgw_docker(
client, arch_config_file, env, prompt_gateway_port, llm_gateway_port
)
start_time = time.time()

64
arch/tools/poetry.lock generated
View file

@ -368,6 +368,68 @@ files = [
{file = "pywin32-308-cp39-cp39-win_amd64.whl", hash = "sha256:71b3322d949b4cc20776436a9c9ba0eeedcbc9c650daa536df63f0ff111bb920"},
]
[[package]]
name = "pyyaml"
version = "6.0.2"
description = "YAML parser and emitter for Python"
optional = false
python-versions = ">=3.8"
files = [
{file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"},
{file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"},
{file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"},
{file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"},
{file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"},
{file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"},
{file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"},
{file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"},
{file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"},
{file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"},
{file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"},
{file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"},
{file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"},
{file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"},
{file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"},
{file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"},
{file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"},
{file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"},
{file = "PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"},
{file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"},
{file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"},
{file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"},
{file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"},
{file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"},
{file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"},
{file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"},
{file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"},
{file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"},
{file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"},
{file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"},
{file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"},
{file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"},
{file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"},
{file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"},
{file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"},
{file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"},
{file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"},
{file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"},
{file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"},
{file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"},
{file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"},
{file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = "sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"},
{file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"},
{file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"},
{file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"},
{file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"},
{file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"},
{file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"},
{file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"},
{file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"},
{file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"},
{file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"},
{file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"},
]
[[package]]
name = "referencing"
version = "0.36.2"
@ -568,4 +630,4 @@ zstd = ["zstandard (>=0.18.0)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "6b29791896ec1680e2c841ac42e835c1bada672b056d8208ab24388f70f9badb"
content-hash = "d02e43f0884294d48736e1b8df248f47af480baffcbb7a0194da4e16cc1ea502"

View file

@ -17,6 +17,7 @@ jsonschema = "^4.23.0"
setuptools = "75.5.0"
docker = "^7.1.0"
python-dotenv = "^1.0.1"
pyyaml = "^6.0.2"
[tool.poetry.scripts]
archgw = "cli.main:main"

View file

@ -9,7 +9,6 @@ use crate::api::open_ai::{
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Configuration {
pub version: String,
pub listener: Listener,
pub endpoints: Option<HashMap<String, Endpoint>>,
pub llm_providers: Vec<LlmProvider>,
pub overrides: Option<Overrides>,
@ -48,32 +47,6 @@ pub struct ErrorTargetDetail {
pub endpoint: Option<EndpointDetails>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Listener {
pub address: String,
pub port: u16,
pub message_format: MessageFormat,
// pub connect_timeout: Option<DurationString>,
}
impl Default for Listener {
fn default() -> Self {
Listener {
address: "".to_string(),
port: 0,
message_format: MessageFormat::default(),
// connect_timeout: None,
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub enum MessageFormat {
#[serde(rename = "huggingface")]
#[default]
Huggingface,
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct PromptGuards {
pub input_guards: HashMap<GuardType, GuardOptions>,

View file

@ -3,10 +3,10 @@ pub const SYSTEM_ROLE: &str = "system";
pub const USER_ROLE: &str = "user";
pub const TOOL_ROLE: &str = "tool";
pub const ASSISTANT_ROLE: &str = "assistant";
pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
pub const API_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
pub const API_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds
pub const MODEL_SERVER_NAME: &str = "model_server";
pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
pub const MESSAGES_KEY: &str = "messages";

View file

@ -1,23 +1,24 @@
version: "0.1-beta"
listener:
address: 0.0.0.0
port: 10000
message_format: huggingface
connect_timeout: 0.005s
listeners:
prompt_gateway:
address: 0.0.0.0
message_format: openai
timeout: 30s
overrides:
# confidence threshold for prompt target intent matching
prompt_target_intent_matching_threshold: 0.6
optimize_context_window: true
endpoints:
acm_service:
endpoint: host.docker.internal:8001
connect_timeout: 0.005s
connect_timeout: 0.25s
http_host: localhost
local_proxy_service:
endpoint: host.docker.internal:8002
connect_timeout: 0.005s
connect_timeout: 0.25s
http_host: localhost
llm_providers:

View file

@ -1,4 +1,4 @@
FROM jaegertracing/all-in-one:1.62.0
FROM jaegertracing/jaeger:2.3.0
HEALTHCHECK \
--interval=1s \
--timeout=1s \

View file

@ -13,7 +13,6 @@ llm_providers:
access_key: $OPENAI_API_KEY
model: gpt-4o
default: true
stream: true
# default system prompt used by all prompt targets
system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.

View file

@ -13,7 +13,6 @@ llm_providers:
access_key: $OPENAI_API_KEY
model: gpt-4o
default: true
stream: true
# default system prompt used by all prompt targets
system_prompt: You are a network assistant that just offers facts; not advice on manufacturers or purchasing decisions.

View file

@ -1,16 +1,14 @@
version: v0.1
listener:
address: 0.0.0.0 # or 127.0.0.1
port: 10000
# Defines how Arch should parse the content from application/json or text/pain Content-type in the http request
message_format: huggingface
common_tls_context: # If you configure port 443, you'll need to update the listener with your TLS certificates
tls_certificates:
- certificate_chain:
filename: /etc/certs/cert.pem
private_key:
filename: /etc/certs/key.pem
listeners:
prompt_gateway:
address: 0.0.0.0
port: 10000
message_format: openai
timeout: 5s
llm_gateway:
address: 0.0.0.0
port: 12000
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
endpoints:
@ -35,7 +33,6 @@ llm_providers:
access_key: $OPENAI_API_KEY
model: gpt-4o
default: true
stream: true
rate_limits:
selector: #optional headers, to add rate limiting based on http headers like JWT tokens or API keys
http_header: