diff --git a/arch/arch_config_schema.yaml b/arch/arch_config_schema.yaml
index b0785e1e..9b63840e 100644
--- a/arch/arch_config_schema.yaml
+++ b/arch/arch_config_schema.yaml
@@ -150,6 +150,11 @@ properties:
random_sampling:
type: integer
additionalProperties: false
+ mode:  # optional top-level gateway mode selector (not added to `required`)
+ type: string
+ enum:  # NOTE(review): values appear to correspond to the llm_gateway / prompt_gateway wasm root_ids in envoy.template.yaml - confirm
+ - llm
+ - prompt
additionalProperties: false
required:
- version
diff --git a/arch/docker-compose.dev.yaml b/arch/docker-compose.dev.yaml
index 33b692bb..36c364bb 100644
--- a/arch/docker-compose.dev.yaml
+++ b/arch/docker-compose.dev.yaml
@@ -4,6 +4,7 @@ services:
ports:
- "10000:10000"
- "11000:11000"
+ - "12000:12000"  # LLM listener port (arch_listener_llm in envoy.template.yaml)
- "19901:9901"
volumes:
- ${ARCH_CONFIG_FILE:-../demos/function_calling/arch_config.yaml}:/config/arch_config.yaml
diff --git a/arch/docker-compose.yaml b/arch/docker-compose.yaml
index 582e5a2f..3860fac0 100644
--- a/arch/docker-compose.yaml
+++ b/arch/docker-compose.yaml
@@ -3,10 +3,12 @@ services:
image: archgw:latest
ports:
- "10000:10000"
+ - "11000:11000"  # port the arch_internal cluster targets (envoy.template.yaml)
+ - "12000:12000"  # LLM listener port (arch_listener_llm in envoy.template.yaml)
- "19901:9901"
volumes:
-      - ${ARCH_CONFIG_FILE:-./demos/function_calling/arch_confg.yaml}:/config/arch_config.yaml
+      - ${ARCH_CONFIG_FILE:-./demos/function_calling/arch_config.yaml}:/config/arch_config.yaml  # host config (ARCH_CONFIG_FILE, else the demo default) mounted at /config/arch_config.yaml
       - /etc/ssl/cert.pem:/etc/ssl/cert.pem
-      - ~/archgw_logs/arch_logs:/var/log/
+      - ~/archgw_logs:/var/log/  # host log directory mounted over the container's /var/log/
env_file:
- stage.env
diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml
index 4dba952c..c6bcedba 100644
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@@ -37,7 +37,7 @@ static_resources:
- name: envoy.access_loggers.file
typed_config:
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
- path: "/var/log/arch_access.log"
+ path: "/var/log/access_ingress.log"
route_config:
name: local_routes
virtual_hosts:
@@ -57,12 +57,22 @@ static_resources:
cluster: {{ provider.provider }}
timeout: 60s
{% endfor %}
+ - match:  # requests carrying x-arch-upstream: arch_llm_listener are forwarded to the arch_llm_listener cluster
+ prefix: "/"
+ headers:
+ - name: "x-arch-upstream"
+ string_match:
+ exact: arch_llm_listener
+ route:
+ auto_host_rewrite: true
+ cluster: arch_llm_listener  # cluster defined in this template; its endpoint is port 12000
+ timeout: 60s
- match:
prefix: "/"
direct_response:
status: 400
body:
- inline_string: "x-arch-llm-provider header not set, cannot perform routing\n"
+ inline_string: "x-arch-llm-provider or x-arch-upstream header not set, cannot perform routing\n"
http_filters:
- name: envoy.filters.http.wasm
typed_config:
@@ -71,6 +81,7 @@ static_resources:
value:
config:
name: "http_config"
+ root_id: prompt_gateway
configuration:
"@type": "type.googleapis.com/google.protobuf.StringValue"
value: |
@@ -118,7 +129,7 @@ static_resources:
- name: envoy.access_loggers.file
typed_config:
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
- path: "/var/log/arch_access_internal.log"
+ path: "/var/log/access_internal.log"
route_config:
name: local_routes
virtual_hosts:
@@ -162,6 +173,88 @@ static_resources:
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
+
+    - name: arch_listener_llm  # LLM listener; the arch_llm_listener cluster targets this port
+      address:
+        socket_address:
+          address: 0.0.0.0
+          port_value: 12000
+      filter_chains:
+        - filters:
+            - name: envoy.filters.network.http_connection_manager
+              typed_config:
+                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
+                {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
+                generate_request_id: true  # tracing stanza is guarded by the same predicate as the opentelemetry_collector cluster it exports to
+                tracing:
+                  provider:
+                    name: envoy.tracers.opentelemetry
+                    typed_config:
+                      "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
+                      grpc_service:
+                        envoy_grpc:
+                          cluster_name: opentelemetry_collector
+                        timeout: 0.250s
+                      service_name: arch
+                  random_sampling:
+                    value: {{ arch_tracing.random_sampling }}
+                {% endif %}
+                stat_prefix: arch_listener_llm  # own prefix (not arch_listener_http) so this listener's HTTP stats are reported separately
+                codec_type: AUTO
+                scheme_header_transformation:
+                  scheme_to_overwrite: https
+                access_log:
+                  - name: envoy.access_loggers.file
+                    typed_config:
+                      "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
+                      path: "/var/log/access_llm.log"
+                route_config:
+                  name: local_routes
+                  virtual_hosts:
+                    - name: local_service
+                      domains:
+                        - "*"
+                      routes:
+                      {% for provider in arch_llm_providers %}
+                        - match:  # one route per configured provider, selected by the x-arch-llm-provider header
+                            prefix: "/"
+                            headers:
+                              - name: "x-arch-llm-provider"
+                                string_match:
+                                  exact: {{ provider.name }}
+                          route:
+                            auto_host_rewrite: true
+                            cluster: {{ provider.provider }}
+                            timeout: 60s
+                      {% endfor %}
+                        - match:  # catch-all: no provider route above matched
+                            prefix: "/"
+                          direct_response:
+                            status: 400
+                            body:
+                              inline_string: "x-arch-llm-provider header not set, cannot perform routing\n"
+                http_filters:
+                  - name: envoy.filters.http.wasm
+                    typed_config:
+                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
+                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
+                      value:
+                        config:
+                          name: "http_config"
+                          root_id: llm_gateway
+                          configuration:
+                            "@type": "type.googleapis.com/google.protobuf.StringValue"
+                            value: |
+                                {{ arch_llm_config | indent(32) }}
+                          vm_config:
+                            runtime: "envoy.wasm.runtime.v8"
+                            code:
+                              local:
+                                filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm"
+                  - name: envoy.filters.http.router
+                    typed_config:
+                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
+
clusters:
- name: openai
connect_timeout: 5s
@@ -289,6 +382,22 @@ static_resources:
port_value: 11000
hostname: arch_internal
+ - name: arch_llm_listener  # same string as ARCH_LLM_UPSTREAM_LISTENER (arch/src/consts.rs) and the x-arch-upstream route match; keep in sync
+ connect_timeout: 5s
+ type: LOGICAL_DNS
+ dns_lookup_family: V4_ONLY
+ lb_policy: ROUND_ROBIN
+ load_assignment:
+ cluster_name: arch_llm_listener
+ endpoints:
+ - lb_endpoints:
+ - endpoint:
+ address:
+ socket_address:
+ address: 0.0.0.0  # NOTE(review): 0.0.0.0 as a connect destination relies on the OS mapping it to loopback; consider 127.0.0.1 - confirm against the arch_internal cluster
+ port_value: 12000  # port the arch_listener_llm listener binds
+ hostname: arch_llm_listener
+
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
- name: opentelemetry_collector
type: STRICT_DNS
diff --git a/arch/src/consts.rs b/arch/src/consts.rs
index 07d38cf8..a3e8e428 100644
--- a/arch/src/consts.rs
+++ b/arch/src/consts.rs
@@ -18,3 +18,4 @@ pub const ARCH_FC_MODEL_NAME: &str = "Arch-Function-1.5B";
pub const REQUEST_ID_HEADER: &str = "x-request-id";
pub const ARCH_INTERNAL_CLUSTER_NAME: &str = "arch_internal";
pub const ARCH_UPSTREAM_HOST_HEADER: &str = "x-arch-upstream";
+pub const ARCH_LLM_UPSTREAM_LISTENER: &str = "arch_llm_listener"; // same string as the arch_llm_listener cluster name and the x-arch-upstream route match in arch/envoy.template.yaml
diff --git a/arch/src/filter_context.rs b/arch/src/filter_context.rs
index 491484bb..09314ff5 100644
--- a/arch/src/filter_context.rs
+++ b/arch/src/filter_context.rs
@@ -11,7 +11,9 @@ use log::debug;
use proxy_wasm::traits::*;
use proxy_wasm::types::*;
use public_types::common_types::EmbeddingType;
-use public_types::configuration::{Configuration, Overrides, PromptGuards, PromptTarget};
+use public_types::configuration::{
+ Configuration, GatewayMode, Overrides, PromptGuards, PromptTarget,
+};
use public_types::embeddings::{
CreateEmbeddingRequest, CreateEmbeddingRequestInput, CreateEmbeddingResponse,
};
@@ -53,6 +55,7 @@ pub struct FilterContext {
overrides: Rc