2024-07-16 14:50:32 -07:00
|
|
|
# Listening address of Envoy's admin endpoint.
# NOTE(review): bound to every interface (0.0.0.0) on port 9901 — confirm the
# port is not reachable from untrusted networks.
admin:
  address:
    socket_address:
      address: '0.0.0.0'
      port_value: 9901
|
2024-07-10 10:06:02 -07:00
|
|
|
static_resources:
|
|
|
|
|
listeners:
|
|
|
|
|
# Downstream listen address: all interfaces, TCP port 10000.
address:
  socket_address:
    port_value: 10000
    address: '0.0.0.0'
|
|
|
|
|
filter_chains:
|
|
|
|
|
- filters:
|
|
|
|
|
- name: envoy.filters.network.http_connection_manager
|
|
|
|
|
typed_config:
|
|
|
|
|
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
|
|
|
|
|
stat_prefix: ingress_http
|
|
|
|
|
codec_type: AUTO
|
2024-07-19 13:14:48 -07:00
|
|
|
scheme_header_transformation:
|
|
|
|
|
scheme_to_overwrite: https
|
2024-07-10 10:06:02 -07:00
|
|
|
route_config:
|
|
|
|
|
name: local_routes
|
|
|
|
|
virtual_hosts:
|
2024-07-19 13:14:48 -07:00
|
|
|
# Virtual host selected when the request authority is api.openai.com.
# Every path is forwarded to the `openai` cluster.
- name: openai
  domains: ['api.openai.com']
  routes:
    - match: { prefix: '/' }
      route:
        cluster: openai
        # Replace the Host header with the hostname of the chosen upstream.
        auto_host_rewrite: true
|
2024-07-10 10:06:02 -07:00
|
|
|
# Catch-all virtual host (matches any authority).
# Routes are tried in the order listed, so the specific prefixes must stay
# ahead of the final '/' fallback.
- name: local_service
  domains: ['*']
  routes:
    # Chat completion calls are proxied to the `openai` cluster.
    - match: { prefix: '/v1/chat/completions' }
      route:
        cluster: openai
        auto_host_rewrite: true
    # Embedding requests go to the `embeddingserver` cluster.
    - match: { prefix: '/embeddings' }
      route: { cluster: embeddingserver }
    # Requests under /inline go to the `httpbin` cluster.
    - match: { prefix: '/inline' }
      route: { cluster: httpbin }
    # Anything else is answered directly by Envoy with a fixed 200 body.
    - match: { prefix: '/' }
      direct_response:
        status: 200
        body:
          # Double quotes are required here: the value ends with a \n escape.
          inline_string: "Inspect the HTTP header: custom-header.\n"
|
|
|
|
|
http_filters:
|
|
|
|
|
- name: envoy.filters.http.wasm
|
|
|
|
|
typed_config:
|
|
|
|
|
"@type": type.googleapis.com/udpa.type.v1.TypedStruct
|
|
|
|
|
type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
|
|
|
|
|
value:
|
|
|
|
|
config:
|
|
|
|
|
name: "http_config"
|
|
|
|
|
configuration:
|
|
|
|
|
"@type": "type.googleapis.com/google.protobuf.StringValue"
|
2024-07-16 14:50:32 -07:00
|
|
|
value: |
|
|
|
|
|
katanemo-prompt-config:
|
|
|
|
|
default-prompt-endpoint: "127.0.0.1"
|
|
|
|
|
load-balancing: "round-robin"
|
|
|
|
|
timeout-ms: 5000
|
|
|
|
|
|
|
|
|
|
embedding-provider:
|
|
|
|
|
name: "SentenceTransformer"
|
|
|
|
|
model: "all-MiniLM-L6-v2"
|
|
|
|
|
|
|
|
|
|
llm-providers:
|
|
|
|
|
|
|
|
|
|
- name: "open-ai-gpt-4"
|
|
|
|
|
api-key: "$OPEN_AI_API_KEY"
|
|
|
|
|
model: gpt-4
|
|
|
|
|
|
|
|
|
|
system-prompt: |
|
|
|
|
|
You are a helpful weather forecaster. Please follow the following guidelines when responding to user queries:
|
|
|
|
|
- Use Fahrenheit for temperature
|
|
|
|
|
- Use miles per hour for wind speed
|
|
|
|
|
|
|
|
|
|
prompt-targets:
|
|
|
|
|
|
|
|
|
|
- type: context-resolver
|
|
|
|
|
name: weather-forecast
|
|
|
|
|
few-shot-examples:
|
|
|
|
|
- what is the weather in New York?
|
|
|
|
|
endpoint: "POST:$WEATHER_FORECAST_API_ENDPOINT"
|
|
|
|
|
cache-response: true
|
|
|
|
|
cache-response-settings:
|
|
|
|
|
- cache-ttl-secs: 3600 # cache expiry in seconds
|
|
|
|
|
- cache-max-size: 1000 # in number of items
|
|
|
|
|
- cache-eviction-strategy: LRU
|
|
|
|
|
|
2024-07-10 10:06:02 -07:00
|
|
|
vm_config:
|
|
|
|
|
runtime: "envoy.wasm.runtime.v8"
|
|
|
|
|
code:
|
|
|
|
|
local:
|
|
|
|
|
filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm"
|
|
|
|
|
# Router HTTP filter, listed last in the http_filters chain.
- name: envoy.filters.http.router
  typed_config:
    '@type': type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
|
|
|
|
clusters:
|
2024-07-19 13:14:48 -07:00
|
|
|
# LLM Host
|
|
|
|
|
# Embedding Providers
|
|
|
|
|
# External LLM Providers
|
|
|
|
|
# Upstream cluster for api.openai.com, reached over TLS on port 443 with
# HTTP/2 explicitly selected as the upstream protocol.
- name: openai
  connect_timeout: 5s
  type: LOGICAL_DNS
  lb_policy: ROUND_ROBIN
  typed_extension_protocol_options:
    envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
      "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
      explicit_http_config:
        http2_protocol_options: {}
  load_assignment:
    cluster_name: openai
    endpoints:
      - lb_endpoints:
          - endpoint:
              address:
                socket_address:
                  address: api.openai.com
                  port_value: 443
              # Hostname used by routes that set auto_host_rewrite.
              hostname: "api.openai.com"
  transport_socket:
    name: envoy.transport_sockets.tls
    typed_config:
      "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
      sni: api.openai.com
      common_tls_context:
        tls_params:
          tls_minimum_protocol_version: TLSv1_2
          tls_maximum_protocol_version: TLSv1_3
        # Without a validation_context Envoy accepts any certificate the
        # upstream presents. Verify the chain against the system CA bundle
        # and require the certificate to be issued for api.openai.com, so
        # API-key-bearing requests are only sent to the genuine endpoint.
        validation_context:
          trusted_ca:
            # NOTE(review): assumes a Debian/Ubuntu-style CA bundle path in
            # the runtime image — confirm before deploying.
            filename: /etc/ssl/certs/ca-certificates.crt
          match_typed_subject_alt_names:
            - san_type: DNS
              matcher:
                exact: api.openai.com
|
|
|
|
|
|
2024-07-10 10:06:02 -07:00
|
|
|
# Plain-HTTP upstream for httpbin.org, port 80 (no transport_socket is
# configured for this cluster).
- name: httpbin
  type: STRICT_DNS
  connect_timeout: 5s
  lb_policy: ROUND_ROBIN
  load_assignment:
    cluster_name: httpbin
    endpoints:
      - lb_endpoints:
          - endpoint:
              hostname: 'httpbin.org'
              address:
                socket_address: { address: httpbin.org, port_value: 80 }
|
2024-07-18 14:04:51 -07:00
|
|
|
# Upstream for the embedding service, resolved via DNS name `embeddingserver`
# on port 80.
- name: embeddingserver
  type: STRICT_DNS
  connect_timeout: 5s
  lb_policy: ROUND_ROBIN
  load_assignment:
    cluster_name: embeddingserver
    endpoints:
      - lb_endpoints:
          - endpoint:
              hostname: 'embeddingserver'
              address:
                socket_address: { address: embeddingserver, port_value: 80 }
|
|
|
|
|
# Upstream resolved via DNS name `qdrant` on port 6333.
# NOTE(review): no route in the visible route_config targets this cluster —
# presumably it is called from the wasm filter; verify.
- name: qdrant
  type: STRICT_DNS
  connect_timeout: 5s
  lb_policy: ROUND_ROBIN
  load_assignment:
    cluster_name: qdrant
    endpoints:
      - lb_endpoints:
          - endpoint:
              hostname: 'qdrant'
              address:
                socket_address: { address: qdrant, port_value: 6333 }
|