---
# Envoy bootstrap configuration for the intelligent prompt gateway.
# - Admin interface on :9901, main listener on :10000.
# - An HTTP Wasm filter (intelligent_prompt_gateway.wasm) carries an embedded
#   katanemo prompt config as a protobuf StringValue (the `value: |` block
#   below is opaque text to Envoy; it is parsed by the plugin at runtime).
# - Upstreams: OpenAI (TLS/h2), httpbin, an embedding server, and qdrant.
admin:
  address:
    socket_address: { address: 0.0.0.0, port_value: 9901 }

static_resources:
  # NOTE: `listeners` must be a sequence (repeated field in the Envoy proto);
  # a bare mapping here is rejected at config load.
  listeners:
    - address:
        socket_address:
          address: 0.0.0.0
          port_value: 10000
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                stat_prefix: ingress_http
                codec_type: AUTO
                # Force the :scheme header to https on forwarded requests.
                scheme_header_transformation:
                  scheme_to_overwrite: https
                route_config:
                  name: local_routes
                  virtual_hosts:
                    # Requests addressed directly to api.openai.com.
                    - name: openai
                      domains:
                        - "api.openai.com"
                      routes:
                        - match:
                            prefix: "/"
                          route:
                            auto_host_rewrite: true
                            cluster: openai
                    # Catch-all virtual host for local clients.
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                        - match:
                            prefix: "/v1/chat/completions"
                          route:
                            auto_host_rewrite: true
                            cluster: openai
                        - match:
                            prefix: "/embeddings"
                          route:
                            cluster: embeddingserver
                        - match:
                            prefix: "/inline"
                          route:
                            cluster: httpbin
                        # Fallback: answer directly without an upstream.
                        - match:
                            prefix: "/"
                          direct_response:
                            status: 200
                            body:
                              inline_string: "Inspect the HTTP header: custom-header.\n"
                http_filters:
                  - name: envoy.filters.http.wasm
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
                          name: "http_config"
                          configuration:
                            "@type": "type.googleapis.com/google.protobuf.StringValue"
                            # Embedded plugin configuration, passed verbatim to
                            # the Wasm module as a string.
                            value: |
                              katanemo-prompt-config:
                                default-prompt-endpoint: "127.0.0.1"
                                load-balancing: "round-robin"
                                timeout-ms: 5000
                                embedding-provider:
                                  name: "SentenceTransformer"
                                  model: "all-MiniLM-L6-v2"
                                llm-providers:
                                  - name: "open-ai-gpt-4"
                                    api-key: "$OPEN_AI_API_KEY"
                                    model: gpt-4
                                system-prompt: |
                                  You are a helpful weather forecaster. Please follow the following guidelines when responding to user queries:
                                  - Use Fahrenheit for temperature
                                  - Use miles per hour for wind speed
                                prompt-targets:
                                  - type: context-resolver
                                    name: weather-forecast
                                    few-shot-examples:
                                      - what is the weather in New York?
                                    endpoint: "POST:$WEATHER_FORECAST_API_ENDPOINT"
                                    cache-response: true
                                    cache-response-settings:
                                      - cache-ttl-secs: 3600 # cache expiry in seconds
                                      - cache-max-size: 1000 # in number of items
                                      - cache-eviction-strategy: LRU
                          vm_config:
                            runtime: "envoy.wasm.runtime.v8"
                            code:
                              local:
                                filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm"
                  # Router must be the terminal HTTP filter.
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

  clusters:
    # LLM Host
    # Embedding Providers
    # External LLM Providers
    - name: openai
      connect_timeout: 5s
      type: LOGICAL_DNS
      lb_policy: ROUND_ROBIN
      # Speak HTTP/2 to the OpenAI upstream.
      typed_extension_protocol_options:
        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
          explicit_http_config:
            http2_protocol_options: {}
      load_assignment:
        cluster_name: openai
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: api.openai.com
                      port_value: 443
                  hostname: "api.openai.com"
      # Upstream TLS with SNI for api.openai.com.
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          sni: api.openai.com
          common_tls_context:
            tls_params:
              tls_minimum_protocol_version: TLSv1_2
              tls_maximum_protocol_version: TLSv1_3

    - name: httpbin
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: httpbin
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: httpbin.org
                      port_value: 80
                  hostname: "httpbin.org"

    - name: embeddingserver
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: embeddingserver
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: embeddingserver
                      port_value: 80
                  hostname: "embeddingserver"

    - name: qdrant
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: qdrant
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: qdrant
                      port_value: 6333
                  hostname: "qdrant"