---
# Envoy bootstrap configuration for the intelligent prompt gateway.
# - Admin interface on :9901, main listener on :10000.
# - An HTTP Wasm filter (intelligent_prompt_gateway.wasm) carries an embedded
#   katanemo prompt config as a protobuf StringValue (the `value: |` block
#   below is opaque text to Envoy; it is parsed by the plugin at runtime).
# - Upstreams: OpenAI (TLS/h2), httpbin, an embedding server, and qdrant.
admin:
  address:
    socket_address: { address: 0.0.0.0, port_value: 9901 }

static_resources:
  # NOTE: `listeners` must be a sequence (repeated field in the Envoy proto);
  # a bare mapping here is rejected at config load.
  listeners:
    - address:
        socket_address:
          address: 0.0.0.0
          port_value: 10000
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                stat_prefix: ingress_http
                codec_type: AUTO
                # Force the :scheme header to https on forwarded requests.
                scheme_header_transformation:
                  scheme_to_overwrite: https
                route_config:
                  name: local_routes
                  virtual_hosts:
                    # Requests addressed directly to api.openai.com.
                    - name: openai
                      domains:
                        - "api.openai.com"
                      routes:
                        - match:
                            prefix: "/"
                          route:
                            auto_host_rewrite: true
                            cluster: openai
                    # Catch-all virtual host for local clients.
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                        - match:
                            prefix: "/v1/chat/completions"
                          route:
                            auto_host_rewrite: true
                            cluster: openai
                        - match:
                            prefix: "/embeddings"
                          route:
                            cluster: embeddingserver
                        - match:
                            prefix: "/inline"
                          route:
                            cluster: httpbin
                        # Fallback: answer directly without an upstream.
                        - match:
                            prefix: "/"
                          direct_response:
                            status: 200
                            body:
                              inline_string: "Inspect the HTTP header: custom-header.\n"
                http_filters:
                  - name: envoy.filters.http.wasm
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
                          name: "http_config"
                          configuration:
                            "@type": "type.googleapis.com/google.protobuf.StringValue"
                            # Embedded plugin configuration, passed verbatim to
                            # the Wasm module as a string.
                            value: |
                              katanemo-prompt-config:
                                default-prompt-endpoint: "127.0.0.1"
                                load-balancing: "round-robin"
                                timeout-ms: 5000
                                embedding-provider:
                                  name: "SentenceTransformer"
                                  model: "all-MiniLM-L6-v2"
                                llm-providers:
                                  - name: "open-ai-gpt-4"
                                    api-key: "$OPEN_AI_API_KEY"
                                    model: gpt-4
                                system-prompt: |
                                  You are a helpful weather forecaster. Please follow the following guidelines when responding to user queries:
                                  - Use Fahrenheit for temperature
                                  - Use miles per hour for wind speed
                                prompt-targets:
                                  - type: context-resolver
                                    name: weather-forecast
                                    few-shot-examples:
                                      - what is the weather in New York?
                                    endpoint: "POST:$WEATHER_FORECAST_API_ENDPOINT"
                                    cache-response: true
                                    cache-response-settings:
                                      - cache-ttl-secs: 3600 # cache expiry in seconds
                                      - cache-max-size: 1000 # in number of items
                                      - cache-eviction-strategy: LRU
                          vm_config:
                            runtime: "envoy.wasm.runtime.v8"
                            code:
                              local:
                                filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm"
                  # Router must be the terminal HTTP filter.
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

  clusters:
    # LLM Host
    # Embedding Providers
    # External LLM Providers
    - name: openai
      connect_timeout: 5s
      type: LOGICAL_DNS
      lb_policy: ROUND_ROBIN
      # Speak HTTP/2 to the OpenAI upstream.
      typed_extension_protocol_options:
        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
          explicit_http_config:
            http2_protocol_options: {}
      load_assignment:
        cluster_name: openai
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: api.openai.com
                      port_value: 443
                  hostname: "api.openai.com"
      # Upstream TLS with SNI for api.openai.com.
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          sni: api.openai.com
          common_tls_context:
            tls_params:
              tls_minimum_protocol_version: TLSv1_2
              tls_maximum_protocol_version: TLSv1_3

    - name: httpbin
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: httpbin
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: httpbin.org
                      port_value: 80
                  hostname: "httpbin.org"

    - name: embeddingserver
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: embeddingserver
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: embeddingserver
                      port_value: 80
                  hostname: "embeddingserver"

    - name: qdrant
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: qdrant
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: qdrant
                      port_value: 6333
                  hostname: "qdrant"