# plano/envoyfilter/envoy.yaml
# Envoy bootstrap configuration for the intelligent prompt gateway.
# (Snapshot metadata: 2024-08-06 23:40:06 -07:00, 220 lines, 8.3 KiB, YAML.)
# Admin interface: serves /stats, /config_dump, /clusters, etc.
admin:
  address:
    socket_address:
      address: 0.0.0.0
      port_value: 9901
static_resources:
  listeners:
    # Single ingress listener: accepts plain HTTP on 10000 and routes either to
    # external LLM providers or to local model/embedding services.
    - address:
        socket_address:
          address: 0.0.0.0
          port_value: 10000
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                stat_prefix: ingress_http
                codec_type: AUTO
                # Force the :scheme header to https so TLS upstreams
                # (api.openai.com) accept requests that arrived as plain HTTP.
                scheme_header_transformation:
                  scheme_to_overwrite: https
                route_config:
                  name: local_routes
                  virtual_hosts:
                    # Requests addressed directly to api.openai.com are proxied through.
                    - name: openai
                      domains:
                        - "api.openai.com"
                      routes:
                        - match:
                            prefix: "/"
                          route:
                            auto_host_rewrite: true
                            cluster: openai
                            timeout: 60s
                    # Catch-all virtual host for everything else.
                    - name: local_service
                      domains:
                        - "*"
                      routes:
                        # Chat completions carrying an Authorization header go to
                        # OpenAI; `headers` is a list of HeaderMatcher entries.
                        - match:
                            prefix: "/v1/chat/completions"
                            headers:
                              - name: "Authorization"
                                present_match: true
                          route:
                            auto_host_rewrite: true
                            cluster: openai
                            timeout: 60s
                        # ...otherwise fall back to the local Mistral instance.
                        - match:
                            prefix: "/v1/chat/completions"
                          route:
                            auto_host_rewrite: true
                            cluster: mistral_7b_instruct
                            timeout: 60s
                        - match:
                            prefix: "/embeddings"
                          route:
                            cluster: embeddingserver
                        - match:
                            prefix: "/"
                          direct_response:
                            status: 200
                            body:
                              inline_string: "Inspect the HTTP header: custom-header.\n"
                http_filters:
                  # Wasm plugin implementing the prompt gateway; the `value:` block
                  # scalar below is opaque to Envoy and parsed by the plugin itself.
                  - name: envoy.filters.http.wasm
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
                          name: "http_config"
                          configuration:
                            "@type": "type.googleapis.com/google.protobuf.StringValue"
                            # NOTE(review): "$OPEN_AI_API_KEY" is presumably substituted
                            # by tooling before deployment — confirm; Envoy does not
                            # expand environment variables here.
                            value: |
                              default_prompt_endpoint: "127.0.0.1"
                              load_balancing: "round_robin"
                              timeout_ms: 5000
                              embedding_provider:
                                name: "SentenceTransformer"
                                model: "all-MiniLM-L6-v2"
                              llm_providers:
                                - name: open-ai-gpt-4
                                  api_key: "$OPEN_AI_API_KEY"
                                  model: gpt-4
                                - name: mistral_7b_instruct
                                  model: mistral-7b-instruct
                                  endpoint: http://mistral_7b_instruct:10001/v1/chat/completions
                                  default: true
                              prompt_targets:
                                - type: context_resolver
                                  name: weather_forecast
                                  few_shot_examples:
                                    - what is the weather in New York?
                                    - how is the weather in San Francisco?
                                    - what is the forecast in Seattle?
                                  entities:
                                    - name: city
                                      required: true
                                    - name: days
                                  endpoint:
                                    cluster: weatherhost
                                    path: /weather
                              system_prompt: |
                                You are a helpful weather forecaster. Use weather data that is provided to you. Please follow the following guidelines when responding to user queries:
                                - Use Fahrenheit for temperature
                                - Use miles per hour for wind speed
                          vm_config:
                            runtime: "envoy.wasm.runtime.v8"
                            code:
                              local:
                                filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm"
                  # Router must be the terminal HTTP filter in the chain.
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
  clusters:
    # External LLM Providers
    - name: openai
      connect_timeout: 5s
      type: LOGICAL_DNS
      lb_policy: ROUND_ROBIN
      # Speak HTTP/2 to the upstream (over TLS, below).
      typed_extension_protocol_options:
        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
          explicit_http_config:
            http2_protocol_options: {}
      load_assignment:
        cluster_name: openai
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: api.openai.com
                      port_value: 443
                  # hostname feeds auto_host_rewrite on matching routes.
                  hostname: "api.openai.com"
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          sni: api.openai.com
          common_tls_context:
            tls_params:
              tls_minimum_protocol_version: TLSv1_2
              tls_maximum_protocol_version: TLSv1_3
    # Embedding Providers
    - name: embeddingserver
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: embeddingserver
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: host.docker.internal
                      port_value: 8000
                  hostname: "embeddingserver"
    - name: weatherhost
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: weatherhost
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: host.docker.internal
                      port_value: 8000
                  # fixed: was "embeddingserver" (copy-paste from cluster above)
                  hostname: "weatherhost"
    - name: nerhost
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: nerhost
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: host.docker.internal
                      port_value: 8000
                  # fixed: was "embeddingserver" (copy-paste from cluster above)
                  hostname: "nerhost"
    - name: qdrant
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: qdrant
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: qdrant
                      port_value: 6333
                  hostname: "qdrant"
    # LLM Host
    - name: mistral_7b_instruct
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
        # fixed: was "qdrant" (copy-paste from cluster above); must name this cluster
        cluster_name: mistral_7b_instruct
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: mistral_7b_instruct
                      port_value: 10001
                  hostname: "mistral_7b_instruct"