Add workflow logic for weather forecast demo (#24)

2026-05-14 18:42:38 +02:00 · 2024-07-30 16:23:23 -07:00 · 2024-07-30 16:23:23 -07:00 · 33f9dd22e6
commit 33f9dd22e6
parent 7ef68eccfb
32 changed files with 1902 additions and 459 deletions
--- a/demos/weather-forecast/README.md
+++ b/demos/weather-forecast/README.md
@ -0,0 +1,15 @@
+# Weather forecasting
+This demo shows how you can use the intelligent prompt gateway to provide real-time weather forecasts.
+
+# Starting the demo
+1. Create `.env` file and set OpenAI key using env var `OPENAI_API_KEY`
+1. Start services
+  ```sh
+  $ docker compose up
+  ```
+1. Navigate to http://localhost:18080/
+1. You can type in queries like "how is the weather in Seattle"
+   1. You can also ask follow up questions like "show me sunny days"
+2. To see metrics navigate to "http://localhost:3000/" (use admin/grafana for login)
+   1. Open the dashboard named "Intelligent Gateway Overview"
+   2. On this dashboard you can see request latency and number of requests
--- a/demos/weather-forecast/docker-compose.yaml
+++ b/demos/weather-forecast/docker-compose.yaml
@ -0,0 +1,85 @@
+services:
+  envoy:  # gateway proxy built from envoyfilter/Dockerfile at the repository root
+    build:
+      context: ../../
+      dockerfile: envoyfilter/Dockerfile
+    hostname: envoy
+    ports:
+      - "10000:10000"  # HTTP listener (port_value 10000 in envoy.yaml)
+      - "19901:9901"  # Envoy admin address (9901) published on host port 19901
+    volumes:
+      - ./envoy.yaml:/etc/envoy/envoy.yaml
+      - /etc/ssl/cert.pem:/etc/ssl/cert.pem  # NOTE(review): host CA bundle path differs between OSes; confirm it exists on the host
+    networks:
+      - envoymesh
+    depends_on:  # wait until the embeddingserver healthcheck reports healthy
+      embeddingserver:
+        condition: service_healthy
+
+  embeddingserver:  # envoy's depends_on waits for this service's healthcheck
+    build:
+      context: ../../embedding-server
+      dockerfile: Dockerfile
+    ports:
+      - "18081:80"
+    healthcheck:
+      test: ["CMD", "curl", "--fail", "http://localhost:80/healthz"]  # --fail: HTTP errors (>= 400) make the probe exit non-zero
+      interval: 5s
+      retries: 20
+    networks:
+      - envoymesh
+
+  qdrant:  # reachable on the compose network by its service name `qdrant`
+    image: qdrant/qdrant
+    hostname: vector-db
+    ports:  # quoted like the other services' mappings so they are always read as strings
+      - "16333:6333"  # 6333 is the port the envoy `qdrant` cluster targets
+      - "16334:6334"
+    networks:
+      - envoymesh
+
+  chatbot-ui:  # web chat front end built from ../../chatbot-ui
+    build:
+      context: ../../chatbot-ui
+      dockerfile: Dockerfile
+    ports:
+      - "18080:8080"  # host port 18080 is the URL given in the README
+    networks:
+      - envoymesh
+    environment:  # key comes from the host env / .env file; chat requests go to the envoy service's port-10000 listener
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - CHAT_COMPLETION_ENDPOINT=http://envoy:10000/v1/chat/completions
+
+  prometheus:  # metrics store; Grafana's datasource points at http://prometheus:9090
+    image: prom/prometheus
+    container_name: prometheus
+    command:
+      - '--config.file=/etc/prometheus/prometheus.yaml'
+    ports:
+      - "9090:9090"  # quoted for consistency with the other services' port mappings
+    restart: unless-stopped
+    volumes:
+      - ./prometheus:/etc/prometheus  # provides prometheus.yaml named in --config.file
+      - ./prom_data:/prometheus
+    networks:
+      - envoymesh
+
+  grafana:  # dashboards UI; datasource and dashboards are provisioned from ./grafana
+    image: grafana/grafana
+    container_name: grafana
+    ports:
+      - "3000:3000"  # quoted for consistency with the other services' port mappings
+    restart: unless-stopped
+    environment:  # demo login (admin/grafana), as stated in the README
+      - GF_SECURITY_ADMIN_USER=admin
+      - GF_SECURITY_ADMIN_PASSWORD=grafana
+    volumes:
+      - ./grafana:/etc/grafana/provisioning/datasources
+      - ./grafana/dashboard.yaml:/etc/grafana/provisioning/dashboards/main.yaml
+      - ./grafana/dashboards:/var/lib/grafana/dashboards  # path read by the provider in dashboard.yaml
+      # - ./grafana-data:/var/lib/grafana
+    networks:
+      - envoymesh
+
+networks:
+  envoymesh: {}
--- a/demos/weather-forecast/envoy.yaml
+++ b/demos/weather-forecast/envoy.yaml
@ -0,0 +1,197 @@
+admin:
+  address:
+    socket_address: { address: 0.0.0.0, port_value: 9901 }
+static_resources:
+  listeners:
+    address:
+      socket_address:
+        address: 0.0.0.0
+        port_value: 10000
+    filter_chains:
+      - filters:
+          - name: envoy.filters.network.http_connection_manager
+            typed_config:
+              "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
+              stat_prefix: ingress_http
+              codec_type: AUTO
+              scheme_header_transformation:
+                scheme_to_overwrite: https
+              route_config:
+                name: local_routes
+                virtual_hosts:
+                  - name: openai
+                    domains:
+                      - "api.openai.com"
+                    routes:
+                      - match:
+                          prefix: "/"
+                        route:
+                          auto_host_rewrite: true
+                          cluster: openai
+                  - name: local_service
+                    domains:
+                      - "*"
+                    routes:
+                      - match:
+                          prefix: "/v1/chat/completions"
+                        route:
+                          auto_host_rewrite: true
+                          cluster: openai
+                      - match:
+                          prefix: "/embeddings"
+                        route:
+                          cluster: embeddingserver
+                      - match:
+                          prefix: "/"
+                        direct_response:
+                          status: 200
+                          body:
+                            inline_string: "Inspect the HTTP header: custom-header.\n"
+              http_filters:
+                - name: envoy.filters.http.wasm
+                  typed_config:
+                    "@type": type.googleapis.com/udpa.type.v1.TypedStruct
+                    type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
+                    value:
+                      config:
+                        name: "http_config"
+                        configuration:
+                          "@type": "type.googleapis.com/google.protobuf.StringValue"
+                          value: |
+                            katanemo-prompt-config:
+                              default-prompt-endpoint: "127.0.0.1"
+                              load-balancing: "round-robin"
+                              timeout-ms: 5000
+
+                              embedding-provider:
+                                name: "SentenceTransformer"
+                                model: "all-MiniLM-L6-v2"
+
+                              llm-providers:
+
+                                - name: "open-ai-gpt-4"
+                                  api-key: "$OPEN_AI_API_KEY"
+                                  model: gpt-4
+
+                              prompt-targets:
+
+                                - type: context-resolver
+                                  name: weather-forecast
+                                  few-shot-examples:
+                                    - what is the weather in New York?
+                                    - how is the weather in San Francisco?
+                                    - what is the forecast in Seattle?
+                                  entities:
+                                    - name: city
+                                      required: true
+                                    - name: days
+                                  endpoint:
+                                    cluster: weatherhost
+                                    path: /weather
+                                  cache-response: true
+                                  cache-response-settings:
+                                    - cache-ttl-secs: 3600 # cache expiry in seconds
+                                    - cache-max-size: 1000 # in number of items
+                                    - cache-eviction-strategy: LRU
+                                  system-prompt: |
+                                    You are a helpful weather forecaster. Use weather data that is provided to you. Please follow these guidelines when responding to user queries:
+                                    - Use Fahrenheit for temperature
+                                    - Use miles per hour for wind speed
+
+                        vm_config:
+                          runtime: "envoy.wasm.runtime.v8"
+                          code:
+                            local:
+                              filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm"
+                - name: envoy.filters.http.router
+                  typed_config:
+                    "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
+  clusters:
+    # LLM Host
+    # Embedding Providers
+    # External LLM Providers
+    - name: openai
+      connect_timeout: 5s
+      type: LOGICAL_DNS
+      lb_policy: ROUND_ROBIN
+      typed_extension_protocol_options:
+        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
+          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
+          explicit_http_config:
+            http2_protocol_options: {}
+      load_assignment:
+        cluster_name: openai
+        endpoints:
+          - lb_endpoints:
+              - endpoint:
+                  address:
+                    socket_address:
+                      address: api.openai.com
+                      port_value: 443
+                  hostname: "api.openai.com"
+      transport_socket:
+        name: envoy.transport_sockets.tls
+        typed_config:
+          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
+          sni: api.openai.com
+          common_tls_context:
+            tls_params:
+              tls_minimum_protocol_version: TLSv1_2
+              tls_maximum_protocol_version: TLSv1_3
+
+    - name: embeddingserver
+      connect_timeout: 5s
+      type: STRICT_DNS
+      lb_policy: ROUND_ROBIN
+      load_assignment:
+        cluster_name: embeddingserver
+        endpoints:
+          - lb_endpoints:
+              - endpoint:
+                  address:
+                    socket_address:
+                      address: embeddingserver
+                      port_value: 80
+                  hostname: "embeddingserver"
+    - name: weatherhost  # cluster named by the weather-forecast prompt target's endpoint.cluster
+      connect_timeout: 5s
+      type: STRICT_DNS
+      lb_policy: ROUND_ROBIN
+      load_assignment:
+        cluster_name: weatherhost
+        endpoints:
+          - lb_endpoints:
+              - endpoint:
+                  address:
+                    socket_address:
+                      address: embeddingserver  # same host and port as the embeddingserver cluster
+                      port_value: 80
+                  hostname: "embeddingserver"
+    - name: nerhost  # NOTE(review): no route or prompt target in this config references this cluster; presumably used by the wasm filter, confirm
+      connect_timeout: 5s
+      type: STRICT_DNS
+      lb_policy: ROUND_ROBIN
+      load_assignment:
+        cluster_name: nerhost
+        endpoints:
+          - lb_endpoints:
+              - endpoint:
+                  address:
+                    socket_address:
+                      address: embeddingserver  # same host and port as the embeddingserver cluster
+                      port_value: 80
+                  hostname: "embeddingserver"
+    - name: qdrant
+      connect_timeout: 5s
+      type: STRICT_DNS
+      lb_policy: ROUND_ROBIN
+      load_assignment:
+        cluster_name: qdrant
+        endpoints:
+          - lb_endpoints:
+              - endpoint:
+                  address:
+                    socket_address:
+                      address: qdrant
+                      port_value: 6333
+                  hostname: "qdrant"
--- a/demos/weather-forecast/grafana/dashboard.yaml
+++ b/demos/weather-forecast/grafana/dashboard.yaml
@ -0,0 +1,12 @@
+apiVersion: 1
+
+providers:
+  - name: "Dashboard provider"
+    orgId: 1
+    type: file
+    disableDeletion: false
+    updateIntervalSeconds: 10
+    allowUiUpdates: false
+    options:
+      path: /var/lib/grafana/dashboards
+      foldersFromFilesStructure: true
--- a/demos/weather-forecast/grafana/dashboards/envoy_overview.json
+++ b/demos/weather-forecast/grafana/dashboards/envoy_overview.json
@ -0,0 +1,355 @@
+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": {
+          "type": "grafana",
+          "uid": "-- Grafana --"
+        },
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 1,
+  "links": [],
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 0
+      },
+      "id": 2,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "PBFA97CFB590B2093"
+          },
+          "disableTextWrap": false,
+          "editorMode": "code",
+          "expr": "avg(rate(envoy_cluster_internal_upstream_rq_time_sum[1m]) / rate(envoy_cluster_internal_upstream_rq_time_count[1m])) by (envoy_cluster_name)",
+          "fullMetaSearch": false,
+          "hide": false,
+          "includeNullMetadata": true,
+          "instant": false,
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "A",
+          "useBackend": false
+        }
+      ],
+      "title": "request latency - internal (ms)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 0
+      },
+      "id": 1,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "PBFA97CFB590B2093"
+          },
+          "disableTextWrap": false,
+          "editorMode": "code",
+          "expr": "avg(rate(envoy_cluster_external_upstream_rq_time_sum[1m]) / rate(envoy_cluster_external_upstream_rq_time_count[1m])) by (envoy_cluster_name)",
+          "fullMetaSearch": false,
+          "hide": false,
+          "includeNullMetadata": true,
+          "instant": false,
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "A",
+          "useBackend": false
+        }
+      ],
+      "title": "request latency - external (ms)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "PBFA97CFB590B2093"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisBorderShow": false,
+            "axisCenteredZero": false,
+            "axisColorMode": "text",
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 0,
+            "gradientMode": "none",
+            "hideFrom": {
+              "legend": false,
+              "tooltip": false,
+              "viz": false
+            },
+            "insertNulls": false,
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false,
+            "stacking": {
+              "group": "A",
+              "mode": "none"
+            },
+            "thresholdsStyle": {
+              "mode": "off"
+            }
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          }
+        },
+        "overrides": []
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 8
+      },
+      "id": 3,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "single",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "PBFA97CFB590B2093"
+          },
+          "disableTextWrap": false,
+          "editorMode": "code",
+          "expr": "avg(rate(envoy_cluster_internal_upstream_rq_completed[1m])) by (envoy_cluster_name)",
+          "fullMetaSearch": false,
+          "includeNullMetadata": true,
+          "instant": false,
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "A",
+          "useBackend": false
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "PBFA97CFB590B2093"
+          },
+          "disableTextWrap": false,
+          "editorMode": "code",
+          "expr": "avg(rate(envoy_cluster_external_upstream_rq_completed[1m])) by (envoy_cluster_name)",
+          "fullMetaSearch": false,
+          "hide": false,
+          "includeNullMetadata": true,
+          "instant": false,
+          "legendFormat": "__auto",
+          "range": true,
+          "refId": "B",
+          "useBackend": false
+        }
+      ],
+      "title": "Upstream request count",
+      "type": "timeseries"
+    }
+  ],
+  "schemaVersion": 39,
+  "tags": [],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-15m",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "Intelligent Gateway Overview",
+  "uid": "adt6uhx5lk8aob",
+  "version": 3,
+  "weekStart": ""
+}
--- a/demos/weather-forecast/grafana/datasource.yaml
+++ b/demos/weather-forecast/grafana/datasource.yaml
@ -0,0 +1,9 @@
+apiVersion: 1
+
+datasources:
+- name: Prometheus
+  type: prometheus
+  url: http://prometheus:9090
+  isDefault: true
+  access: proxy
+  editable: true
--- a/demos/weather-forecast/prometheus/prometheus.yaml
+++ b/demos/weather-forecast/prometheus/prometheus.yaml
@ -0,0 +1,23 @@
+global:  # defaults for every scrape job and for rule evaluation
+  scrape_interval: 15s
+  scrape_timeout: 10s
+  evaluation_interval: 15s
+alerting:  # no Alertmanager targets are configured for this demo
+  alertmanagers:
+  - static_configs:
+    - targets: []
+    scheme: http
+    timeout: 10s
+    api_version: v2  # v1 was removed in Prometheus 3.0, which the unpinned prom/prometheus image now resolves to
+scrape_configs:
+- job_name: envoy
+  honor_timestamps: true
+  scrape_interval: 15s
+  scrape_timeout: 10s
+  metrics_path: /stats  # combined with params below: /stats?format=prometheus
+  scheme: http
+  static_configs:
+  - targets:
+    - envoy:9901  # Envoy admin address from envoy.yaml
+  params:
+    format: ['prometheus']