From 7c4dde5d1f27face2f2a194ea772c2ff55c947e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jos=C3=A9=20Ulises=20Ni=C3=B1o=20Rivera?=
 <junr03@users.noreply.github.com>
Date: Wed, 25 Sep 2024 14:10:19 -0700
Subject: [PATCH] Make chat completions usage optional; remove envoy.yaml and EmbeddingProviver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: José Ulises Niño Rivera <junr03@users.noreply.github.com>
---
 .../api_server/requirements.txt               |   4 +-
 envoyfilter/envoy.template.yaml               |   2 -
 envoyfilter/envoy.yaml                        | 233 ------------------
 envoyfilter/src/stream_context.rs             |   5 +-
 public_types/src/common_types.rs              |   9 +-
 public_types/src/configuration.rs             |   9 +-
 6 files changed, 11 insertions(+), 251 deletions(-)
 delete mode 100644 envoyfilter/envoy.yaml

diff --git a/demos/function_calling/api_server/requirements.txt b/demos/function_calling/api_server/requirements.txt
index 97dc7cd8..531efda7 100644
--- a/demos/function_calling/api_server/requirements.txt
+++ b/demos/function_calling/api_server/requirements.txt
@@ -1,2 +1,2 @@
-fastapi
-uvicorn
+fastapi==0.115.0
+uvicorn==0.30.6
diff --git a/envoyfilter/envoy.template.yaml b/envoyfilter/envoy.template.yaml
index 249d3879..8977bd78 100644
--- a/envoyfilter/envoy.template.yaml
+++ b/envoyfilter/envoy.template.yaml
@@ -77,8 +77,6 @@ static_resources:
                   typed_config:
                     "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
   clusters:
-    # LLM Host
-    # Embedding Providers
     # External LLM Providers
     - name: openai
       connect_timeout: 5s
diff --git a/envoyfilter/envoy.yaml b/envoyfilter/envoy.yaml
deleted file mode 100644
index f0236bf6..00000000
--- a/envoyfilter/envoy.yaml
+++ /dev/null
@@ -1,233 +0,0 @@
-admin:
-  address:
-    socket_address: { address: 0.0.0.0, port_value: 9901 }
-static_resources:
-  listeners:
-    address:
-      socket_address:
-        address: 0.0.0.0
-        port_value: 10000
-    filter_chains:
-      - filters:
-          - name: envoy.filters.network.http_connection_manager
-            typed_config:
-              "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
-              stat_prefix: ingress_http
-              codec_type: AUTO
-              scheme_header_transformation:
-                scheme_to_overwrite: https
-              route_config:
-                  - name: bolt
-                    domains:
-                      - "*"
-                    routes:
-                      - match:
-                          headers:
-                            - name: "x-bolt-llm-provider"
-                              string_match:
-                                exact: openai
-                        route:
-                          auto_host_rewrite: true
-                          cluster: openai
-                          timeout: 60s
-                      - match:
-                          headers:
-                            - name: "x-bolt-llm-provider"
-                              string_match:
-                                exact: mistral
-                        route:
-                          auto_host_rewrite: true
-                          cluster: mistral
-                          timeout: 60s
-                      - match:
-                          prefix: "/embeddings"
-                        route:
-                          cluster: embeddingserver
-              http_filters:
-                - name: envoy.filters.http.wasm
-                  typed_config:
-                    "@type": type.googleapis.com/udpa.type.v1.TypedStruct
-                    type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
-                    value:
-                      config:
-                        name: "http_config"
-                        configuration:
-                          "@type": "type.googleapis.com/google.protobuf.StringValue"
-                          value: |
-                              default_prompt_endpoint: "127.0.0.1"
-                              load_balancing: "round_robin"
-                              timeout_ms: 5000
-
-                              embedding_provider:
-                                name: "SentenceTransformer"
-                                model: "all-MiniLM-L6-v2"
-
-                              llm_providers:
-
-                                - name: open-ai-gpt-4
-                                  api_key: "$OPEN_AI_API_KEY"
-                                  model: gpt-4
-
-                                - name: mistral_7b_instruct
-                                  model: mistral-7b-instruct
-                                  endpoint: http://mistral_7b_instruct:10001/v1/chat/completions
-                                  default: true
-
-
-                              prompt_targets:
-
-                                - type: context_resolver
-                                  name: weather_forecast
-                                  few_shot_examples:
-                                    - what is the weather in New York?
-                                    - how is the weather in San Francisco?
-                                    - what is the forecast in Seattle?
-                                  entities:
-                                    - name: city
-                                      required: true
-                                    - name: days
-                                  endpoint:
-                                    cluster: weatherhost
-                                    path: /weather
-                                  system_prompt: |
-                                    You are a helpful weather forecaster. Use weater data that is provided to you. Please following following guidelines when responding to user queries:
-                                    - Use farenheight for temperature
-                                    - Use miles per hour for wind speed
-                        vm_config:
-                          runtime: "envoy.wasm.runtime.v8"
-                          code:
-                            local:
-                              filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm"
-                - name: envoy.filters.http.router
-                  typed_config:
-                    "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
-  clusters:
-    # LLM Host
-    # Embedding Providers
-    # External LLM Providers
-    - name: openai
-      connect_timeout: 5s
-      type: LOGICAL_DNS
-      lb_policy: ROUND_ROBIN
-      typed_extension_protocol_options:
-        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
-          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
-          explicit_http_config:
-            http2_protocol_options: {}
-      load_assignment:
-        cluster_name: openai
-        endpoints:
-          - lb_endpoints:
-              - endpoint:
-                  address:
-                    socket_address:
-                      address: api.openai.com
-                      port_value: 443
-                  hostname: "api.openai.com"
-      transport_socket:
-        name: envoy.transport_sockets.tls
-        typed_config:
-          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
-          sni: api.openai.com
-          common_tls_context:
-            tls_params:
-              tls_minimum_protocol_version: TLSv1_2
-              tls_maximum_protocol_version: TLSv1_3
-    - name: mistral
-      connect_timeout: 5s
-      type: LOGICAL_DNS
-      lb_policy: ROUND_ROBIN
-      typed_extension_protocol_options:
-        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
-          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
-          explicit_http_config:
-            http2_protocol_options: {}
-      load_assignment:
-        cluster_name: mistral
-        endpoints:
-          - lb_endpoints:
-              - endpoint:
-                  address:
-                    socket_address:
-                      address: api.mistral.ai
-                      port_value: 443
-                  hostname: "api.mistral.ai"
-      transport_socket:
-        name: envoy.transport_sockets.tls
-        typed_config:
-          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
-          sni: api.mistral.ai
-          common_tls_context:
-            tls_params:
-              tls_minimum_protocol_version: TLSv1_2
-              tls_maximum_protocol_version: TLSv1_3
-    - name: embeddingserver
-      connect_timeout: 5s
-      type: STRICT_DNS
-      lb_policy: ROUND_ROBIN
-      load_assignment:
-        cluster_name: embeddingserver
-        endpoints:
-          - lb_endpoints:
-              - endpoint:
-                  address:
-                    socket_address:
-                      address: host.docker.internal
-                      port_value: 8000
-                  hostname: "embeddingserver"
-    - name: weatherhost
-      connect_timeout: 5s
-      type: STRICT_DNS
-      lb_policy: ROUND_ROBIN
-      load_assignment:
-        cluster_name: weatherhost
-        endpoints:
-          - lb_endpoints:
-              - endpoint:
-                  address:
-                    socket_address:
-                      address: host.docker.internal
-                      port_value: 8000
-                  hostname: "embeddingserver"
-    - name: nerhost
-      connect_timeout: 5s
-      type: STRICT_DNS
-      lb_policy: ROUND_ROBIN
-      load_assignment:
-        cluster_name: nerhost
-        endpoints:
-          - lb_endpoints:
-              - endpoint:
-                  address:
-                    socket_address:
-                      address: host.docker.internal
-                      port_value: 8000
-                  hostname: "embeddingserver"
-    - name: qdrant
-      connect_timeout: 5s
-      type: STRICT_DNS
-      lb_policy: ROUND_ROBIN
-      load_assignment:
-        cluster_name: qdrant
-        endpoints:
-          - lb_endpoints:
-              - endpoint:
-                  address:
-                    socket_address:
-                      address: qdrant
-                      port_value: 6333
-                  hostname: "qdrant"
-    - name: mistral_7b_instruct
-      connect_timeout: 5s
-      type: STRICT_DNS
-      lb_policy: ROUND_ROBIN
-      load_assignment:
-        cluster_name: qdrant
-        endpoints:
-          - lb_endpoints:
-              - endpoint:
-                  address:
-                    socket_address:
-                      address: mistral_7b_instruct
-                      port_value: 10001
-                  hostname: "mistral_7b_instruct"
diff --git a/envoyfilter/src/stream_context.rs b/envoyfilter/src/stream_context.rs
index 5d2bdb5c..6b799246 100644
--- a/envoyfilter/src/stream_context.rs
+++ b/envoyfilter/src/stream_context.rs
@@ -1022,7 +1022,10 @@ impl HttpContext for StreamContext {
                     }
                 };
 
-            self.response_tokens += chat_completions_response.usage.completion_tokens;
+            self.response_tokens += chat_completions_response
+                .usage
+                .expect("Third Party should provide usage details")
+                .completion_tokens;
         }
 
         debug!(
diff --git a/public_types/src/common_types.rs b/public_types/src/common_types.rs
index 07bfd46b..e0ede0ad 100644
--- a/public_types/src/common_types.rs
+++ b/public_types/src/common_types.rs
@@ -25,7 +25,6 @@ pub struct StoreVectorEmbeddingsRequest {
     pub points: Vec<VectorPoint>,
 }
 
-
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct SearchPointResult {
     pub id: String,
@@ -121,9 +120,9 @@ pub mod open_ai {
 
     #[derive(Debug, Clone, Serialize, Deserialize)]
     pub struct ChatCompletionsResponse {
-        pub usage: Usage,
+        pub usage: Option<Usage>,
         pub choices: Vec<Choice>,
-        pub model: String
+        pub model: String,
     }
 
     #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -172,7 +171,7 @@ pub enum PromptGuardTask {
     #[serde(rename = "toxicity")]
     Toxicity,
     #[serde(rename = "both")]
-    Both
+    Both,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -187,4 +186,4 @@ pub struct PromptGuardResponse {
     pub jailbreak_prob: Option<f64>,
     pub toxic_verdict: Option<bool>,
     pub jailbreak_verdict: Option<bool>,
-}
\ No newline at end of file
+}
diff --git a/public_types/src/configuration.rs b/public_types/src/configuration.rs
index 91bdcd8d..8ee27063 100644
--- a/public_types/src/configuration.rs
+++ b/public_types/src/configuration.rs
@@ -71,13 +71,6 @@ pub enum LoadBalancing {
     Random,
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
-//TODO: use enum for model, but if there is a new model, we need to update the code
-pub struct EmbeddingProviver {
-    pub name: String,
-    pub model: String,
-}
-
 #[derive(Debug, Clone, Serialize, Deserialize)]
 //TODO: use enum for model, but if there is a new model, we need to update the code
 pub struct LlmProvider {
@@ -193,4 +186,4 @@ ratelimits:
         let c: super::Configuration = serde_yaml::from_str(CONFIGURATION).unwrap();
         assert_eq!(c.prompt_guards.unwrap().input_guard.len(), 2);
     }
-}
\ No newline at end of file
+}