wip

Signed-off-by: José Ulises Niño Rivera <junr03@users.noreply.github.com>
2026-07-23 16:51:04 +02:00 · 2024-09-25 14:10:19 -07:00 · 2024-09-25 14:10:19 -07:00 · 7c4dde5d1f
commit 7c4dde5d1f
parent d38246ceaf
6 changed files with 11 additions and 251 deletions
--- a/demos/function_calling/api_server/requirements.txt
+++ b/demos/function_calling/api_server/requirements.txt
@@ -1,2 +1,2 @@
-fastapi
-uvicorn
+fastapi==0.115.0
+uvicorn==0.30.6
--- a/envoyfilter/envoy.template.yaml
+++ b/envoyfilter/envoy.template.yaml
@@ -77,8 +77,6 @@ static_resources:
                  typed_config:
                    "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
  clusters:
-    # LLM Host
-    # Embedding Providers
    # External LLM Providers
    - name: openai
      connect_timeout: 5s
--- a/envoyfilter/envoy.yaml
+++ b/envoyfilter/envoy.yaml
@@ -1,233 +0,0 @@
-admin:
-  address:
-    socket_address: { address: 0.0.0.0, port_value: 9901 }
-static_resources:
-  listeners:
-    address:
-      socket_address:
-        address: 0.0.0.0
-        port_value: 10000
-    filter_chains:
-      - filters:
-          - name: envoy.filters.network.http_connection_manager
-            typed_config:
-              "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
-              stat_prefix: ingress_http
-              codec_type: AUTO
-              scheme_header_transformation:
-                scheme_to_overwrite: https
-              route_config:
-                  - name: bolt
-                    domains:
-                      - "*"
-                    routes:
-                      - match:
-                          headers:
-                            - name: "x-bolt-llm-provider"
-                              string_match:
-                                exact: openai
-                        route:
-                          auto_host_rewrite: true
-                          cluster: openai
-                          timeout: 60s
-                      - match:
-                          headers:
-                            - name: "x-bolt-llm-provider"
-                              string_match:
-                                exact: mistral
-                        route:
-                          auto_host_rewrite: true
-                          cluster: mistral
-                          timeout: 60s
-                      - match:
-                          prefix: "/embeddings"
-                        route:
-                          cluster: embeddingserver
-              http_filters:
-                - name: envoy.filters.http.wasm
-                  typed_config:
-                    "@type": type.googleapis.com/udpa.type.v1.TypedStruct
-                    type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
-                    value:
-                      config:
-                        name: "http_config"
-                        configuration:
-                          "@type": "type.googleapis.com/google.protobuf.StringValue"
-                          value: |
-                              default_prompt_endpoint: "127.0.0.1"
-                              load_balancing: "round_robin"
-                              timeout_ms: 5000
-
-                              embedding_provider:
-                                name: "SentenceTransformer"
-                                model: "all-MiniLM-L6-v2"
-
-                              llm_providers:
-
-                                - name: open-ai-gpt-4
-                                  api_key: "$OPEN_AI_API_KEY"
-                                  model: gpt-4
-
-                                - name: mistral_7b_instruct
-                                  model: mistral-7b-instruct
-                                  endpoint: http://mistral_7b_instruct:10001/v1/chat/completions
-                                  default: true
-
-
-                              prompt_targets:
-
-                                - type: context_resolver
-                                  name: weather_forecast
-                                  few_shot_examples:
-                                    - what is the weather in New York?
-                                    - how is the weather in San Francisco?
-                                    - what is the forecast in Seattle?
-                                  entities:
-                                    - name: city
-                                      required: true
-                                    - name: days
-                                  endpoint:
-                                    cluster: weatherhost
-                                    path: /weather
-                                  system_prompt: |
-                                    You are a helpful weather forecaster. Use weater data that is provided to you. Please following following guidelines when responding to user queries:
-                                    - Use farenheight for temperature
-                                    - Use miles per hour for wind speed
-                        vm_config:
-                          runtime: "envoy.wasm.runtime.v8"
-                          code:
-                            local:
-                              filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm"
-                - name: envoy.filters.http.router
-                  typed_config:
-                    "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
-  clusters:
-    # LLM Host
-    # Embedding Providers
-    # External LLM Providers
-    - name: openai
-      connect_timeout: 5s
-      type: LOGICAL_DNS
-      lb_policy: ROUND_ROBIN
-      typed_extension_protocol_options:
-        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
-          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
-          explicit_http_config:
-            http2_protocol_options: {}
-      load_assignment:
-        cluster_name: openai
-        endpoints:
-          - lb_endpoints:
-              - endpoint:
-                  address:
-                    socket_address:
-                      address: api.openai.com
-                      port_value: 443
-                  hostname: "api.openai.com"
-      transport_socket:
-        name: envoy.transport_sockets.tls
-        typed_config:
-          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
-          sni: api.openai.com
-          common_tls_context:
-            tls_params:
-              tls_minimum_protocol_version: TLSv1_2
-              tls_maximum_protocol_version: TLSv1_3
-    - name: mistral
-      connect_timeout: 5s
-      type: LOGICAL_DNS
-      lb_policy: ROUND_ROBIN
-      typed_extension_protocol_options:
-        envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
-          "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
-          explicit_http_config:
-            http2_protocol_options: {}
-      load_assignment:
-        cluster_name: mistral
-        endpoints:
-          - lb_endpoints:
-              - endpoint:
-                  address:
-                    socket_address:
-                      address: api.mistral.ai
-                      port_value: 443
-                  hostname: "api.mistral.ai"
-      transport_socket:
-        name: envoy.transport_sockets.tls
-        typed_config:
-          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
-          sni: api.mistral.ai
-          common_tls_context:
-            tls_params:
-              tls_minimum_protocol_version: TLSv1_2
-              tls_maximum_protocol_version: TLSv1_3
-    - name: embeddingserver
-      connect_timeout: 5s
-      type: STRICT_DNS
-      lb_policy: ROUND_ROBIN
-      load_assignment:
-        cluster_name: embeddingserver
-        endpoints:
-          - lb_endpoints:
-              - endpoint:
-                  address:
-                    socket_address:
-                      address: host.docker.internal
-                      port_value: 8000
-                  hostname: "embeddingserver"
-    - name: weatherhost
-      connect_timeout: 5s
-      type: STRICT_DNS
-      lb_policy: ROUND_ROBIN
-      load_assignment:
-        cluster_name: weatherhost
-        endpoints:
-          - lb_endpoints:
-              - endpoint:
-                  address:
-                    socket_address:
-                      address: host.docker.internal
-                      port_value: 8000
-                  hostname: "embeddingserver"
-    - name: nerhost
-      connect_timeout: 5s
-      type: STRICT_DNS
-      lb_policy: ROUND_ROBIN
-      load_assignment:
-        cluster_name: nerhost
-        endpoints:
-          - lb_endpoints:
-              - endpoint:
-                  address:
-                    socket_address:
-                      address: host.docker.internal
-                      port_value: 8000
-                  hostname: "embeddingserver"
-    - name: qdrant
-      connect_timeout: 5s
-      type: STRICT_DNS
-      lb_policy: ROUND_ROBIN
-      load_assignment:
-        cluster_name: qdrant
-        endpoints:
-          - lb_endpoints:
-              - endpoint:
-                  address:
-                    socket_address:
-                      address: qdrant
-                      port_value: 6333
-                  hostname: "qdrant"
-    - name: mistral_7b_instruct
-      connect_timeout: 5s
-      type: STRICT_DNS
-      lb_policy: ROUND_ROBIN
-      load_assignment:
-        cluster_name: qdrant
-        endpoints:
-          - lb_endpoints:
-              - endpoint:
-                  address:
-                    socket_address:
-                      address: mistral_7b_instruct
-                      port_value: 10001
-                  hostname: "mistral_7b_instruct"
--- a/envoyfilter/src/stream_context.rs
+++ b/envoyfilter/src/stream_context.rs
@@ -1022,7 +1022,10 @@ impl HttpContext for StreamContext {
                    }
                };

-            self.response_tokens += chat_completions_response.usage.completion_tokens;
+            self.response_tokens += chat_completions_response
+                .usage
+                .expect("Third Party should provide usage details")
+                .completion_tokens;
        }

        debug!(
--- a/public_types/src/common_types.rs
+++ b/public_types/src/common_types.rs
@@ -25,7 +25,6 @@ pub struct StoreVectorEmbeddingsRequest {
    pub points: Vec<VectorPoint>,
 }

-
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct SearchPointResult {
    pub id: String,
@@ -121,9 +120,9 @@ pub mod open_ai {

    #[derive(Debug, Clone, Serialize, Deserialize)]
    pub struct ChatCompletionsResponse {
-        pub usage: Usage,
+        pub usage: Option<Usage>,
        pub choices: Vec<Choice>,
-        pub model: String
+        pub model: String,
    }

    #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -172,7 +171,7 @@ pub enum PromptGuardTask {
    #[serde(rename = "toxicity")]
    Toxicity,
    #[serde(rename = "both")]
-    Both
+    Both,
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -187,4 +186,4 @@ pub struct PromptGuardResponse {
    pub jailbreak_prob: Option<f64>,
    pub toxic_verdict: Option<bool>,
    pub jailbreak_verdict: Option<bool>,
-}
+}
--- a/public_types/src/configuration.rs
+++ b/public_types/src/configuration.rs
@@ -71,13 +71,6 @@ pub enum LoadBalancing {
    Random,
 }

-#[derive(Debug, Clone, Serialize, Deserialize)]
-//TODO: use enum for model, but if there is a new model, we need to update the code
-pub struct EmbeddingProviver {
-    pub name: String,
-    pub model: String,
-}
-
 #[derive(Debug, Clone, Serialize, Deserialize)]
 //TODO: use enum for model, but if there is a new model, we need to update the code
 pub struct LlmProvider {
@@ -193,4 +186,4 @@ ratelimits:
        let c: super::Configuration = serde_yaml::from_str(CONFIGURATION).unwrap();
        assert_eq!(c.prompt_guards.unwrap().input_guard.len(), 2);
    }
-}
+}