From 7c4dde5d1f27face2f2a194ea772c2ff55c947e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Ulises=20Ni=C3=B1o=20Rivera?= Date: Wed, 25 Sep 2024 14:10:19 -0700 Subject: [PATCH] wip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: José Ulises Niño Rivera --- .../api_server/requirements.txt | 4 +- envoyfilter/envoy.template.yaml | 2 - envoyfilter/envoy.yaml | 233 ------------------ envoyfilter/src/stream_context.rs | 5 +- public_types/src/common_types.rs | 9 +- public_types/src/configuration.rs | 9 +- 6 files changed, 11 insertions(+), 251 deletions(-) delete mode 100644 envoyfilter/envoy.yaml diff --git a/demos/function_calling/api_server/requirements.txt b/demos/function_calling/api_server/requirements.txt index 97dc7cd8..531efda7 100644 --- a/demos/function_calling/api_server/requirements.txt +++ b/demos/function_calling/api_server/requirements.txt @@ -1,2 +1,2 @@ -fastapi -uvicorn +fastapi==0.115.0 +uvicorn==0.30.6 diff --git a/envoyfilter/envoy.template.yaml b/envoyfilter/envoy.template.yaml index 249d3879..8977bd78 100644 --- a/envoyfilter/envoy.template.yaml +++ b/envoyfilter/envoy.template.yaml @@ -77,8 +77,6 @@ static_resources: typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router clusters: - # LLM Host - # Embedding Providers # External LLM Providers - name: openai connect_timeout: 5s diff --git a/envoyfilter/envoy.yaml b/envoyfilter/envoy.yaml deleted file mode 100644 index f0236bf6..00000000 --- a/envoyfilter/envoy.yaml +++ /dev/null @@ -1,233 +0,0 @@ -admin: - address: - socket_address: { address: 0.0.0.0, port_value: 9901 } -static_resources: - listeners: - address: - socket_address: - address: 0.0.0.0 - port_value: 10000 - filter_chains: - - filters: - - name: envoy.filters.network.http_connection_manager - typed_config: - "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager - stat_prefix: ingress_http - codec_type: AUTO - scheme_header_transformation: - scheme_to_overwrite: https - route_config: - - name: bolt - domains: - - "*" - routes: - - match: - headers: - - name: "x-bolt-llm-provider" - string_match: - exact: openai - route: - auto_host_rewrite: true - cluster: openai - timeout: 60s - - match: - headers: - - name: "x-bolt-llm-provider" - string_match: - exact: mistral - route: - auto_host_rewrite: true - cluster: mistral - timeout: 60s - - match: - prefix: "/embeddings" - route: - cluster: embeddingserver - http_filters: - - name: envoy.filters.http.wasm - typed_config: - "@type": type.googleapis.com/udpa.type.v1.TypedStruct - type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm - value: - config: - name: "http_config" - configuration: - "@type": "type.googleapis.com/google.protobuf.StringValue" - value: | - default_prompt_endpoint: "127.0.0.1" - load_balancing: "round_robin" - timeout_ms: 5000 - - embedding_provider: - name: "SentenceTransformer" - model: "all-MiniLM-L6-v2" - - llm_providers: - - - name: open-ai-gpt-4 - api_key: "$OPEN_AI_API_KEY" - model: gpt-4 - - - name: mistral_7b_instruct - model: mistral-7b-instruct - endpoint: http://mistral_7b_instruct:10001/v1/chat/completions - default: true - - - prompt_targets: - - - type: context_resolver - name: weather_forecast - few_shot_examples: - - what is the weather in New York? - - how is the weather in San Francisco? - - what is the forecast in Seattle? - entities: - - name: city - required: true - - name: days - endpoint: - cluster: weatherhost - path: /weather - system_prompt: | - You are a helpful weather forecaster. Use weater data that is provided to you. Please following following guidelines when responding to user queries: - - Use farenheight for temperature - - Use miles per hour for wind speed - vm_config: - runtime: "envoy.wasm.runtime.v8" - code: - local: - filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm" - - name: envoy.filters.http.router - typed_config: - "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router - clusters: - # LLM Host - # Embedding Providers - # External LLM Providers - - name: openai - connect_timeout: 5s - type: LOGICAL_DNS - lb_policy: ROUND_ROBIN - typed_extension_protocol_options: - envoy.extensions.upstreams.http.v3.HttpProtocolOptions: - "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions - explicit_http_config: - http2_protocol_options: {} - load_assignment: - cluster_name: openai - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: api.openai.com - port_value: 443 - hostname: "api.openai.com" - transport_socket: - name: envoy.transport_sockets.tls - typed_config: - "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext - sni: api.openai.com - common_tls_context: - tls_params: - tls_minimum_protocol_version: TLSv1_2 - tls_maximum_protocol_version: TLSv1_3 - - name: mistral - connect_timeout: 5s - type: LOGICAL_DNS - lb_policy: ROUND_ROBIN - typed_extension_protocol_options: - envoy.extensions.upstreams.http.v3.HttpProtocolOptions: - "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions - explicit_http_config: - http2_protocol_options: {} - load_assignment: - cluster_name: mistral - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: api.mistral.ai - port_value: 443 - hostname: "api.mistral.ai" - transport_socket: - name: envoy.transport_sockets.tls - typed_config: - "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext - sni: api.mistral.ai - common_tls_context: - tls_params: - tls_minimum_protocol_version: TLSv1_2 - tls_maximum_protocol_version: TLSv1_3 - - name: embeddingserver - connect_timeout: 5s - type: STRICT_DNS - lb_policy: ROUND_ROBIN - load_assignment: - cluster_name: embeddingserver - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: host.docker.internal - port_value: 8000 - hostname: "embeddingserver" - - name: weatherhost - connect_timeout: 5s - type: STRICT_DNS - lb_policy: ROUND_ROBIN - load_assignment: - cluster_name: weatherhost - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: host.docker.internal - port_value: 8000 - hostname: "embeddingserver" - - name: nerhost - connect_timeout: 5s - type: STRICT_DNS - lb_policy: ROUND_ROBIN - load_assignment: - cluster_name: nerhost - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: host.docker.internal - port_value: 8000 - hostname: "embeddingserver" - - name: qdrant - connect_timeout: 5s - type: STRICT_DNS - lb_policy: ROUND_ROBIN - load_assignment: - cluster_name: qdrant - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: qdrant - port_value: 6333 - hostname: "qdrant" - - name: mistral_7b_instruct - connect_timeout: 5s - type: STRICT_DNS - lb_policy: ROUND_ROBIN - load_assignment: - cluster_name: qdrant - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: mistral_7b_instruct - port_value: 10001 - hostname: "mistral_7b_instruct" diff --git a/envoyfilter/src/stream_context.rs b/envoyfilter/src/stream_context.rs index 5d2bdb5c..6b799246 100644 --- a/envoyfilter/src/stream_context.rs +++ b/envoyfilter/src/stream_context.rs @@ -1022,7 +1022,10 @@ impl HttpContext for StreamContext { } }; - self.response_tokens += chat_completions_response.usage.completion_tokens; + self.response_tokens += chat_completions_response + .usage + .expect("Third Party should provide usage details") + .completion_tokens; } debug!( diff --git a/public_types/src/common_types.rs b/public_types/src/common_types.rs index 07bfd46b..e0ede0ad 100644 --- a/public_types/src/common_types.rs +++ b/public_types/src/common_types.rs @@ -25,7 +25,6 @@ pub struct StoreVectorEmbeddingsRequest { pub points: Vec, } - #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SearchPointResult { pub id: String, @@ -121,9 +120,9 @@ pub mod open_ai { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ChatCompletionsResponse { - pub usage: Usage, + pub usage: Option, pub choices: Vec, - pub model: String + pub model: String, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -172,7 +171,7 @@ pub enum PromptGuardTask { #[serde(rename = "toxicity")] Toxicity, #[serde(rename = "both")] - Both + Both, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -187,4 +186,4 @@ pub struct PromptGuardResponse { pub jailbreak_prob: Option, pub toxic_verdict: Option, pub jailbreak_verdict: Option, -} \ No newline at end of file +} diff --git a/public_types/src/configuration.rs b/public_types/src/configuration.rs index 91bdcd8d..8ee27063 100644 --- a/public_types/src/configuration.rs +++ b/public_types/src/configuration.rs @@ -71,13 +71,6 @@ pub enum LoadBalancing { Random, } -#[derive(Debug, Clone, Serialize, Deserialize)] -//TODO: use enum for model, but if there is a new model, we need to update the code -pub struct EmbeddingProviver { - pub name: String, - pub model: String, -} - #[derive(Debug, Clone, Serialize, Deserialize)] //TODO: use enum for model, but if there is a new model, we need to update the code pub struct LlmProvider { @@ -193,4 +186,4 @@ ratelimits: let c: super::Configuration = serde_yaml::from_str(CONFIGURATION).unwrap(); assert_eq!(c.prompt_guards.unwrap().input_guard.len(), 2); } -} \ No newline at end of file +}