Signed-off-by: José Ulises Niño Rivera <junr03@users.noreply.github.com>
This commit is contained in:
José Ulises Niño Rivera 2024-09-25 14:10:19 -07:00
parent d38246ceaf
commit 7c4dde5d1f
6 changed files with 11 additions and 251 deletions

View file

@ -1,2 +1,2 @@
fastapi
uvicorn
fastapi==0.115.0
uvicorn==0.30.6

View file

@ -77,8 +77,6 @@ static_resources:
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
clusters:
# LLM Host
# Embedding Providers
# External LLM Providers
- name: openai
connect_timeout: 5s

View file

@ -1,233 +0,0 @@
admin:
address:
socket_address: { address: 0.0.0.0, port_value: 9901 }
static_resources:
listeners:
address:
socket_address:
address: 0.0.0.0
port_value: 10000
filter_chains:
- filters:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
stat_prefix: ingress_http
codec_type: AUTO
scheme_header_transformation:
scheme_to_overwrite: https
route_config:
- name: bolt
domains:
- "*"
routes:
- match:
headers:
- name: "x-bolt-llm-provider"
string_match:
exact: openai
route:
auto_host_rewrite: true
cluster: openai
timeout: 60s
- match:
headers:
- name: "x-bolt-llm-provider"
string_match:
exact: mistral
route:
auto_host_rewrite: true
cluster: mistral
timeout: 60s
- match:
prefix: "/embeddings"
route:
cluster: embeddingserver
http_filters:
- name: envoy.filters.http.wasm
typed_config:
"@type": type.googleapis.com/udpa.type.v1.TypedStruct
type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
value:
config:
name: "http_config"
configuration:
"@type": "type.googleapis.com/google.protobuf.StringValue"
value: |
default_prompt_endpoint: "127.0.0.1"
load_balancing: "round_robin"
timeout_ms: 5000
embedding_provider:
name: "SentenceTransformer"
model: "all-MiniLM-L6-v2"
llm_providers:
- name: open-ai-gpt-4
api_key: "$OPEN_AI_API_KEY"
model: gpt-4
- name: mistral_7b_instruct
model: mistral-7b-instruct
endpoint: http://mistral_7b_instruct:10001/v1/chat/completions
default: true
prompt_targets:
- type: context_resolver
name: weather_forecast
few_shot_examples:
- what is the weather in New York?
- how is the weather in San Francisco?
- what is the forecast in Seattle?
entities:
- name: city
required: true
- name: days
endpoint:
cluster: weatherhost
path: /weather
system_prompt: |
You are a helpful weather forecaster. Use the weather data that is provided to you. Please follow these guidelines when responding to user queries:
- Use Fahrenheit for temperature
- Use miles per hour for wind speed
vm_config:
runtime: "envoy.wasm.runtime.v8"
code:
local:
filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm"
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
clusters:
# LLM Host
# Embedding Providers
# External LLM Providers
- name: openai
connect_timeout: 5s
type: LOGICAL_DNS
lb_policy: ROUND_ROBIN
typed_extension_protocol_options:
envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
"@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
explicit_http_config:
http2_protocol_options: {}
load_assignment:
cluster_name: openai
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: api.openai.com
port_value: 443
hostname: "api.openai.com"
transport_socket:
name: envoy.transport_sockets.tls
typed_config:
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
sni: api.openai.com
common_tls_context:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
- name: mistral
connect_timeout: 5s
type: LOGICAL_DNS
lb_policy: ROUND_ROBIN
typed_extension_protocol_options:
envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
"@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
explicit_http_config:
http2_protocol_options: {}
load_assignment:
cluster_name: mistral
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: api.mistral.ai
port_value: 443
hostname: "api.mistral.ai"
transport_socket:
name: envoy.transport_sockets.tls
typed_config:
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
sni: api.mistral.ai
common_tls_context:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
- name: embeddingserver
connect_timeout: 5s
type: STRICT_DNS
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: embeddingserver
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: host.docker.internal
port_value: 8000
hostname: "embeddingserver"
- name: weatherhost
connect_timeout: 5s
type: STRICT_DNS
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: weatherhost
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: host.docker.internal
port_value: 8000
hostname: "embeddingserver"
- name: nerhost
connect_timeout: 5s
type: STRICT_DNS
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: nerhost
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: host.docker.internal
port_value: 8000
hostname: "embeddingserver"
- name: qdrant
connect_timeout: 5s
type: STRICT_DNS
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: qdrant
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: qdrant
port_value: 6333
hostname: "qdrant"
- name: mistral_7b_instruct
connect_timeout: 5s
type: STRICT_DNS
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: qdrant
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: mistral_7b_instruct
port_value: 10001
hostname: "mistral_7b_instruct"

View file

@ -1022,7 +1022,10 @@ impl HttpContext for StreamContext {
}
};
self.response_tokens += chat_completions_response.usage.completion_tokens;
self.response_tokens += chat_completions_response
.usage
.expect("Third Party should provide usage details")
.completion_tokens;
}
debug!(

View file

@ -25,7 +25,6 @@ pub struct StoreVectorEmbeddingsRequest {
pub points: Vec<VectorPoint>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchPointResult {
pub id: String,
@ -121,9 +120,9 @@ pub mod open_ai {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatCompletionsResponse {
pub usage: Usage,
pub usage: Option<Usage>,
pub choices: Vec<Choice>,
pub model: String
pub model: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -172,7 +171,7 @@ pub enum PromptGuardTask {
#[serde(rename = "toxicity")]
Toxicity,
#[serde(rename = "both")]
Both
Both,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -187,4 +186,4 @@ pub struct PromptGuardResponse {
pub jailbreak_prob: Option<f64>,
pub toxic_verdict: Option<bool>,
pub jailbreak_verdict: Option<bool>,
}
}

View file

@ -71,13 +71,6 @@ pub enum LoadBalancing {
Random,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
// TODO: consider using an enum for `model`; the trade-off is that every newly supported model would then require a code change.
pub struct EmbeddingProviver {
pub name: String,
pub model: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
// TODO: consider using an enum for `model`; the trade-off is that every newly supported model would then require a code change.
pub struct LlmProvider {
@ -193,4 +186,4 @@ ratelimits:
let c: super::Configuration = serde_yaml::from_str(CONFIGURATION).unwrap();
assert_eq!(c.prompt_guards.unwrap().input_guard.len(), 2);
}
}
}