mirror of
https://github.com/katanemo/plano.git
synced 2026-06-08 14:55:14 +02:00
wip
Signed-off-by: José Ulises Niño Rivera <junr03@users.noreply.github.com>
This commit is contained in:
parent
d38246ceaf
commit
7c4dde5d1f
6 changed files with 11 additions and 251 deletions
|
|
@ -1,2 +1,2 @@
|
|||
fastapi
|
||||
uvicorn
|
||||
fastapi==0.115.0
|
||||
uvicorn==0.30.6
|
||||
|
|
|
|||
|
|
@ -77,8 +77,6 @@ static_resources:
|
|||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
clusters:
|
||||
# LLM Host
|
||||
# Embedding Providers
|
||||
# External LLM Providers
|
||||
- name: openai
|
||||
connect_timeout: 5s
|
||||
|
|
|
|||
|
|
@ -1,233 +0,0 @@
|
|||
admin:
|
||||
address:
|
||||
socket_address: { address: 0.0.0.0, port_value: 9901 }
|
||||
static_resources:
|
||||
listeners:
|
||||
address:
|
||||
socket_address:
|
||||
address: 0.0.0.0
|
||||
port_value: 10000
|
||||
filter_chains:
|
||||
- filters:
|
||||
- name: envoy.filters.network.http_connection_manager
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
|
||||
stat_prefix: ingress_http
|
||||
codec_type: AUTO
|
||||
scheme_header_transformation:
|
||||
scheme_to_overwrite: https
|
||||
route_config:
|
||||
- name: bolt
|
||||
domains:
|
||||
- "*"
|
||||
routes:
|
||||
- match:
|
||||
headers:
|
||||
- name: "x-bolt-llm-provider"
|
||||
string_match:
|
||||
exact: openai
|
||||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: openai
|
||||
timeout: 60s
|
||||
- match:
|
||||
headers:
|
||||
- name: "x-bolt-llm-provider"
|
||||
string_match:
|
||||
exact: mistral
|
||||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: mistral
|
||||
timeout: 60s
|
||||
- match:
|
||||
prefix: "/embeddings"
|
||||
route:
|
||||
cluster: embeddingserver
|
||||
http_filters:
|
||||
- name: envoy.filters.http.wasm
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/udpa.type.v1.TypedStruct
|
||||
type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
|
||||
value:
|
||||
config:
|
||||
name: "http_config"
|
||||
configuration:
|
||||
"@type": "type.googleapis.com/google.protobuf.StringValue"
|
||||
value: |
|
||||
default_prompt_endpoint: "127.0.0.1"
|
||||
load_balancing: "round_robin"
|
||||
timeout_ms: 5000
|
||||
|
||||
embedding_provider:
|
||||
name: "SentenceTransformer"
|
||||
model: "all-MiniLM-L6-v2"
|
||||
|
||||
llm_providers:
|
||||
|
||||
- name: open-ai-gpt-4
|
||||
api_key: "$OPEN_AI_API_KEY"
|
||||
model: gpt-4
|
||||
|
||||
- name: mistral_7b_instruct
|
||||
model: mistral-7b-instruct
|
||||
endpoint: http://mistral_7b_instruct:10001/v1/chat/completions
|
||||
default: true
|
||||
|
||||
|
||||
prompt_targets:
|
||||
|
||||
- type: context_resolver
|
||||
name: weather_forecast
|
||||
few_shot_examples:
|
||||
- what is the weather in New York?
|
||||
- how is the weather in San Francisco?
|
||||
- what is the forecast in Seattle?
|
||||
entities:
|
||||
- name: city
|
||||
required: true
|
||||
- name: days
|
||||
endpoint:
|
||||
cluster: weatherhost
|
||||
path: /weather
|
||||
system_prompt: |
|
||||
You are a helpful weather forecaster. Use the weather data that is provided to you. Please follow these guidelines when responding to user queries:
|
||||
- Use Fahrenheit for temperature
|
||||
- Use miles per hour for wind speed
|
||||
vm_config:
|
||||
runtime: "envoy.wasm.runtime.v8"
|
||||
code:
|
||||
local:
|
||||
filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm"
|
||||
- name: envoy.filters.http.router
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
clusters:
|
||||
# LLM Host
|
||||
# Embedding Providers
|
||||
# External LLM Providers
|
||||
- name: openai
|
||||
connect_timeout: 5s
|
||||
type: LOGICAL_DNS
|
||||
lb_policy: ROUND_ROBIN
|
||||
typed_extension_protocol_options:
|
||||
envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
|
||||
"@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
|
||||
explicit_http_config:
|
||||
http2_protocol_options: {}
|
||||
load_assignment:
|
||||
cluster_name: openai
|
||||
endpoints:
|
||||
- lb_endpoints:
|
||||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: api.openai.com
|
||||
port_value: 443
|
||||
hostname: "api.openai.com"
|
||||
transport_socket:
|
||||
name: envoy.transport_sockets.tls
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
|
||||
sni: api.openai.com
|
||||
common_tls_context:
|
||||
tls_params:
|
||||
tls_minimum_protocol_version: TLSv1_2
|
||||
tls_maximum_protocol_version: TLSv1_3
|
||||
- name: mistral
|
||||
connect_timeout: 5s
|
||||
type: LOGICAL_DNS
|
||||
lb_policy: ROUND_ROBIN
|
||||
typed_extension_protocol_options:
|
||||
envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
|
||||
"@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
|
||||
explicit_http_config:
|
||||
http2_protocol_options: {}
|
||||
load_assignment:
|
||||
cluster_name: mistral
|
||||
endpoints:
|
||||
- lb_endpoints:
|
||||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: api.mistral.ai
|
||||
port_value: 443
|
||||
hostname: "api.mistral.ai"
|
||||
transport_socket:
|
||||
name: envoy.transport_sockets.tls
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
|
||||
sni: api.mistral.ai
|
||||
common_tls_context:
|
||||
tls_params:
|
||||
tls_minimum_protocol_version: TLSv1_2
|
||||
tls_maximum_protocol_version: TLSv1_3
|
||||
- name: embeddingserver
|
||||
connect_timeout: 5s
|
||||
type: STRICT_DNS
|
||||
lb_policy: ROUND_ROBIN
|
||||
load_assignment:
|
||||
cluster_name: embeddingserver
|
||||
endpoints:
|
||||
- lb_endpoints:
|
||||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: host.docker.internal
|
||||
port_value: 8000
|
||||
hostname: "embeddingserver"
|
||||
- name: weatherhost
|
||||
connect_timeout: 5s
|
||||
type: STRICT_DNS
|
||||
lb_policy: ROUND_ROBIN
|
||||
load_assignment:
|
||||
cluster_name: weatherhost
|
||||
endpoints:
|
||||
- lb_endpoints:
|
||||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: host.docker.internal
|
||||
port_value: 8000
|
||||
hostname: "embeddingserver"
|
||||
- name: nerhost
|
||||
connect_timeout: 5s
|
||||
type: STRICT_DNS
|
||||
lb_policy: ROUND_ROBIN
|
||||
load_assignment:
|
||||
cluster_name: nerhost
|
||||
endpoints:
|
||||
- lb_endpoints:
|
||||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: host.docker.internal
|
||||
port_value: 8000
|
||||
hostname: "embeddingserver"
|
||||
- name: qdrant
|
||||
connect_timeout: 5s
|
||||
type: STRICT_DNS
|
||||
lb_policy: ROUND_ROBIN
|
||||
load_assignment:
|
||||
cluster_name: qdrant
|
||||
endpoints:
|
||||
- lb_endpoints:
|
||||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: qdrant
|
||||
port_value: 6333
|
||||
hostname: "qdrant"
|
||||
- name: mistral_7b_instruct
|
||||
connect_timeout: 5s
|
||||
type: STRICT_DNS
|
||||
lb_policy: ROUND_ROBIN
|
||||
load_assignment:
|
||||
cluster_name: qdrant
|
||||
endpoints:
|
||||
- lb_endpoints:
|
||||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: mistral_7b_instruct
|
||||
port_value: 10001
|
||||
hostname: "mistral_7b_instruct"
|
||||
|
|
@ -1022,7 +1022,10 @@ impl HttpContext for StreamContext {
|
|||
}
|
||||
};
|
||||
|
||||
self.response_tokens += chat_completions_response.usage.completion_tokens;
|
||||
self.response_tokens += chat_completions_response
|
||||
.usage
|
||||
.expect("Third Party should provide usage details")
|
||||
.completion_tokens;
|
||||
}
|
||||
|
||||
debug!(
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@ pub struct StoreVectorEmbeddingsRequest {
|
|||
pub points: Vec<VectorPoint>,
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SearchPointResult {
|
||||
pub id: String,
|
||||
|
|
@ -121,9 +120,9 @@ pub mod open_ai {
|
|||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ChatCompletionsResponse {
|
||||
pub usage: Usage,
|
||||
pub usage: Option<Usage>,
|
||||
pub choices: Vec<Choice>,
|
||||
pub model: String
|
||||
pub model: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
|
|
@ -172,7 +171,7 @@ pub enum PromptGuardTask {
|
|||
#[serde(rename = "toxicity")]
|
||||
Toxicity,
|
||||
#[serde(rename = "both")]
|
||||
Both
|
||||
Both,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
|
|
@ -187,4 +186,4 @@ pub struct PromptGuardResponse {
|
|||
pub jailbreak_prob: Option<f64>,
|
||||
pub toxic_verdict: Option<bool>,
|
||||
pub jailbreak_verdict: Option<bool>,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -71,13 +71,6 @@ pub enum LoadBalancing {
|
|||
Random,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
//TODO: use enum for model, but if there is a new model, we need to update the code
|
||||
pub struct EmbeddingProviver {
|
||||
pub name: String,
|
||||
pub model: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
//TODO: use enum for model, but if there is a new model, we need to update the code
|
||||
pub struct LlmProvider {
|
||||
|
|
@ -193,4 +186,4 @@ ratelimits:
|
|||
let c: super::Configuration = serde_yaml::from_str(CONFIGURATION).unwrap();
|
||||
assert_eq!(c.prompt_guards.unwrap().input_guard.len(), 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue