diff --git a/arch/envoy.yaml b/arch/envoy.yaml deleted file mode 100644 index 31e9c3fa..00000000 --- a/arch/envoy.yaml +++ /dev/null @@ -1,233 +0,0 @@ -admin: - address: - socket_address: { address: 0.0.0.0, port_value: 9901 } -static_resources: - listeners: - address: - socket_address: - address: 0.0.0.0 - port_value: 10000 - filter_chains: - - filters: - - name: envoy.filters.network.http_connection_manager - typed_config: - "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager - stat_prefix: ingress_http - codec_type: AUTO - scheme_header_transformation: - scheme_to_overwrite: https - route_config: - - name: arch - domains: - - "*" - routes: - - match: - headers: - - name: "x-arch-llm-provider" - string_match: - exact: openai - route: - auto_host_rewrite: true - cluster: openai - timeout: 60s - - match: - headers: - - name: "x-arch-llm-provider" - string_match: - exact: mistral - route: - auto_host_rewrite: true - cluster: mistral - timeout: 60s - - match: - prefix: "/embeddings" - route: - cluster: embeddingserver - http_filters: - - name: envoy.filters.http.wasm - typed_config: - "@type": type.googleapis.com/udpa.type.v1.TypedStruct - type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm - value: - config: - name: "http_config" - configuration: - "@type": "type.googleapis.com/google.protobuf.StringValue" - value: | - default_prompt_endpoint: "127.0.0.1" - load_balancing: "round_robin" - timeout_ms: 5000 - - embedding_provider: - name: "SentenceTransformer" - model: "all-MiniLM-L6-v2" - - llm_providers: - - - name: open-ai-gpt-4 - api_key: "$OPEN_AI_API_KEY" - model: gpt-4 - - - name: mistral_7b_instruct - model: mistral-7b-instruct - endpoint: http://mistral_7b_instruct:10001/v1/chat/completions - default: true - - - prompt_targets: - - - type: context_resolver - name: weather_forecast - few_shot_examples: - - what is the weather in New York? - - how is the weather in San Francisco? - - what is the forecast in Seattle? - entities: - - name: city - required: true - - name: days - endpoint: - cluster: weatherhost - path: /weather - system_prompt: | - You are a helpful weather forecaster. Use weater data that is provided to you. Please following following guidelines when responding to user queries: - - Use farenheight for temperature - - Use miles per hour for wind speed - vm_config: - runtime: "envoy.wasm.runtime.v8" - code: - local: - filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm" - - name: envoy.filters.http.router - typed_config: - "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router - clusters: - # LLM Host - # Embedding Providers - # External LLM Providers - - name: openai - connect_timeout: 5s - type: LOGICAL_DNS - lb_policy: ROUND_ROBIN - typed_extension_protocol_options: - envoy.extensions.upstreams.http.v3.HttpProtocolOptions: - "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions - explicit_http_config: - http2_protocol_options: {} - load_assignment: - cluster_name: openai - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: api.openai.com - port_value: 443 - hostname: "api.openai.com" - transport_socket: - name: envoy.transport_sockets.tls - typed_config: - "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext - sni: api.openai.com - common_tls_context: - tls_params: - tls_minimum_protocol_version: TLSv1_2 - tls_maximum_protocol_version: TLSv1_3 - - name: mistral - connect_timeout: 5s - type: LOGICAL_DNS - lb_policy: ROUND_ROBIN - typed_extension_protocol_options: - envoy.extensions.upstreams.http.v3.HttpProtocolOptions: - "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions - explicit_http_config: - http2_protocol_options: {} - load_assignment: - cluster_name: mistral - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: api.mistral.ai - port_value: 443 - hostname: "api.mistral.ai" - transport_socket: - name: envoy.transport_sockets.tls - typed_config: - "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext - sni: api.mistral.ai - common_tls_context: - tls_params: - tls_minimum_protocol_version: TLSv1_2 - tls_maximum_protocol_version: TLSv1_3 - - name: embeddingserver - connect_timeout: 5s - type: STRICT_DNS - lb_policy: ROUND_ROBIN - load_assignment: - cluster_name: embeddingserver - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: host.docker.internal - port_value: 8000 - hostname: "embeddingserver" - - name: weatherhost - connect_timeout: 5s - type: STRICT_DNS - lb_policy: ROUND_ROBIN - load_assignment: - cluster_name: weatherhost - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: host.docker.internal - port_value: 8000 - hostname: "embeddingserver" - - name: nerhost - connect_timeout: 5s - type: STRICT_DNS - lb_policy: ROUND_ROBIN - load_assignment: - cluster_name: nerhost - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: host.docker.internal - port_value: 8000 - hostname: "embeddingserver" - - name: qdrant - connect_timeout: 5s - type: STRICT_DNS - lb_policy: ROUND_ROBIN - load_assignment: - cluster_name: qdrant - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: qdrant - port_value: 6333 - hostname: "qdrant" - - name: mistral_7b_instruct - connect_timeout: 5s - type: STRICT_DNS - lb_policy: ROUND_ROBIN - load_assignment: - cluster_name: qdrant - endpoints: - - lb_endpoints: - - endpoint: - address: - socket_address: - address: mistral_7b_instruct - port_value: 10001 - hostname: "mistral_7b_instruct" diff --git a/arch/katanemo-config.yaml b/arch/katanemo-config.yaml deleted file mode 100644 index 273902c3..00000000 --- a/arch/katanemo-config.yaml +++ /dev/null @@ -1,37 +0,0 @@ -default_prompt_endpoint: "127.0.0.1" -load_balancing: "round_robin" -timeout_ms: 5000 - -llm_providers: - - - name: "open-ai-gpt-4" - api_key: "$OPEN_AI_API_KEY" - model: gpt-4 - -prompt_targets: - - - type: context_resolver - name: weather_forecast - few_shot_examples: - - what is the weather in New York? - - how is the weather in San Francisco? - - what is the forecast in Chicago? - entities: - - name: city - required: true - - name: days - endpoint: - cluster: weatherhost - path: /weather - system_prompt: | - You are a helpful weather forecaster. Use weater data that is provided to you. Please following following guidelines when responding to user queries: - - Use farenheight for temperature - - Use miles per hour for wind speed - -#TODO: add support for adding custom clusters e.g. - # clusters: - # qdrant: - # options: - # - address: "qdrant" - # - address: "weatherhost" - # - port: 6333