diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 67f03f83..8fbd3c69 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,7 +3,8 @@ repos:
     rev: v4.6.0
     hooks:
       - id: check-yaml
-        exclude: arch/envoy.template.yaml
+        # exclude is a regular expression, not a glob: skip every Envoy template.
+        exclude: ^arch/envoy\.template
       - id: end-of-file-fixer
       - id: trailing-whitespace
   - repo: local
diff --git a/arch/docker-compose.dev.yaml b/arch/docker-compose.dev.yaml
new file mode 100644
index 00000000..b019f52f
--- /dev/null
+++ b/arch/docker-compose.dev.yaml
@@ -0,0 +1,38 @@
+# Development stack: the archgw gateway plus the model server it waits on.
+# The Envoy template and the compiled WASM filter are bind-mounted from the
+# local checkout.
+services:
+  archgw:
+    image: archgw:latest
+    ports:
+      - "10000:10000"
+      - "19901:9901"
+    volumes:
+      - ${ARCH_CONFIG_FILE:-./demos/function_calling/arch_config.yaml}:/config/arch_config.yaml
+      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
+      - ./envoy.template.dev.yaml:/config/envoy.template.yaml
+      - ./target/wasm32-wasi/release/intelligent_prompt_gateway.wasm:/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm
+    # Hold archgw back until the model_server healthcheck passes.
+    depends_on:
+      model_server:
+        condition: service_healthy
+    env_file:
+      - stage.env
+
+  model_server:
+    image: model_server:latest
+    ports:
+      - "18081:80"
+    healthcheck:
+      # -f makes curl exit non-zero on HTTP error statuses, so an unhealthy
+      # /healthz response fails the check instead of passing it.
+      test: ["CMD", "curl", "-f", "http://localhost/healthz"]
+      interval: 5s
+      retries: 20
+    volumes:
+      - ~/.cache/huggingface:/root/.cache/huggingface
+    environment:
+      - OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal}
+      - OLLAMA_MODEL=Arch-Function-Calling-3B-Q4_K_M
+      - MODE=${MODE:-cloud}
+      - FC_URL=${FC_URL:-https://arch-fc-free-trial-4mzywewe.uc.gateway.dev/v1}
diff --git a/arch/envoy.template.dev.yaml b/arch/envoy.template.dev.yaml
new file mode 100644
index 00000000..6b9d82e1
--- /dev/null
+++ b/arch/envoy.template.dev.yaml
@@ -0,0 +1,197 @@
+# Envoy bootstrap template for local development. The Jinja placeholders
+# (arch_llm_providers, arch_config, arch_clusters) are filled in at render time.
+admin:
+  address:
+    socket_address: { address: 0.0.0.0, port_value: 9901 }
+static_resources:
+  # NOTE(review): listeners is a repeated field in the Envoy bootstrap schema
+  # but is written as a single mapping here - confirm Envoy accepts this form.
+  listeners:
+    address:
+      socket_address:
+        address: 0.0.0.0
+        port_value: 10000
+    filter_chains:
+      - filters:
+          - name: envoy.filters.network.http_connection_manager
+            typed_config:
+              "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
+              stat_prefix: arch_ingress_http
+              codec_type: HTTP1
+              scheme_header_transformation:
+                scheme_to_overwrite: https
+              access_log:
+                - name: envoy.access_loggers.file
+                  typed_config:
+                    "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
+                    path: "/var/log/arch_access.log"
+              route_config:
+                name: local_routes
+                virtual_hosts:
+                  - name: local_service
+                    domains:
+                      - "*"
+                    routes:
+                      - match:
+                          prefix: "/mistral/v1/chat/completions"
+                        route:
+                          auto_host_rewrite: true
+                          cluster: mistral_7b_instruct
+                          timeout: 60s
+                      # One catch-all route per configured LLM provider, chosen by the
+                      # x-arch-llm-provider header. String-valued templated fields are quoted
+                      # so names that look like booleans or numbers still render as strings.
+                      {% for provider in arch_llm_providers %}
+                      - match:
+                          prefix: "/"
+                          headers:
+                            - name: "x-arch-llm-provider"
+                              string_match:
+                                exact: "{{ provider.name }}"
+                        route:
+                          auto_host_rewrite: true
+                          cluster: "{{ provider.provider }}"
+                          timeout: 60s
+                      {% endfor %}
+              http_filters:
+                - name: envoy.filters.http.wasm
+                  typed_config:
+                    "@type": type.googleapis.com/udpa.type.v1.TypedStruct
+                    type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
+                    value:
+                      config:
+                        name: "http_config"
+                        configuration:
+                          "@type": "type.googleapis.com/google.protobuf.StringValue"
+                          # indent(30) pads every line after the first; keep it equal to
+                          # the column of the placeholder so the block scalar stays aligned.
+                          value: |
+                              {{ arch_config | indent(30) }}
+                        vm_config:
+                          runtime: "envoy.wasm.runtime.v8"
+                          code:
+                            local:
+                              filename: "/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm"
+                - name: envoy.filters.http.router
+                  typed_config:
+                    "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
+  clusters:
+    - name: openai
+      connect_timeout: 5s
+      type: LOGICAL_DNS
+      dns_lookup_family: V4_ONLY
+      lb_policy: ROUND_ROBIN
+      # typed_extension_protocol_options:
+      #   envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
+      #     "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
+      #     explicit_http_config:
+      #       http2_protocol_options: {}
+      load_assignment:
+        cluster_name: openai
+        endpoints:
+          - lb_endpoints:
+              - endpoint:
+                  address:
+                    socket_address:
+                      address: api.openai.com
+                      port_value: 443
+                  hostname: "api.openai.com"
+      transport_socket:
+        name: envoy.transport_sockets.tls
+        typed_config:
+          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
+          sni: api.openai.com
+          common_tls_context:
+            tls_params:
+              tls_minimum_protocol_version: TLSv1_2
+              tls_maximum_protocol_version: TLSv1_3
+    - name: mistral
+      connect_timeout: 5s
+      type: LOGICAL_DNS
+      dns_lookup_family: V4_ONLY
+      lb_policy: ROUND_ROBIN
+      load_assignment:
+        cluster_name: mistral
+        endpoints:
+          - lb_endpoints:
+              - endpoint:
+                  address:
+                    socket_address:
+                      address: api.mistral.ai
+                      port_value: 443
+                  hostname: "api.mistral.ai"
+      transport_socket:
+        name: envoy.transport_sockets.tls
+        typed_config:
+          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
+          sni: api.mistral.ai
+    - name: model_server
+      connect_timeout: 5s
+      type: STRICT_DNS
+      dns_lookup_family: V4_ONLY
+      lb_policy: ROUND_ROBIN
+      load_assignment:
+        cluster_name: model_server
+        endpoints:
+          - lb_endpoints:
+              - endpoint:
+                  address:
+                    socket_address:
+                      address: host.docker.internal
+                      port_value: 8000
+                  hostname: "model_server"
+    - name: mistral_7b_instruct
+      connect_timeout: 5s
+      type: STRICT_DNS
+      dns_lookup_family: V4_ONLY
+      lb_policy: ROUND_ROBIN
+      load_assignment:
+        cluster_name: mistral_7b_instruct
+        endpoints:
+          - lb_endpoints:
+              - endpoint:
+                  address:
+                    socket_address:
+                      address: mistral_7b_instruct
+                      port_value: 10001
+                  hostname: "mistral_7b_instruct"
+    - name: arch_fc
+      connect_timeout: 5s
+      type: STRICT_DNS
+      dns_lookup_family: V4_ONLY
+      lb_policy: ROUND_ROBIN
+      load_assignment:
+        cluster_name: arch_fc
+        endpoints:
+          - lb_endpoints:
+              - endpoint:
+                  address:
+                    socket_address:
+                      address: host.docker.internal
+                      port_value: 8000
+                  hostname: "arch_fc"
+    # One LOGICAL_DNS cluster per entry in arch_clusters; connect_timeout falls
+    # back to 5s when the entry does not set one. The dash-terminated tags strip
+    # the whitespace that follows them, so the indentation of each tag line is
+    # what positions the next rendered key - keep the tags at the key column.
+{% for _, cluster in arch_clusters.items() %}
+    - name: "{{ cluster.name }}"
+      {% if cluster.connect_timeout -%}
+      connect_timeout: "{{ cluster.connect_timeout }}"
+      {% else -%}
+      connect_timeout: 5s
+      {% endif -%}
+      type: LOGICAL_DNS
+      dns_lookup_family: V4_ONLY
+      lb_policy: ROUND_ROBIN
+      load_assignment:
+        cluster_name: "{{ cluster.name }}"
+        endpoints:
+          - lb_endpoints:
+              - endpoint:
+                  address:
+                    socket_address:
+                      address: "{{ cluster.endpoint }}"
+                      port_value: {{ cluster.port }}
+                  hostname: "{{ cluster.name }}"
+{% endfor %}