simplify developer getting started experience (#102)

* Fixed build. Now, we have a bare bones version of the docker-compose file with only two services, archgw and archgw-model-server. Tested using CLI * some pre-commit fixes * fixed cargo formatting issues * fixed model server conflict changes --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local>
2026-06-23 15:38:07 +02:00 · 2024-10-01 10:02:23 -07:00 · 2024-10-01 10:02:23 -07:00 · 8654d3d5c5
commit 8654d3d5c5
parent 41cdef590a
20 changed files with 53 additions and 407 deletions
--- a/arch/Dockerfile
+++ b/arch/Dockerfile
@ -10,8 +10,17 @@ COPY public_types /public_types
 RUN cargo build --release --target wasm32-wasi

 # copy built filter into envoy image
-FROM envoyproxy/envoy:v1.30-latest
+FROM envoyproxy/envoy:v1.30-latest as envoy
+
+# Build the config generator, so that we have a single build image for both Rust and Python
+FROM python:3-slim as arch
 COPY --from=builder /arch/target/wasm32-wasi/release/intelligent_prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm
-# CMD ["envoy", "-c", "/etc/envoy/envoy.yaml"]
-# CMD ["envoy", "-c", "/etc/envoy/envoy.yaml", "--log-level", "debug"]
-CMD ["envoy", "-c", "/etc/envoy/envoy.yaml", "--component-log-level", "wasm:debug"]
+COPY --from=envoy /usr/local/bin/envoy /usr/local/bin/envoy
+WORKDIR /config
+COPY arch/requirements.txt .
+RUN pip install -r requirements.txt
+COPY arch/config_generator.py .
+COPY arch/envoy.template.yaml .
+COPY arch/arch_config_schema.yaml .
+# `exec` replaces the shell with envoy, so envoy itself receives stop signals (e.g. SIGTERM from `docker stop`)
+CMD ["sh", "-c", "python config_generator.py && exec envoy -c /etc/envoy/envoy.yaml --component-log-level wasm:debug"]
--- a/arch/arch_config_schema.yaml
+++ b/arch/arch_config_schema.yaml
@ -0,0 +1,147 @@
+$schema: "http://json-schema.org/draft-07/schema#"
+type: object
+properties:
+  version:
+    type: string
+  listener:
+    type: object
+    properties:
+      address:
+        type: string
+      port:
+        type: integer
+      message_format:
+        type: string
+      connect_timeout:
+        type: string
+    additionalProperties: false
+    required:
+      - address
+      - port
+  endpoints:
+    type: object
+    patternProperties:
+      "^.*$":
+        type: object
+        properties:
+          endpoint:
+            type: string
+          connect_timeout:
+            type: string
+        additionalProperties: false
+        required:
+          - endpoint
+  llm_providers:
+    type: array
+    items:
+      type: object
+      properties:
+        name:
+          type: string
+        access_key:
+          type: string
+        model:
+          type: string
+        default:
+          type: boolean
+      additionalProperties: false
+      required:
+        - name
+        - access_key
+        - model
+  overrides:
+    type: object
+    properties:
+      prompt_target_intent_matching_threshold:
+        type: number
+  system_prompt:
+    type: string
+  prompt_targets:
+    type: array
+    items:
+      type: object
+      properties:
+        name:
+          type: string
+        default:
+          type: boolean
+        description:
+          type: string
+        parameters:
+          type: array
+          items:
+            type: object
+            properties:
+              name:
+                type: string
+              # NOTE: `required` below is a parameter field (a boolean flag), not the JSON Schema keyword
+              required:
+                type: boolean
+              default:
+                type: string
+              description:
+                type: string
+              type:
+                type: string
+            additionalProperties: false
+            required:
+              - name
+              - description
+              - type
+        endpoint:
+          type: object
+          properties:
+            name:
+              type: string
+            path:
+              type: string
+          additionalProperties: false
+          required:
+            - name
+            - path
+        system_prompt:
+          type: string
+      additionalProperties: false
+      required:
+        - name
+        - description
+  ratelimits:
+    type: array
+    items:
+      type: object
+      properties:
+        provider:
+          type: string
+        selector:
+          type: object
+          properties:
+            key:
+              type: string
+            value:
+              type: string
+          additionalProperties: false
+          required:
+            - key
+            - value
+        limit:
+          type: object
+          properties:
+            tokens:
+              type: integer
+            unit:
+              type: string
+          additionalProperties: false
+          required:
+            - tokens
+            - unit
+      additionalProperties: false
+      required:
+        - provider
+        - selector
+        - limit
+additionalProperties: false
+required:
+  - version
+  - listener
+  - llm_providers
+  - prompt_targets
--- a/arch/config_generator.py
+++ b/arch/config_generator.py
@ -0,0 +1,86 @@
+"""Render the Envoy config file from an arch_config YAML file.
+
+Validates the arch_config against the JSON schema, builds one cluster entry
+per endpoint named by a prompt target or listed under `endpoints`, and writes
+the rendered Jinja2 template to ENVOY_CONFIG_FILE_RENDERED. Each file path can
+be overridden through the environment variable of the same name.
+"""
+import os
+import sys
+
+from jinja2 import Environment, FileSystemLoader
+import yaml
+from jsonschema import validate
+
+ENVOY_CONFIG_TEMPLATE_FILE = os.getenv('ENVOY_CONFIG_TEMPLATE_FILE', 'envoy.template.yaml')
+ARCH_CONFIG_FILE = os.getenv('ARCH_CONFIG_FILE', '/config/arch_config.yaml')
+ENVOY_CONFIG_FILE_RENDERED = os.getenv('ENVOY_CONFIG_FILE_RENDERED', '/etc/envoy/envoy.yaml')
+ARCH_CONFIG_SCHEMA_FILE = os.getenv('ARCH_CONFIG_SCHEMA_FILE', 'arch_config_schema.yaml')
+
+# Port assigned to a cluster whose endpoint does not carry an explicit one.
+DEFAULT_CLUSTER_PORT = 80
+
+env = Environment(loader=FileSystemLoader('./'))
+# Honor the ENVOY_CONFIG_TEMPLATE_FILE override; the loader searches the working directory.
+template = env.get_template(ENVOY_CONFIG_TEMPLATE_FILE)
+
+with open(ARCH_CONFIG_FILE, 'r') as file:
+    katanemo_config = file.read()
+
+with open(ARCH_CONFIG_SCHEMA_FILE, 'r') as file:
+    arch_config_schema = file.read()
+
+config_yaml = yaml.safe_load(katanemo_config)
+config_schema_yaml = yaml.safe_load(arch_config_schema)
+
+try:
+    validate(config_yaml, config_schema_yaml)
+except Exception as e:
+    # jsonschema errors expose `.message`; fall back to str(e) for any other exception type.
+    message = getattr(e, 'message', str(e))
+    print(f"Error validating arch_config file: {ARCH_CONFIG_FILE}, error: {message}")
+    sys.exit(1)
+
+inferred_clusters = {}
+
+# One cluster per distinct endpoint name referenced by the prompt targets.
+for prompt_target in config_yaml["prompt_targets"]:
+    name = prompt_target.get("endpoint", {}).get("name", "")
+    # `endpoint` is optional on a prompt target; skip it rather than emit a nameless cluster.
+    if not name:
+        continue
+    if name not in inferred_clusters:
+        inferred_clusters[name] = {
+            "name": name,
+            "port": DEFAULT_CLUSTER_PORT,
+        }
+
+print(inferred_clusters)
+
+endpoints = config_yaml.get("endpoints", {})
+
+# override the inferred clusters with the ones defined in the config
+for name, endpoint_details in endpoints.items():
+    print("updating cluster", endpoint_details)
+    # Endpoints that no prompt target references start from the same defaults,
+    # so every cluster ends up with `name` and `port` set.
+    cluster = inferred_clusters.setdefault(name, {"name": name, "port": DEFAULT_CLUSTER_PORT})
+    cluster.update(endpoint_details)
+    # A "host:port" endpoint is split into the host (`endpoint`) and an integer `port`.
+    endpoint_parts = cluster['endpoint'].split(':')
+    if len(endpoint_parts) > 1:
+        cluster['endpoint'] = endpoint_parts[0]
+        cluster['port'] = int(endpoint_parts[1])
+
+print("updated clusters", inferred_clusters)
+
+data = {
+    'katanemo_config': katanemo_config,
+    'arch_clusters': inferred_clusters
+}
+
+rendered = template.render(data)
+print(rendered)
+print(ENVOY_CONFIG_FILE_RENDERED)
+with open(ENVOY_CONFIG_FILE_RENDERED, 'w') as file:
+    file.write(rendered)
--- a/arch/docker-compose.yaml
+++ b/arch/docker-compose.yaml
@ -1,43 +1,28 @@
 services:
-  envoy:
-    image: envoyproxy/envoy:v1.30-latest
-    hostname: envoy
+  archgw:
+    build:
+      context: ../
+      dockerfile: arch/Dockerfile
    ports:
      - "10000:10000"
-      - "19901:9901"
+      - "18080:9901"
    volumes:
-      - ./envoy.yaml:/etc/envoy/envoy.yaml
-      - ./target/wasm32-wasi/release:/etc/envoy/proxy-wasm-plugins
+      - ${ARCH_CONFIG_FILE}:/config/arch_config.yaml
      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
+      - ./arch_log:/var/log/
    depends_on:
-      qdrant:
-        condition: service_started
-      embeddingserver:
+      archgw_model_server:
        condition: service_healthy

-  embeddingserver:
+  archgw_model_server:
    build:
-      context: ../embedding-server
+      context: ../model_server
      dockerfile: Dockerfile
    ports:
-      - "18080:80"
+      - "18081:80"
    healthcheck:
-        test: ["CMD", "curl" ,"http://localhost:80/healthz"]
+        test: ["CMD", "curl", "--fail", "http://localhost/healthz"]  # --fail: HTTP error responses count as unhealthy
        interval: 5s
        retries: 20
-
-  qdrant:
-    image: qdrant/qdrant
-    hostname: vector-db
-    ports:
-      - 16333:6333
-      - 16334:6334
-
-  chatbot-ui:
-    build:
-      context: ../chatbot-ui
-      dockerfile: Dockerfile
-    ports:
-      - "18080:8080"
-    environment:
-      - CHAT_COMPLETION_ENDPOINT=http://envoy:10000/v1
+    volumes:
+      - ~/.cache/huggingface:/root/.cache/huggingface
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@ -132,20 +132,20 @@ static_resources:
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          sni: api.mistral.ai
-    - name: model_server
+    - name: archgw_model_server
      connect_timeout: 5s
      type: STRICT_DNS
      lb_policy: ROUND_ROBIN
      load_assignment:
-        cluster_name: model_server
+        cluster_name: archgw_model_server
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
-                      address: model_server
+                      address: archgw_model_server
                      port_value: 80
-                  hostname: "model_server"
+                  hostname: "archgw_model_server"
    - name: mistral_7b_instruct
      connect_timeout: 5s
      type: STRICT_DNS
@ -171,7 +171,7 @@ static_resources:
              - endpoint:
                  address:
                    socket_address:
-                      address: model_server
+                      address: archgw_model_server
                      port_value: 80
                  hostname: "arch_fc"
 {% for _, cluster in arch_clusters.items() %}
--- a/arch/init_vector_store.sh
+++ b/arch/init_vector_store.sh
@ -1,16 +0,0 @@
-#!/bin/sh
-
-echo 'Deleting prompt_vector_store collection'
-curl -X DELETE http://localhost:16333/collections/prompt_vector_store
-echo
-echo 'Creating prompt_vector_store collection'
-curl -X PUT 'http://localhost:16333/collections/prompt_vector_store' \
-  -H 'Content-Type: application/json' \
-  --data-raw '{
-    "vectors": {
-      "size": 1024,
-      "distance": "Cosine"
-    }
-  }'
-echo
-echo 'Created prompt_vector_store collection'
--- a/arch/requirements.txt
+++ b/arch/requirements.txt
@ -0,0 +1,3 @@
+jinja2
+pyyaml
+jsonschema
--- a/arch/src/consts.rs
+++ b/arch/src/consts.rs
@ -7,6 +7,6 @@ pub const USER_ROLE: &str = "user";
 pub const GPT_35_TURBO: &str = "gpt-3.5-turbo";
 pub const ARC_FC_CLUSTER: &str = "arch_fc";
 pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes
-pub const MODEL_SERVER_NAME: &str = "model_server";
+pub const MODEL_SERVER_NAME: &str = "archgw_model_server";
 pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider";
 pub const ARCH_MESSAGES_KEY: &str = "arch_messages";
--- a/arch/src/filter_context.rs
+++ b/arch/src/filter_context.rs
@ -141,7 +141,10 @@ impl FilterContext {
        ) {
            Ok(token_id) => token_id,
            Err(e) => {
-                panic!("Error dispatching HTTP call: {:?}", e);
+                panic!(
+                    "Error dispatching HTTP call: {}, error: {:?}",
+                    MODEL_SERVER_NAME, e
+                );
            }
        };
        token_id
--- a/arch/tests/integration.rs
+++ b/arch/tests/integration.rs
@ -104,7 +104,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
        // The actual call is not important in this test, we just need to grab the token_id
-        .expect_http_call(Some("model_server"), None, None, None, None)
+        .expect_http_call(Some("archgw_model_server"), None, None, None, None)
        .returning(Some(1))
        .expect_log(Some(LogLevel::Debug), None)
        .expect_metric_increment("active_http_calls", 1)
@ -136,7 +136,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
        .returning(Some(&embeddings_response_buffer))
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Debug), None)
-        .expect_http_call(Some("model_server"), None, None, None, None)
+        .expect_http_call(Some("archgw_model_server"), None, None, None, None)
        .returning(Some(2))
        .expect_metric_increment("active_http_calls", 1)
        .expect_log(Some(LogLevel::Debug), None)
@ -313,7 +313,7 @@ fn successful_request_to_open_ai_chat_completions() {
        .returning(Some(chat_completions_request_body))
        .expect_log(Some(LogLevel::Debug), None)
        .expect_log(Some(LogLevel::Info), None)
-        .expect_http_call(Some("model_server"), None, None, None, None)
+        .expect_http_call(Some("archgw_model_server"), None, None, None, None)
        .returning(Some(4))
        .expect_metric_increment("active_http_calls", 1)
        .execute_and_expect(ReturnType::Action(Action::Pause))