archgw cli (#117)

* initial commit of the insurance agent demo, with the CLI tool * committing the cli * fixed some field descriptions for generate-prompt-targets * CLI works with build, up and down commands. Function calling example works stand-alone * fixed README to install archgw cli * fixing based on feedback * fixing based on feedback --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local>
2026-05-02 04:12:56 +02:00 · 2024-10-03 18:21:27 -07:00 · 2024-10-03 18:21:27 -07:00 · dc57f119a0
commit dc57f119a0
parent af018e5fd8
30 changed files with 1087 additions and 203 deletions
--- a/demos/function_calling/README.md
+++ b/demos/function_calling/README.md
@ -28,7 +28,7 @@ This demo shows how you can use intelligent prompt gateway to do function callin
   - On this dashboard you can see request latency and number of requests

 # Observability
-Arch gateway publishes stats endpoint at http://localhost:19901/stats. In this demo we are using prometheus to pull stats from envoy and we are using grafan to visalize the stats in dashboard. To see grafana dashboard follow instructions below,
+Arch gateway publishes stats endpoint at http://localhost:19901/stats. In this demo we are using prometheus to pull stats from arch and we are using grafana to visualize the stats in dashboard. To see grafana dashboard follow instructions below,

 1. Start grafana and prometheus using following command
   ```yaml
--- a/demos/function_calling/arch_config.yaml
+++ b/demos/function_calling/arch_config.yaml
@ -8,7 +8,7 @@ listener:

 endpoints:
  api_server:
-    endpoint: api_server:80
+    endpoint: host.docker.internal:18083
    connect_timeout: 0.005s

 overrides:
@ -17,16 +17,17 @@ overrides:

 llm_providers:
  - name: open-ai-gpt-4
-    access_key: $OPENAI_ACCESS_KEY
+    access_key: $OPENAI_API_KEY
    provider: openai
    model: gpt-4
    default: true
  - name: mistral-large-latest
-    access_key: $MISTRAL_ACCESS_KEY
+    access_key: $MISTRAL_API_KEY
    provider: mistral
    model: large-latest

-system_prompt: You are a helpful assistant.
+system_prompt: |
+  You are a helpful assistant.

 prompt_targets:
  - name: weather_forecast
--- a/demos/function_calling/docker-compose.yaml
+++ b/demos/function_calling/docker-compose.yaml
@ -1,54 +1,4 @@
-
-x-variables: &common-vars
-  environment:
-    - MODE=${MODE:-cloud}  # Set the default mode to 'cloud', others values are local-gpu, local-cpu
-
-
 services:
-
-  arch:
-    build:
-      context: ../../
-      dockerfile: arch/Dockerfile
-    ports:
-      - "10000:10000"
-      - "19901:9901"
-    volumes:
-      - /etc/ssl/cert.pem:/etc/ssl/cert.pem
-      - ./arch_log:/var/log/
-      - ./arch_config.yaml:/config/arch_config.yaml
-    depends_on:
-      # config_generator:
-      #   condition: service_completed_successfully
-      model_server:
-        condition: service_healthy
-    environment:
-      - LOG_LEVEL=debug
-      - OPENAI_API_KEY=${OPENAI_API_KEY:?error}
-      - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
-
-  model_server:
-    build:
-      context: ../../model_server
-      dockerfile: Dockerfile
-    ports:
-      - "18081:80"
-    healthcheck:
-        test: ["CMD", "curl" ,"http://localhost/healthz"]
-        interval: 5s
-        retries: 20
-    volumes:
-      - ~/.cache/huggingface:/root/.cache/huggingface
-      - ./arch_config.yaml:/root/arch_config.yaml
-    << : *common-vars
-    environment:
-      - OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal}
-      - FC_URL=${FC_URL:-https://arch-fc-free-trial-4mzywewe.uc.gateway.dev/v1}
-      - OLLAMA_MODEL=Arch-Function-Calling-3B-Q4_K_M
-      - MODE=${MODE:-cloud}
-      # uncomment following line to use ollama endpoint that is hosted by docker
-      # - OLLAMA_ENDPOINT=ollama
-      # - OLLAMA_MODEL=Arch-Function-Calling-1.5B:Q4_K_M
  api_server:
    build:
      context: api_server
@ -60,45 +10,16 @@ services:
        interval: 5s
        retries: 20

-  ollama:
-    image: ollama/ollama
-    container_name: ollama
-    volumes:
-      - ./ollama:/root/.ollama
-    restart: unless-stopped
-    ports:
-      - '11434:11434'
-    profiles:
-      - manual
-
-  open_webui:
-    image: ghcr.io/open-webui/open-webui:${WEBUI_DOCKER_TAG-main}
-    container_name: open-webui
-    volumes:
-      - ./open-webui:/app/backend/data
-    # depends_on:
-      # - ollama
-    ports:
-      - 18090:8080
-    environment:
-      - OLLAMA_BASE_URL=http://${OLLAMA_ENDPOINT:-host.docker.internal}:11434
-      - WEBUI_AUTH=false
-    extra_hosts:
-      - host.docker.internal:host-gateway
-    restart: unless-stopped
-    profiles:
-      - monitoring
-
  chatbot_ui:
    build:
      context: ../../chatbot_ui
      dockerfile: Dockerfile
    ports:
-      - "18080:8080"
+      - "18090:8080"
    environment:
      - OPENAI_API_KEY=${OPENAI_API_KEY:?error}
      - MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
-      - CHAT_COMPLETION_ENDPOINT=http://arch:10000/v1
+      - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1

  prometheus:
    image: prom/prometheus