archgw cli (#117)

* initial commit of the insurance agent demo, with the CLI tool

* committing the cli

* fixed some field descriptions for generate-prompt-targets

* CLI works with build, up and down commands. Function calling example works stand-alone

* fixed README to install archgw cli

* fixing based on feedback

* fixing based on feedback

---------

Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local>
This commit is contained in:
Salman Paracha 2024-10-03 18:21:27 -07:00 committed by GitHub
parent af018e5fd8
commit dc57f119a0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
30 changed files with 1087 additions and 203 deletions

View file

@ -28,7 +28,7 @@ This demo shows how you can use intelligent prompt gateway to do function callin
- On this dashboard you can see request latency and number of requests
# Observability
Arch gateway publishes stats endpoint at http://localhost:19901/stats. In this demo we are using prometheus to pull stats from envoy and we are using grafan to visalize the stats in dashboard. To see grafana dashboard follow instructions below,
Arch gateway publishes stats endpoint at http://localhost:19901/stats. In this demo we are using prometheus to pull stats from arch and we are using grafana to visualize the stats in dashboard. To see grafana dashboard follow instructions below,
1. Start grafana and prometheus using following command
```yaml

View file

@ -8,7 +8,7 @@ listener:
endpoints:
api_server:
endpoint: api_server:80
endpoint: host.docker.internal:18083
connect_timeout: 0.005s
overrides:
@ -17,16 +17,17 @@ overrides:
llm_providers:
- name: open-ai-gpt-4
access_key: $OPENAI_ACCESS_KEY
access_key: $OPENAI_API_KEY
provider: openai
model: gpt-4
default: true
- name: mistral-large-latest
access_key: $MISTRAL_ACCESS_KEY
access_key: $MISTRAL_API_KEY
provider: mistral
model: large-latest
system_prompt: You are a helpful assistant.
system_prompt: |
You are a helpful assistant.
prompt_targets:
- name: weather_forecast

View file

@ -1,54 +1,4 @@
x-variables: &common-vars
environment:
- MODE=${MODE:-cloud} # Set the default mode to 'cloud', others values are local-gpu, local-cpu
services:
arch:
build:
context: ../../
dockerfile: arch/Dockerfile
ports:
- "10000:10000"
- "19901:9901"
volumes:
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
- ./arch_log:/var/log/
- ./arch_config.yaml:/config/arch_config.yaml
depends_on:
# config_generator:
# condition: service_completed_successfully
model_server:
condition: service_healthy
environment:
- LOG_LEVEL=debug
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
model_server:
build:
context: ../../model_server
dockerfile: Dockerfile
ports:
- "18081:80"
healthcheck:
test: ["CMD", "curl" ,"http://localhost/healthz"]
interval: 5s
retries: 20
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
- ./arch_config.yaml:/root/arch_config.yaml
<< : *common-vars
environment:
- OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal}
- FC_URL=${FC_URL:-https://arch-fc-free-trial-4mzywewe.uc.gateway.dev/v1}
- OLLAMA_MODEL=Arch-Function-Calling-3B-Q4_K_M
- MODE=${MODE:-cloud}
# uncomment following line to use ollama endpoint that is hosted by docker
# - OLLAMA_ENDPOINT=ollama
# - OLLAMA_MODEL=Arch-Function-Calling-1.5B:Q4_K_M
api_server:
build:
context: api_server
@ -60,45 +10,16 @@ services:
interval: 5s
retries: 20
ollama:
image: ollama/ollama
container_name: ollama
volumes:
- ./ollama:/root/.ollama
restart: unless-stopped
ports:
- '11434:11434'
profiles:
- manual
open_webui:
image: ghcr.io/open-webui/open-webui:${WEBUI_DOCKER_TAG-main}
container_name: open-webui
volumes:
- ./open-webui:/app/backend/data
# depends_on:
# - ollama
ports:
- 18090:8080
environment:
- OLLAMA_BASE_URL=http://${OLLAMA_ENDPOINT:-host.docker.internal}:11434
- WEBUI_AUTH=false
extra_hosts:
- host.docker.internal:host-gateway
restart: unless-stopped
profiles:
- monitoring
chatbot_ui:
build:
context: ../../chatbot_ui
dockerfile: Dockerfile
ports:
- "18080:8080"
- "18090:8080"
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
- CHAT_COMPLETION_ENDPOINT=http://arch:10000/v1
- CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
prometheus:
image: prom/prometheus