mirror of
https://github.com/katanemo/plano.git
synced 2026-05-02 04:12:56 +02:00
archgw cli (#117)
* initial commit of the insurance agent demo, with the CLI tool * committing the cli * fixed some field descriptions for generate-prompt-targets * CLI works with build, up and down commands. Function calling example works stand-alone * fixed README to install archgw cli * fixing based on feedback * fixing based on feedback --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-261.local>
This commit is contained in:
parent
af018e5fd8
commit
dc57f119a0
30 changed files with 1087 additions and 203 deletions
|
|
@ -28,7 +28,7 @@ This demo shows how you can use intelligent prompt gateway to do function callin
|
|||
- On this dashboard you can see request latency and number of requests
|
||||
|
||||
# Observability
|
||||
Arch gateway publishes stats endpoint at http://localhost:19901/stats. In this demo we are using prometheus to pull stats from envoy and we are using grafan to visalize the stats in dashboard. To see grafana dashboard follow instructions below,
|
||||
Arch gateway publishes a stats endpoint at http://localhost:19901/stats. In this demo we are using prometheus to pull stats from arch and we are using grafana to visualize the stats in a dashboard. To see the grafana dashboard, follow the instructions below,
|
||||
|
||||
1. Start grafana and prometheus using the following command
|
||||
```yaml
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ listener:
|
|||
|
||||
endpoints:
|
||||
api_server:
|
||||
endpoint: api_server:80
|
||||
endpoint: host.docker.internal:18083
|
||||
connect_timeout: 0.005s
|
||||
|
||||
overrides:
|
||||
|
|
@ -17,16 +17,17 @@ overrides:
|
|||
|
||||
llm_providers:
|
||||
- name: open-ai-gpt-4
|
||||
access_key: $OPENAI_ACCESS_KEY
|
||||
access_key: $OPENAI_API_KEY
|
||||
provider: openai
|
||||
model: gpt-4
|
||||
default: true
|
||||
- name: mistral-large-latest
|
||||
access_key: $MISTRAL_ACCESS_KEY
|
||||
access_key: $MISTRAL_API_KEY
|
||||
provider: mistral
|
||||
model: large-latest
|
||||
|
||||
system_prompt: You are a helpful assistant.
|
||||
system_prompt: |
|
||||
You are a helpful assistant.
|
||||
|
||||
prompt_targets:
|
||||
- name: weather_forecast
|
||||
|
|
|
|||
|
|
@ -1,54 +1,4 @@
|
|||
|
||||
x-variables: &common-vars
|
||||
environment:
|
||||
- MODE=${MODE:-cloud} # Set the default mode to 'cloud', others values are local-gpu, local-cpu
|
||||
|
||||
|
||||
services:
|
||||
|
||||
arch:
|
||||
build:
|
||||
context: ../../
|
||||
dockerfile: arch/Dockerfile
|
||||
ports:
|
||||
- "10000:10000"
|
||||
- "19901:9901"
|
||||
volumes:
|
||||
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
|
||||
- ./arch_log:/var/log/
|
||||
- ./arch_config.yaml:/config/arch_config.yaml
|
||||
depends_on:
|
||||
# config_generator:
|
||||
# condition: service_completed_successfully
|
||||
model_server:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
- LOG_LEVEL=debug
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
|
||||
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
|
||||
|
||||
model_server:
|
||||
build:
|
||||
context: ../../model_server
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- "18081:80"
|
||||
healthcheck:
|
||||
test: ["CMD", "curl" ,"http://localhost/healthz"]
|
||||
interval: 5s
|
||||
retries: 20
|
||||
volumes:
|
||||
- ~/.cache/huggingface:/root/.cache/huggingface
|
||||
- ./arch_config.yaml:/root/arch_config.yaml
|
||||
<< : *common-vars
|
||||
environment:
|
||||
- OLLAMA_ENDPOINT=${OLLAMA_ENDPOINT:-host.docker.internal}
|
||||
- FC_URL=${FC_URL:-https://arch-fc-free-trial-4mzywewe.uc.gateway.dev/v1}
|
||||
- OLLAMA_MODEL=Arch-Function-Calling-3B-Q4_K_M
|
||||
- MODE=${MODE:-cloud}
|
||||
# uncomment following line to use ollama endpoint that is hosted by docker
|
||||
# - OLLAMA_ENDPOINT=ollama
|
||||
# - OLLAMA_MODEL=Arch-Function-Calling-1.5B:Q4_K_M
|
||||
api_server:
|
||||
build:
|
||||
context: api_server
|
||||
|
|
@ -60,45 +10,16 @@ services:
|
|||
interval: 5s
|
||||
retries: 20
|
||||
|
||||
ollama:
|
||||
image: ollama/ollama
|
||||
container_name: ollama
|
||||
volumes:
|
||||
- ./ollama:/root/.ollama
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- '11434:11434'
|
||||
profiles:
|
||||
- manual
|
||||
|
||||
open_webui:
|
||||
image: ghcr.io/open-webui/open-webui:${WEBUI_DOCKER_TAG-main}
|
||||
container_name: open-webui
|
||||
volumes:
|
||||
- ./open-webui:/app/backend/data
|
||||
# depends_on:
|
||||
# - ollama
|
||||
ports:
|
||||
- 18090:8080
|
||||
environment:
|
||||
- OLLAMA_BASE_URL=http://${OLLAMA_ENDPOINT:-host.docker.internal}:11434
|
||||
- WEBUI_AUTH=false
|
||||
extra_hosts:
|
||||
- host.docker.internal:host-gateway
|
||||
restart: unless-stopped
|
||||
profiles:
|
||||
- monitoring
|
||||
|
||||
chatbot_ui:
|
||||
build:
|
||||
context: ../../chatbot_ui
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- "18080:8080"
|
||||
- "18090:8080"
|
||||
environment:
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
|
||||
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
|
||||
- CHAT_COMPLETION_ENDPOINT=http://arch:10000/v1
|
||||
- CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue