improve service names (#54)

- embedding-server => model_server
- public-types => public_types
- chatbot-ui => chatbot_ui
- function-calling => function_calling
This commit is contained in:
Adil Hafeez 2024-09-17 08:47:35 -07:00 committed by GitHub
parent 215f96e273
commit 060a0d665e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
35 changed files with 54 additions and 52 deletions

View file

@ -0,0 +1,25 @@
FROM Bolt-Function-Calling-1B-Q3_K_L.gguf
# Set the size of the context window used to generate the next token
# PARAMETER num_ctx 16384
PARAMETER num_ctx 4096
# Set parameters for response generation
PARAMETER num_predict 1024
PARAMETER temperature 0.1
PARAMETER top_p 0.5
PARAMETER top_k 32022
PARAMETER repeat_penalty 1.0
PARAMETER stop "<|EOT|>"
# Set the random number seed to use for generation
PARAMETER seed 42
# Set the prompt template to be passed into the model
TEMPLATE """{{ if .System }}<begin▁of▁sentence>
{{ .System }}
{{ end }}{{ if .Prompt }}### Instruction:
{{ .Prompt }}
{{ end }}### Response:
{{ .Response }}
<|EOT|>"""

View file

@ -0,0 +1,24 @@
FROM Bolt-Function-Calling-1B-Q4_K_M.gguf
# Set the size of the context window used to generate the next token
PARAMETER num_ctx 4096
# Set parameters for response generation
PARAMETER num_predict 1024
PARAMETER temperature 0.1
PARAMETER top_p 0.5
PARAMETER top_k 32022
PARAMETER repeat_penalty 1.0
PARAMETER stop "<|EOT|>"
# Set the random number seed to use for generation
PARAMETER seed 42
# Set the prompt template to be passed into the model
TEMPLATE """{{ if .System }}<begin▁of▁sentence>
{{ .System }}
{{ end }}{{ if .Prompt }}### Instruction:
{{ .Prompt }}
{{ end }}### Response:
{{ .Response }}
<|EOT|>"""

View file

@ -0,0 +1,32 @@
# Function calling
This demo shows how you can use intelligent prompt gateway to do function calling. This demo assumes you are using ollama running natively. If you want to run ollama running inside docker then please update ollama endpoint in docker-compose file.
# Startig the demo
1. Ensure that submodule is up to date
```sh
git submodule sync --recursive
```
1. Create `.env` file and set OpenAI key using env var `OPENAI_API_KEY`
1. Start services
```sh
docker compose up
```
1. Download Bolt-FC model. This demo assumes we have downloaded [Bolt-Function-Calling-1B:Q4_K_M](https://huggingface.co/katanemolabs/Bolt-Function-Calling-1B.gguf/blob/main/Bolt-Function-Calling-1B-Q4_K_M.gguf) to local folder.
1. If running ollama natively run
```sh
ollama serve
```
2. Create model file in ollama repository
```sh
ollama create Bolt-Function-Calling-1B:Q4_K_M -f Bolt-FC-1B-Q4_K_M.model_file
```
3. Navigate to http://localhost:18080/
4. You can type in queries like "how is the weather in Seattle"
- You can also ask follow up questions like "show me sunny days"
5. To see metrics navigate to "http://localhost:3000/" (use admin/grafana for login)
- Open up dahsboard named "Intelligent Gateway Overview"
- On this dashboard you can see reuqest latency and number of requests
Here is sample interaction,
<img width="575" alt="image" src="https://github.com/user-attachments/assets/e0929490-3eb2-4130-ae87-a732aea4d059">

View file

@ -0,0 +1,41 @@
default_prompt_endpoint: "127.0.0.1"
load_balancing: "round_robin"
timeout_ms: 5000
# should not be here
embedding_provider:
name: "bge-large-en-v1.5"
model: "BAAI/bge-large-en-v1.5"
llm_providers:
- name: open-ai-gpt-4
api_key: $OPEN_AI_API_KEY
model: gpt-4
default: true
prompt_targets:
- type: function_resolver
name: weather_forecast
description: This function resolver provides weather forecast information for a given city.
few_shot_examples:
- what is the weather in New York?
- how is the weather in San Francisco?
- what is the forecast in Chicago?
parameters:
- name: city
required: true
description: The city for which the weather forecast is requested.
- name: days
description: The number of days for which the weather forecast is requested.
- name: units
description: The units in which the weather forecast is requested.
endpoint:
cluster: weatherhost
path: /weather
system_prompt: |
You are a helpful weather forecaster. Use weater data that is provided to you. Please following following guidelines when responding to user queries:
- Use farenheight for temperature
- Use miles per hour for wind speed

View file

@ -0,0 +1,112 @@
services:
config_generator:
build:
context: ../../
dockerfile: config_generator/Dockerfile
volumes:
- ../../envoyfilter/envoy.template.yaml:/usr/src/app/envoy.template.yaml
- ./bolt-config.yaml:/usr/src/app/bolt-config.yaml
- ./generated:/usr/src/app/out
bolt:
build:
context: ../../
dockerfile: envoyfilter/Dockerfile
hostname: bolt
ports:
- "10000:10000"
- "19901:9901"
volumes:
- ./generated/envoy.yaml:/etc/envoy/envoy.yaml
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
depends_on:
config_generator:
condition: service_completed_successfully
model_server:
condition: service_healthy
environment:
- LOG_LEVEL=debug
model_server:
build:
context: ../../model_server
dockerfile: Dockerfile
ports:
- "18081:80"
healthcheck:
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
interval: 5s
retries: 20
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
function_resolver:
build:
context: ../../function_resolver
dockerfile: Dockerfile
ports:
- "18082:80"
healthcheck:
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
interval: 5s
retries: 20
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
environment:
# use ollama endpoint that is hosted by host machine (no virtualization)
- OLLAMA_ENDPOINT=host.docker.internal
# uncomment following line to use ollama endpoint that is hosted by docker
# - OLLAMA_ENDPOINT=ollama
ollama:
image: ollama/ollama
container_name: ollama
volumes:
- ./ollama:/root/.ollama
restart: unless-stopped
ports:
- '11434:11434'
profiles:
- manual
chatbot_ui:
build:
context: ../../chatbot_ui
dockerfile: Dockerfile
ports:
- "18080:8080"
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- CHAT_COMPLETION_ENDPOINT=http://bolt:10000/v1/chat/completions
prometheus:
image: prom/prometheus
container_name: prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yaml'
ports:
- 9090:9090
restart: unless-stopped
volumes:
- ./prometheus:/etc/prometheus
- ./prom_data:/prometheus
profiles:
- monitoring
grafana:
image: grafana/grafana
container_name: grafana
ports:
- 3000:3000
restart: unless-stopped
environment:
- GF_SECURITY_ADMIN_USER=admin
- GF_SECURITY_ADMIN_PASSWORD=grafana
volumes:
- ./grafana:/etc/grafana/provisioning/datasources
- ./grafana/dashboard.yaml:/etc/grafana/provisioning/dashboards/main.yaml
- ./grafana/dashboards:/var/lib/grafana/dashboards
profiles:
- monitoring

View file

@ -0,0 +1,12 @@
apiVersion: 1
providers:
- name: "Dashboard provider"
orgId: 1
type: file
disableDeletion: false
updateIntervalSeconds: 10
allowUiUpdates: false
options:
path: /var/lib/grafana/dashboards
foldersFromFilesStructure: true

View file

@ -0,0 +1,355 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"links": [],
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "avg(rate(envoy_cluster_internal_upstream_rq_time_sum[1m]) / rate(envoy_cluster_internal_upstream_rq_time_count[1m])) by (envoy_cluster_name)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "request latency - internal (ms)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "avg(rate(envoy_cluster_external_upstream_rq_time_sum[1m]) / rate(envoy_cluster_external_upstream_rq_time_count[1m])) by (envoy_cluster_name)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "request latency - external (ms)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "avg(rate(envoy_cluster_internal_upstream_rq_completed[1m])) by (envoy_cluster_name)",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "avg(rate(envoy_cluster_external_upstream_rq_completed[1m])) by (envoy_cluster_name)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "B",
"useBackend": false
}
],
"title": "Upstream request count",
"type": "timeseries"
}
],
"schemaVersion": 39,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-15m",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Intelligent Gateway Overview",
"uid": "adt6uhx5lk8aob",
"version": 3,
"weekStart": ""
}

View file

@ -0,0 +1,9 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
url: http://prometheus:9090
isDefault: true
access: proxy
editable: true

View file

@ -0,0 +1,23 @@
global:
scrape_interval: 15s
scrape_timeout: 10s
evaluation_interval: 15s
alerting:
alertmanagers:
- static_configs:
- targets: []
scheme: http
timeout: 10s
api_version: v1
scrape_configs:
- job_name: envoy
honor_timestamps: true
scrape_interval: 15s
scrape_timeout: 10s
metrics_path: /stats
scheme: http
static_configs:
- targets:
- envoy:9901
params:
format: ['prometheus']