mirror of
https://github.com/katanemo/plano.git
synced 2026-04-27 09:46:28 +02:00
improve service names (#54)
- embedding-server => model_server - public-types => public_types - chatbot-ui => chatbot_ui - function-calling => function_calling
This commit is contained in:
parent
215f96e273
commit
060a0d665e
35 changed files with 54 additions and 52 deletions
25
demos/function_calling/Bolt-FC-1B-Q3_K_L.model_file
Normal file
25
demos/function_calling/Bolt-FC-1B-Q3_K_L.model_file
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
FROM Bolt-Function-Calling-1B-Q3_K_L.gguf
|
||||
|
||||
# Set the size of the context window used to generate the next token
|
||||
# PARAMETER num_ctx 16384
|
||||
PARAMETER num_ctx 4096
|
||||
|
||||
# Set parameters for response generation
|
||||
PARAMETER num_predict 1024
|
||||
PARAMETER temperature 0.1
|
||||
PARAMETER top_p 0.5
|
||||
PARAMETER top_k 32022
|
||||
PARAMETER repeat_penalty 1.0
|
||||
PARAMETER stop "<|EOT|>"
|
||||
|
||||
# Set the random number seed to use for generation
|
||||
PARAMETER seed 42
|
||||
|
||||
# Set the prompt template to be passed into the model
|
||||
TEMPLATE """{{ if .System }}<|begin▁of▁sentence|>
|
||||
{{ .System }}
|
||||
{{ end }}{{ if .Prompt }}### Instruction:
|
||||
{{ .Prompt }}
|
||||
{{ end }}### Response:
|
||||
{{ .Response }}
|
||||
<|EOT|>"""
|
||||
24
demos/function_calling/Bolt-FC-1B-Q4_K_M.model_file
Normal file
24
demos/function_calling/Bolt-FC-1B-Q4_K_M.model_file
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
FROM Bolt-Function-Calling-1B-Q4_K_M.gguf
|
||||
|
||||
# Set the size of the context window used to generate the next token
|
||||
PARAMETER num_ctx 4096
|
||||
|
||||
# Set parameters for response generation
|
||||
PARAMETER num_predict 1024
|
||||
PARAMETER temperature 0.1
|
||||
PARAMETER top_p 0.5
|
||||
PARAMETER top_k 32022
|
||||
PARAMETER repeat_penalty 1.0
|
||||
PARAMETER stop "<|EOT|>"
|
||||
|
||||
# Set the random number seed to use for generation
|
||||
PARAMETER seed 42
|
||||
|
||||
# Set the prompt template to be passed into the model
|
||||
TEMPLATE """{{ if .System }}<|begin▁of▁sentence|>
|
||||
{{ .System }}
|
||||
{{ end }}{{ if .Prompt }}### Instruction:
|
||||
{{ .Prompt }}
|
||||
{{ end }}### Response:
|
||||
{{ .Response }}
|
||||
<|EOT|>"""
|
||||
32
demos/function_calling/README.md
Normal file
32
demos/function_calling/README.md
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
# Function calling
|
||||
This demo shows how you can use intelligent prompt gateway to do function calling. This demo assumes you are using ollama running natively. If you want to run ollama running inside docker then please update ollama endpoint in docker-compose file.
|
||||
|
||||
# Startig the demo
|
||||
1. Ensure that submodule is up to date
|
||||
```sh
|
||||
git submodule sync --recursive
|
||||
```
|
||||
1. Create `.env` file and set OpenAI key using env var `OPENAI_API_KEY`
|
||||
1. Start services
|
||||
```sh
|
||||
docker compose up
|
||||
```
|
||||
1. Download Bolt-FC model. This demo assumes we have downloaded [Bolt-Function-Calling-1B:Q4_K_M](https://huggingface.co/katanemolabs/Bolt-Function-Calling-1B.gguf/blob/main/Bolt-Function-Calling-1B-Q4_K_M.gguf) to local folder.
|
||||
1. If running ollama natively run
|
||||
```sh
|
||||
ollama serve
|
||||
```
|
||||
2. Create model file in ollama repository
|
||||
```sh
|
||||
ollama create Bolt-Function-Calling-1B:Q4_K_M -f Bolt-FC-1B-Q4_K_M.model_file
|
||||
```
|
||||
3. Navigate to http://localhost:18080/
|
||||
4. You can type in queries like "how is the weather in Seattle"
|
||||
- You can also ask follow up questions like "show me sunny days"
|
||||
5. To see metrics navigate to "http://localhost:3000/" (use admin/grafana for login)
|
||||
- Open up dahsboard named "Intelligent Gateway Overview"
|
||||
- On this dashboard you can see reuqest latency and number of requests
|
||||
|
||||
Here is sample interaction,
|
||||
|
||||
<img width="575" alt="image" src="https://github.com/user-attachments/assets/e0929490-3eb2-4130-ae87-a732aea4d059">
|
||||
41
demos/function_calling/bolt-config.yaml
Normal file
41
demos/function_calling/bolt-config.yaml
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
default_prompt_endpoint: "127.0.0.1"
|
||||
load_balancing: "round_robin"
|
||||
timeout_ms: 5000
|
||||
|
||||
|
||||
# should not be here
|
||||
embedding_provider:
|
||||
name: "bge-large-en-v1.5"
|
||||
model: "BAAI/bge-large-en-v1.5"
|
||||
|
||||
llm_providers:
|
||||
|
||||
- name: open-ai-gpt-4
|
||||
api_key: $OPEN_AI_API_KEY
|
||||
model: gpt-4
|
||||
default: true
|
||||
|
||||
prompt_targets:
|
||||
|
||||
- type: function_resolver
|
||||
name: weather_forecast
|
||||
description: This function resolver provides weather forecast information for a given city.
|
||||
few_shot_examples:
|
||||
- what is the weather in New York?
|
||||
- how is the weather in San Francisco?
|
||||
- what is the forecast in Chicago?
|
||||
parameters:
|
||||
- name: city
|
||||
required: true
|
||||
description: The city for which the weather forecast is requested.
|
||||
- name: days
|
||||
description: The number of days for which the weather forecast is requested.
|
||||
- name: units
|
||||
description: The units in which the weather forecast is requested.
|
||||
endpoint:
|
||||
cluster: weatherhost
|
||||
path: /weather
|
||||
system_prompt: |
|
||||
You are a helpful weather forecaster. Use weater data that is provided to you. Please following following guidelines when responding to user queries:
|
||||
- Use farenheight for temperature
|
||||
- Use miles per hour for wind speed
|
||||
112
demos/function_calling/docker-compose.yaml
Normal file
112
demos/function_calling/docker-compose.yaml
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
|
||||
services:
|
||||
|
||||
config_generator:
|
||||
build:
|
||||
context: ../../
|
||||
dockerfile: config_generator/Dockerfile
|
||||
volumes:
|
||||
- ../../envoyfilter/envoy.template.yaml:/usr/src/app/envoy.template.yaml
|
||||
- ./bolt-config.yaml:/usr/src/app/bolt-config.yaml
|
||||
- ./generated:/usr/src/app/out
|
||||
|
||||
bolt:
|
||||
build:
|
||||
context: ../../
|
||||
dockerfile: envoyfilter/Dockerfile
|
||||
hostname: bolt
|
||||
ports:
|
||||
- "10000:10000"
|
||||
- "19901:9901"
|
||||
volumes:
|
||||
- ./generated/envoy.yaml:/etc/envoy/envoy.yaml
|
||||
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
|
||||
depends_on:
|
||||
config_generator:
|
||||
condition: service_completed_successfully
|
||||
model_server:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
- LOG_LEVEL=debug
|
||||
|
||||
model_server:
|
||||
build:
|
||||
context: ../../model_server
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- "18081:80"
|
||||
healthcheck:
|
||||
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
|
||||
interval: 5s
|
||||
retries: 20
|
||||
volumes:
|
||||
- ~/.cache/huggingface:/root/.cache/huggingface
|
||||
|
||||
function_resolver:
|
||||
build:
|
||||
context: ../../function_resolver
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- "18082:80"
|
||||
healthcheck:
|
||||
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
|
||||
interval: 5s
|
||||
retries: 20
|
||||
volumes:
|
||||
- ~/.cache/huggingface:/root/.cache/huggingface
|
||||
environment:
|
||||
# use ollama endpoint that is hosted by host machine (no virtualization)
|
||||
- OLLAMA_ENDPOINT=host.docker.internal
|
||||
# uncomment following line to use ollama endpoint that is hosted by docker
|
||||
# - OLLAMA_ENDPOINT=ollama
|
||||
|
||||
ollama:
|
||||
image: ollama/ollama
|
||||
container_name: ollama
|
||||
volumes:
|
||||
- ./ollama:/root/.ollama
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- '11434:11434'
|
||||
profiles:
|
||||
- manual
|
||||
|
||||
chatbot_ui:
|
||||
build:
|
||||
context: ../../chatbot_ui
|
||||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- "18080:8080"
|
||||
environment:
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY}
|
||||
- CHAT_COMPLETION_ENDPOINT=http://bolt:10000/v1/chat/completions
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus
|
||||
container_name: prometheus
|
||||
command:
|
||||
- '--config.file=/etc/prometheus/prometheus.yaml'
|
||||
ports:
|
||||
- 9090:9090
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- ./prometheus:/etc/prometheus
|
||||
- ./prom_data:/prometheus
|
||||
profiles:
|
||||
- monitoring
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana
|
||||
container_name: grafana
|
||||
ports:
|
||||
- 3000:3000
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- GF_SECURITY_ADMIN_USER=admin
|
||||
- GF_SECURITY_ADMIN_PASSWORD=grafana
|
||||
volumes:
|
||||
- ./grafana:/etc/grafana/provisioning/datasources
|
||||
- ./grafana/dashboard.yaml:/etc/grafana/provisioning/dashboards/main.yaml
|
||||
- ./grafana/dashboards:/var/lib/grafana/dashboards
|
||||
profiles:
|
||||
- monitoring
|
||||
12
demos/function_calling/grafana/dashboard.yaml
Normal file
12
demos/function_calling/grafana/dashboard.yaml
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: "Dashboard provider"
|
||||
orgId: 1
|
||||
type: file
|
||||
disableDeletion: false
|
||||
updateIntervalSeconds: 10
|
||||
allowUiUpdates: false
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
||||
foldersFromFilesStructure: true
|
||||
355
demos/function_calling/grafana/dashboards/envoy_overview.json
Normal file
355
demos/function_calling/grafana/dashboards/envoy_overview.json
Normal file
|
|
@ -0,0 +1,355 @@
|
|||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"disableTextWrap": false,
|
||||
"editorMode": "code",
|
||||
"expr": "avg(rate(envoy_cluster_internal_upstream_rq_time_sum[1m]) / rate(envoy_cluster_internal_upstream_rq_time_count[1m])) by (envoy_cluster_name)",
|
||||
"fullMetaSearch": false,
|
||||
"hide": false,
|
||||
"includeNullMetadata": true,
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A",
|
||||
"useBackend": false
|
||||
}
|
||||
],
|
||||
"title": "request latency - internal (ms)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"disableTextWrap": false,
|
||||
"editorMode": "code",
|
||||
"expr": "avg(rate(envoy_cluster_external_upstream_rq_time_sum[1m]) / rate(envoy_cluster_external_upstream_rq_time_count[1m])) by (envoy_cluster_name)",
|
||||
"fullMetaSearch": false,
|
||||
"hide": false,
|
||||
"includeNullMetadata": true,
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A",
|
||||
"useBackend": false
|
||||
}
|
||||
],
|
||||
"title": "request latency - external (ms)",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"disableTextWrap": false,
|
||||
"editorMode": "code",
|
||||
"expr": "avg(rate(envoy_cluster_internal_upstream_rq_completed[1m])) by (envoy_cluster_name)",
|
||||
"fullMetaSearch": false,
|
||||
"includeNullMetadata": true,
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A",
|
||||
"useBackend": false
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "PBFA97CFB590B2093"
|
||||
},
|
||||
"disableTextWrap": false,
|
||||
"editorMode": "code",
|
||||
"expr": "avg(rate(envoy_cluster_external_upstream_rq_completed[1m])) by (envoy_cluster_name)",
|
||||
"fullMetaSearch": false,
|
||||
"hide": false,
|
||||
"includeNullMetadata": true,
|
||||
"instant": false,
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "B",
|
||||
"useBackend": false
|
||||
}
|
||||
],
|
||||
"title": "Upstream request count",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 39,
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-15m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "browser",
|
||||
"title": "Intelligent Gateway Overview",
|
||||
"uid": "adt6uhx5lk8aob",
|
||||
"version": 3,
|
||||
"weekStart": ""
|
||||
}
|
||||
9
demos/function_calling/grafana/datasource.yaml
Normal file
9
demos/function_calling/grafana/datasource.yaml
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
url: http://prometheus:9090
|
||||
isDefault: true
|
||||
access: proxy
|
||||
editable: true
|
||||
23
demos/function_calling/prometheus/prometheus.yaml
Normal file
23
demos/function_calling/prometheus/prometheus.yaml
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
global:
|
||||
scrape_interval: 15s
|
||||
scrape_timeout: 10s
|
||||
evaluation_interval: 15s
|
||||
alerting:
|
||||
alertmanagers:
|
||||
- static_configs:
|
||||
- targets: []
|
||||
scheme: http
|
||||
timeout: 10s
|
||||
api_version: v1
|
||||
scrape_configs:
|
||||
- job_name: envoy
|
||||
honor_timestamps: true
|
||||
scrape_interval: 15s
|
||||
scrape_timeout: 10s
|
||||
metrics_path: /stats
|
||||
scheme: http
|
||||
static_configs:
|
||||
- targets:
|
||||
- envoy:9901
|
||||
params:
|
||||
format: ['prometheus']
|
||||
Loading…
Add table
Add a link
Reference in a new issue