Add support for local llm (mistral 7b) (#31)

This commit is contained in:
Adil Hafeez 2024-08-06 23:40:06 -07:00 committed by GitHub
parent b49fc2f264
commit 445b1ea210
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 703 additions and 51 deletions

4
.gitignore vendored
View file

@ -7,4 +7,6 @@ grafana-data
prom_data
.env
qdrant_data
demos/weather-forecast/generated/
generated
.DS_Store
*.gguf

16
chatbot-ui/.vscode/launch.json vendored Normal file
View file

@ -0,0 +1,16 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "chatbot-ui",
"cwd": "${workspaceFolder}/app",
"type": "debugpy",
"request": "launch",
"program": "run.py",
"console": "integratedTerminal",
}
]
}

View file

@ -19,18 +19,21 @@ class Message(BaseModel):
role: str
content: str
async def make_completion(messages:List[Message], nb_retries:int=3, delay:int=30) -> Optional[str]:
async def make_completion(messages:List[Message], nb_retries:int=3, delay:int=120) -> Optional[str]:
"""
Sends a request to the ChatGPT API to retrieve a response based on a list of previous messages.
"""
header = {
"Content-Type": "application/json",
"Authorization": f"Bearer {OPENAI_API_KEY}"
}
if OPENAI_API_KEY is not None and OPENAI_API_KEY != "":
header["Authorization"] = f"Bearer {OPENAI_API_KEY}"
if OPENAI_API_KEY is None or OPENAI_API_KEY == "":
logger.error("No OpenAI API Key found. Please create .env file and set OPENAI_API_KEY env var !")
return None
if CHAT_COMPLETION_ENDPOINT.startswith("https://api.openai.com"):
logger.error("No OpenAI API Key found. Please create .env file and set OPENAI_API_KEY env var !")
return None
try:
async with async_timeout.timeout(delay=delay):
async with httpx.AsyncClient(headers=header) as aio_client:
@ -44,7 +47,8 @@ async def make_completion(messages:List[Message], nb_retries:int=3, delay:int=30
json = {
"model": "gpt-3.5-turbo",
"messages": messages
}
},
timeout=delay
)
logger.debug(f"Status Code : {resp.status_code}")
if resp.status_code == 200:
@ -66,7 +70,8 @@ async def predict(input, history):
"""
history.append({"role": "user", "content": input})
response = await make_completion(history)
history.append({"role": "assistant", "content": response})
if response is not None:
history.append({"role": "assistant", "content": response})
messages = [(history[i]["content"], history[i+1]["content"]) for i in range(0, len(history)-1, 2)]
return messages, history

View file

@ -0,0 +1,22 @@
# Weather forecasting
This demo shows how you can use intelligent prompt gateway to provide realtime weather forecast using Mistral LLM locally hosted using llama.cpp as LLM Hosting Service.
# Starting the demo
1. Ensure that submodule is up to date
```sh
git submodule sync --recursive
```
1. Download mistral 7b model using following shell command
```sh
sh download_mistral_7b.sh
```
2. Start services
```sh
docker compose up
```
3. Navigate to http://localhost:18080/
4. You can type in queries like "how is the weather in Seattle"
1. You can also ask follow up questions like "show me sunny days"
5. To see metrics navigate to "http://localhost:3000/" (use admin/grafana for login)
1. Open up dashboard named "Intelligent Gateway Overview"
2. On this dashboard you can see request latency and number of requests

View file

@ -0,0 +1,88 @@
services:
config-generator:
build:
context: ../../
dockerfile: config_generator/Dockerfile
volumes:
- ./katanemo-config.yaml:/usr/src/app/katanemo-config.yaml
- ./generated:/usr/src/app/out
envoy:
build:
context: ../../
dockerfile: envoyfilter/Dockerfile
hostname: envoy
ports:
- "10000:10000"
- "19901:9901"
volumes:
- ./generated/envoy.yaml:/etc/envoy/envoy.yaml
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
depends_on:
config-generator:
condition: service_completed_successfully
embeddingserver:
condition: service_healthy
embeddingserver:
build:
context: ../../embedding-server
dockerfile: Dockerfile
ports:
- "18081:80"
healthcheck:
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
interval: 5s
retries: 20
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
qdrant:
image: qdrant/qdrant
hostname: vector-db
ports:
- 16333:6333
- 16334:6334
chatbot-ui:
build:
context: ../../chatbot-ui
dockerfile: Dockerfile
ports:
- "18080:8080"
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- CHAT_COMPLETION_ENDPOINT=http://envoy:10000/v1/chat/completions
prometheus:
image: prom/prometheus
container_name: prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yaml'
ports:
- 9090:9090
restart: unless-stopped
volumes:
- ./prometheus:/etc/prometheus
- ./prom_data:/prometheus
grafana:
image: grafana/grafana
container_name: grafana
ports:
- 3000:3000
restart: unless-stopped
environment:
- GF_SECURITY_ADMIN_USER=admin
- GF_SECURITY_ADMIN_PASSWORD=grafana
volumes:
- ./grafana:/etc/grafana/provisioning/datasources
- ./grafana/dashboard.yaml:/etc/grafana/provisioning/dashboards/main.yaml
- ./grafana/dashboards:/var/lib/grafana/dashboards
mistral_7b_instruct:
image: ghcr.io/ggerganov/llama.cpp:server
hostname: mistral_7b_instruct
ports:
- "10001:10001"
volumes:
- ./mistral-7b-instruct-v0.2.Q4_K_M.gguf:/models/model.gguf
command: ["--host", "0.0.0.0", "--port", "10001", "-m", "/models/model.gguf"]

View file

@ -0,0 +1 @@
huggingface-cli download TheBloke/Mistral-7B-Instruct-v0.2-GGUF mistral-7b-instruct-v0.2.Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False

View file

@ -0,0 +1,12 @@
apiVersion: 1
providers:
- name: "Dashboard provider"
orgId: 1
type: file
disableDeletion: false
updateIntervalSeconds: 10
allowUiUpdates: false
options:
path: /var/lib/grafana/dashboards
foldersFromFilesStructure: true

View file

@ -0,0 +1,355 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"links": [],
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "avg(rate(envoy_cluster_internal_upstream_rq_time_sum[1m]) / rate(envoy_cluster_internal_upstream_rq_time_count[1m])) by (envoy_cluster_name)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "request latency - internal (ms)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "avg(rate(envoy_cluster_external_upstream_rq_time_sum[1m]) / rate(envoy_cluster_external_upstream_rq_time_count[1m])) by (envoy_cluster_name)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "request latency - external (ms)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "avg(rate(envoy_cluster_internal_upstream_rq_completed[1m])) by (envoy_cluster_name)",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "avg(rate(envoy_cluster_external_upstream_rq_completed[1m])) by (envoy_cluster_name)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "B",
"useBackend": false
}
],
"title": "Upstream request count",
"type": "timeseries"
}
],
"schemaVersion": 39,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-15m",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Intelligent Gateway Overview",
"uid": "adt6uhx5lk8aob",
"version": 3,
"weekStart": ""
}

View file

@ -0,0 +1,9 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
url: http://prometheus:9090
isDefault: true
access: proxy
editable: true

View file

@ -0,0 +1,46 @@
default_prompt_endpoint: "127.0.0.1"
load_balancing: "round_robin"
timeout_ms: 5000
embedding_provider:
name: "SentenceTransformer"
model: "all-MiniLM-L6-v2"
llm_providers:
- name: open-ai-gpt-4
api_key: "$OPEN_AI_API_KEY"
model: gpt-4
- name: mistral_7b_instruct
model: mistral-7b-instruct
endpoint: http://mistral_7b_instruct:10001/v1/chat/completions
default: true
prompt_targets:
- type: context_resolver
name: weather_forecast
few_shot_examples:
- what is the weather in New York?
- how is the weather in San Francisco?
- what is the forecast in Chicago?
entities:
- name: city
required: true
- name: days
endpoint:
cluster: weatherhost
path: /weather
system_prompt: |
You are a helpful weather forecaster. Use weather data that is provided to you. Please follow these guidelines when responding to user queries:
- Use Fahrenheit for temperature
- Use miles per hour for wind speed
#TODO: add support for adding custom clusters e.g.
# clusters:
# qdrant:
# options:
# - address: "qdrant"
# - address: "weatherhost"
# - port: 6333

View file

@ -0,0 +1,23 @@
global:
scrape_interval: 15s
scrape_timeout: 10s
evaluation_interval: 15s
alerting:
alertmanagers:
- static_configs:
- targets: []
scheme: http
timeout: 10s
api_version: v1
scrape_configs:
- job_name: envoy
honor_timestamps: true
scrape_interval: 15s
scrape_timeout: 10s
metrics_path: /stats
scheme: http
static_configs:
- targets:
- envoy:9901
params:
format: ['prometheus']

View file

@ -4,12 +4,12 @@ This demo shows how you can use intelligent prompt gateway to provide realtime w
# Starting the demo
1. Ensure that submodule is up to date
```sh
$ git submodule sync --recursive
git submodule sync --recursive
```
1. Create `.env` file and set OpenAI key using env var `OPENAI_API_KEY`
1. Start services
```sh
$ docker compose up
docker compose up
```
1. Navigate to http://localhost:18080/
1. You can type in queries like "how is the weather in Seattle"

View file

@ -17,8 +17,6 @@ services:
volumes:
- ./generated/envoy.yaml:/etc/envoy/envoy.yaml
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
networks:
- envoymesh
depends_on:
config-generator:
condition: service_completed_successfully
@ -35,17 +33,14 @@ services:
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
interval: 5s
retries: 20
networks:
- envoymesh
volumes:
- ~/.cache/huggingface:/root/.cache/huggingface
qdrant:
image: qdrant/qdrant
hostname: vector-db
ports:
- 16333:6333
- 16334:6334
networks:
- envoymesh
chatbot-ui:
build:
@ -53,8 +48,6 @@ services:
dockerfile: Dockerfile
ports:
- "18080:8080"
networks:
- envoymesh
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- CHAT_COMPLETION_ENDPOINT=http://envoy:10000/v1/chat/completions
@ -70,8 +63,6 @@ services:
volumes:
- ./prometheus:/etc/prometheus
- ./prom_data:/prometheus
networks:
- envoymesh
grafana:
image: grafana/grafana
@ -86,9 +77,3 @@ services:
- ./grafana:/etc/grafana/provisioning/datasources
- ./grafana/dashboard.yaml:/etc/grafana/provisioning/dashboards/main.yaml
- ./grafana/dashboards:/var/lib/grafana/dashboards
# - ./grafana-data:/var/lib/grafana
networks:
- envoymesh
networks:
envoymesh: {}

View file

@ -8,9 +8,10 @@ embedding_provider:
llm_providers:
- name: "open-ai-gpt-4"
api_key: "$OPEN_AI_API_KEY"
- name: open-ai-gpt-4
api_key: $OPEN_AI_API_KEY
model: gpt-4
default: true
prompt_targets:

16
embedding-server/.vscode/launch.json vendored Normal file
View file

@ -0,0 +1,16 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "embedding server",
"cwd": "${workspaceFolder}/app",
"type": "debugpy",
"request": "launch",
"module": "uvicorn",
"args": ["main:app","--reload", "--port", "8000"],
}
]
}

View file

@ -35,9 +35,11 @@ RUN apt-get update && apt-get install -y \
curl \
&& rm -rf /var/lib/apt/lists/*
RUN python install.py
# comment it out for now as we don't want to download the model every time we build the image
# we will mount host cache to docker image to avoid downloading the model every time
# see docker-compose file for more details
# RUN python install.py && \
# find /root/.cache/torch/sentence_transformers/ -name onnx -exec rm -rf {} +
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]

View file

@ -2,7 +2,7 @@ import os
import sentence_transformers
from gliner import GLiNER
def load_transformers(models = os.getenv("MODELS", "sentence-transformers/all-MiniLM-L6-v2")):
def load_transformers(models = os.getenv("MODELS", "BAAI/bge-large-en-v1.5")):
transformers = {}
for model in models.split(','):

View file

@ -9,12 +9,13 @@ services:
- ./envoy.yaml:/etc/envoy/envoy.yaml
- ./target/wasm32-wasi/release:/etc/envoy/proxy-wasm-plugins
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
networks:
- envoymesh
depends_on:
qdrant:
condition: service_started
embeddingserver:
condition: service_healthy
embeddingserver:
build:
context: ../embedding-server
@ -25,8 +26,6 @@ services:
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
interval: 5s
retries: 20
networks:
- envoymesh
qdrant:
image: qdrant/qdrant
@ -34,10 +33,12 @@ services:
ports:
- 16333:6333
- 16334:6334
volumes:
- ./qdrant_data:/qdrant/storage
networks:
- envoymesh
networks:
envoymesh: {}
chatbot-ui:
build:
context: ../chatbot-ui
dockerfile: Dockerfile
ports:
- "18080:8080"
environment:
- CHAT_COMPLETION_ENDPOINT=http://envoy:10000/v1/chat/completions

View file

@ -0,0 +1 @@
huggingface-cli download TheBloke/Mistral-7B-Instruct-v0.2-GGUF mistral-7b-instruct-v0.2.Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False

View file

@ -34,9 +34,19 @@ static_resources:
routes:
- match:
prefix: "/v1/chat/completions"
headers:
name: "Authorization"
present_match: true
route:
auto_host_rewrite: true
cluster: openai
timeout: 60s
- match:
prefix: "/v1/chat/completions"
route:
auto_host_rewrite: true
cluster: mistral_7b_instruct
timeout: 60s
- match:
prefix: "/embeddings"
route:
@ -156,3 +166,17 @@ static_resources:
address: qdrant
port_value: 6333
hostname: "qdrant"
- name: mistral_7b_instruct
connect_timeout: 5s
type: STRICT_DNS
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: mistral_7b_instruct
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: mistral_7b_instruct
port_value: 10001
hostname: "mistral_7b_instruct"

View file

@ -28,15 +28,26 @@ static_resources:
route:
auto_host_rewrite: true
cluster: openai
timeout: 60s
- name: local_service
domains:
- "*"
routes:
- match:
prefix: "/v1/chat/completions"
headers:
name: "Authorization"
present_match: true
route:
auto_host_rewrite: true
cluster: openai
timeout: 60s
- match:
prefix: "/v1/chat/completions"
route:
auto_host_rewrite: true
cluster: mistral_7b_instruct
timeout: 60s
- match:
prefix: "/embeddings"
route:
@ -68,10 +79,16 @@ static_resources:
llm_providers:
- name: "open-ai-gpt-4"
- name: open-ai-gpt-4
api_key: "$OPEN_AI_API_KEY"
model: gpt-4
- name: mistral_7b_instruct
model: mistral-7b-instruct
endpoint: http://mistral_7b_instruct:10001/v1/chat/completions
default: true
prompt_targets:
- type: context_resolver
@ -131,7 +148,6 @@ static_resources:
tls_params:
tls_minimum_protocol_version: TLSv1_2
tls_maximum_protocol_version: TLSv1_3
- name: embeddingserver
connect_timeout: 5s
type: STRICT_DNS
@ -143,8 +159,8 @@ static_resources:
- endpoint:
address:
socket_address:
address: embeddingserver
port_value: 80
address: host.docker.internal
port_value: 8000
hostname: "embeddingserver"
- name: weatherhost
connect_timeout: 5s
@ -157,8 +173,8 @@ static_resources:
- endpoint:
address:
socket_address:
address: embeddingserver
port_value: 80
address: host.docker.internal
port_value: 8000
hostname: "embeddingserver"
- name: nerhost
connect_timeout: 5s
@ -171,8 +187,8 @@ static_resources:
- endpoint:
address:
socket_address:
address: embeddingserver
port_value: 80
address: host.docker.internal
port_value: 8000
hostname: "embeddingserver"
- name: qdrant
connect_timeout: 5s
@ -188,3 +204,17 @@ static_resources:
address: qdrant
port_value: 6333
hostname: "qdrant"
- name: mistral_7b_instruct
connect_timeout: 5s
type: STRICT_DNS
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: mistral_7b_instruct
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: mistral_7b_instruct
port_value: 10001
hostname: "mistral_7b_instruct"

View file

@ -30,8 +30,16 @@ pub struct EmbeddingProviver {
//TODO: use enum for model, but if there is a new model, we need to update the code
pub struct LlmProvider {
pub name: String,
pub api_key: String,
pub api_key: Option<String>,
pub model: String,
pub default: Option<bool>,
pub endpoint: Option<EnpointType>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum EnpointType {
String(String),
Struct(Endpoint),
}
#[derive(Debug, Clone, Serialize, Deserialize)]

View file

@ -50,6 +50,7 @@ impl StreamContext {
// However, a missing Content-Length header is not grounds for bad requests given that intermediary hops could
// manipulate the body in benign ways e.g., compression.
self.set_http_request_header("content-length", None);
// self.set_http_request_header("authorization", None);
}
fn modify_path_header(&mut self) {
@ -330,7 +331,7 @@ impl StreamContext {
return;
}
};
info!("sending request to openai: msg len: {}", json_string.len());
info!("sending request to openai: msg {}", json_string);
self.set_http_request_body(0, json_string.len(), &json_string.into_bytes());
self.resume_http_request();
}

View file

@ -23,6 +23,10 @@
{
"name": "demos/weather-forecast",
"path": "./demos/weather-forecast",
},
{
"name": "demos/weather-forecast-local-llm",
"path": "./demos/weather-forecast-local-llm",
}
],
"settings": {}