Improve demo experience (#28)

* Improve demo experience
- generate envoy.yaml using jinja2 template

* add todo

* add config file
This commit is contained in:
Adil Hafeez 2024-07-31 17:13:39 -07:00 committed by GitHub
parent 51d6213cdd
commit 6f6454069f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 124 additions and 36 deletions

View file

@ -0,0 +1,9 @@
# One-shot image that renders envoy.yaml from the Jinja2 template at run time.
# Stage keyword casing fixed: `AS` must match `FROM` casing (BuildKit FromAsCasing lint).
FROM python:3-slim AS config-generator

WORKDIR /usr/src/app

# jinja2 is the generator's only dependency; --no-cache-dir keeps the layer small.
RUN pip install --no-cache-dir jinja2

COPY config_generator/config_generator.py .
COPY envoyfilter/envoy.template.yaml .
COPY envoyfilter/katanemo-config.yaml .

# Rendering is deliberately done at container run time (CMD) rather than build
# time, so a katanemo-config.yaml mounted by docker-compose can be swapped
# without rebuilding the image.
# RUN python config_generator.py > envoy.yaml
CMD ["python", "config_generator.py"]

View file

@ -0,0 +1,22 @@
"""Render an Envoy bootstrap config from a Jinja2 template.

Reads katanemo-config.yaml, injects its raw text into the template as the
``katanemo_config`` variable, and writes the rendered result to the output
path. All three file locations are overridable via environment variables so
the script works both inside the container and locally.
"""
import os

from jinja2 import Environment, FileSystemLoader

ENVOY_CONFIG_TEMPLATE_FILE = os.getenv('ENVOY_CONFIG_TEMPLATE_FILE', 'envoy.template.yaml')
KATANEMO_CONFIG_FILE = os.getenv('KATANEMO_CONFIG_FILE', 'katanemo-config.yaml')
ENVOY_CONFIG_FILE_RENDERED = os.getenv('ENVOY_CONFIG_FILE_RENDERED', '/usr/src/app/out/envoy.yaml')

env = Environment(loader=FileSystemLoader('./'))
# Bug fix: the template name was hard-coded to 'envoy.template.yaml', so the
# ENVOY_CONFIG_TEMPLATE_FILE override above was silently ignored.
template = env.get_template(ENVOY_CONFIG_TEMPLATE_FILE)

with open(KATANEMO_CONFIG_FILE, 'r') as file:
    katanemo_config = file.read()

data = {
    'katanemo_config': katanemo_config
}
rendered = template.render(data)

# Echo the rendered config and destination path so they show up in the
# container logs of the one-shot config-generator service.
print(rendered)
print(ENVOY_CONFIG_FILE_RENDERED)

with open(ENVOY_CONFIG_FILE_RENDERED, 'w') as file:
    file.write(rendered)

View file

@ -1,4 +1,11 @@
services:
config-generator:
build:
context: ../../
dockerfile: config_generator/Dockerfile
volumes:
- ./katanemo-config.yaml:/usr/src/app/katanemo-config.yaml
- ./generated:/usr/src/app/out
envoy:
build:
context: ../../
@ -8,11 +15,13 @@ services:
- "10000:10000"
- "19901:9901"
volumes:
- ./envoy.yaml:/etc/envoy/envoy.yaml
- ./generated/envoy.yaml:/etc/envoy/envoy.yaml
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
networks:
- envoymesh
depends_on:
config-generator:
condition: service_completed_successfully
embeddingserver:
condition: service_healthy

View file

@ -0,0 +1,41 @@
# Katanemo gateway configuration (embedded verbatim into envoy.yaml by the
# config generator). Indentation reconstructed to canonical 2-space YAML.
default_prompt_endpoint: "127.0.0.1"
load_balancing: "round_robin"
timeout_ms: 5000

embedding_provider:
  name: "SentenceTransformer"
  model: "all-MiniLM-L6-v2"

llm_providers:
  # $OPEN_AI_API_KEY is substituted from the environment at deploy time;
  # do not commit a real key here.
  - name: "open-ai-gpt-4"
    api_key: "$OPEN_AI_API_KEY"
    model: gpt-4

prompt_targets:
  - type: context_resolver
    name: weather_forecast
    # Example utterances used to match incoming prompts to this target.
    few_shot_examples:
      - what is the weather in New York?
      - how is the weather in San Francisco?
      - what is the forecast in Chicago?
    entities:
      - name: city
        required: true
      - name: days
    endpoint:
      cluster: weatherhost
      path: /weather
    # Prompt typos fixed: "weater" -> "weather", "Please following following"
    # -> "Please follow these", "farenheight" -> "Fahrenheit".
    system_prompt: |
      You are a helpful weather forecaster. Use weather data that is provided to you. Please follow these guidelines when responding to user queries:
      - Use Fahrenheit for temperature
      - Use miles per hour for wind speed

# TODO: add support for adding custom clusters e.g.
# clusters:
#   qdrant:
#     options:
#       - address: "qdrant"
#       - address: "weatherhost"
#       - port: 6333

View file

@ -58,40 +58,7 @@ static_resources:
configuration:
"@type": "type.googleapis.com/google.protobuf.StringValue"
value: |
default_prompt_endpoint: "127.0.0.1"
load_balancing: "round_robin"
timeout_ms: 5000
embedding_provider:
name: "SentenceTransformer"
model: "all-MiniLM-L6-v2"
llm_providers:
- name: "open-ai-gpt-4"
api_key: "$OPEN_AI_API_KEY"
model: gpt-4
prompt_targets:
- type: context_resolver
name: weather_forecast
few_shot_examples:
- what is the weather in New York?
- how is the weather in San Francisco?
- what is the forecast in Seattle?
entities:
- name: city
required: true
- name: days
endpoint:
cluster: weatherhost
path: /weather
system_prompt: |
You are a helpful weather forecaster. Use weater data that is provided to you. Please following following guidelines when responding to user queries:
- Use farenheight for temperature
- Use miles per hour for wind speed
{{ katanemo_config | indent(30) }}
vm_config:
runtime: "envoy.wasm.runtime.v8"
code:

View file

@ -91,7 +91,6 @@ static_resources:
You are a helpful weather forecaster. Use weater data that is provided to you. Please following following guidelines when responding to user queries:
- Use farenheight for temperature
- Use miles per hour for wind speed
vm_config:
runtime: "envoy.wasm.runtime.v8"
code:

View file

@ -0,0 +1,41 @@
default_prompt_endpoint: "127.0.0.1"
load_balancing: "round_robin"
timeout_ms: 5000
embedding_provider:
name: "SentenceTransformer"
model: "all-MiniLM-L6-v2"
llm_providers:
- name: "open-ai-gpt-4"
api_key: "$OPEN_AI_API_KEY"
model: gpt-4
prompt_targets:
- type: context_resolver
name: weather_forecast
few_shot_examples:
- what is the weather in New York?
- how is the weather in San Francisco?
- what is the forecast in Chicago?
entities:
- name: city
required: true
- name: days
endpoint:
cluster: weatherhost
path: /weather
system_prompt: |
You are a helpful weather forecaster. Use weater data that is provided to you. Please following following guidelines when responding to user queries:
- Use farenheight for temperature
- Use miles per hour for wind speed
#TODO: add support for adding custom clusters e.g.
# clusters:
# qdrant:
# options:
# - address: "qdrant"
# - address: "weatherhost"
# - port: 6333