diff --git a/.gitignore b/.gitignore index 0307b5e7..8be85e66 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,6 @@ qdrant_data generated .DS_Store *.gguf +venv +demos/function-calling/ollama/models/ +demos/function-calling/ollama/id_ed* diff --git a/.gitmodules b/.gitmodules index 231718ec..74191014 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "open-message-format"] path = open-message-format - url = git@github.com:open-llm-initiative/open-message-format.git + url = git@github.com:open-llm-initiative/open-message-format-private.git diff --git a/chatbot-ui/app/run.py b/chatbot-ui/app/run.py index e487e67e..b9eb8fc7 100644 --- a/chatbot-ui/app/run.py +++ b/chatbot-ui/app/run.py @@ -15,9 +15,14 @@ load_dotenv() OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") CHAT_COMPLETION_ENDPOINT = os.getenv("CHAT_COMPLETION_ENDPOINT", "https://api.openai.com/v1/chat/completions") + class Message(BaseModel): + role: str content: str + # model is additional state we maintain on client side so that bolt gateway can know which model responded to user prompt + model: str + resolver: str async def make_completion(messages:List[Message], nb_retries:int=3, delay:int=120) -> Optional[str]: """ @@ -52,7 +57,20 @@ async def make_completion(messages:List[Message], nb_retries:int=3, delay:int=12 ) logger.debug(f"Status Code : {resp.status_code}") if resp.status_code == 200: - return resp.json()["choices"][0]["message"]["content"] + resp_json = resp.json() + model = resp_json["model"] + msg = {} + msg["role"] = "assistant" + msg["model"] = model + if "resolver_name" in resp_json: + msg["resolver"] = resp_json["resolver_name"] + if "choices" in resp_json: + msg["content"] = resp_json["choices"][0]["message"]["content"] + return msg + elif "message" in resp_json: + msg["content"] = resp_json["message"]["content"] + return msg + keep_loop = False else: logger.warning(resp.content) keep_loop = False @@ -70,20 +88,22 @@ async def predict(input, history): """ 
history.append({"role": "user", "content": input}) response = await make_completion(history) + print(response) if response is not None: - history.append({"role": "assistant", "content": response}) + history.append(response) messages = [(history[i]["content"], history[i+1]["content"]) for i in range(0, len(history)-1, 2)] return messages, history """ Gradio Blocks low-level API that allows to create custom web applications (here our chat app) """ -with gr.Blocks() as demo: +# with fill_height=true the chatbot fills the height of the page +with gr.Blocks(fill_height=True, css="footer {visibility: hidden}") as demo: logger.info("Starting Demo...") - chatbot = gr.Chatbot(label="WebGPT") + chatbot = gr.Chatbot(label="Bolt Chatbot", scale=1) state = gr.State([]) with gr.Row(): - txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter") + txt = gr.Textbox(show_label=False, placeholder="Enter text and press enter", scale=1) txt.submit(predict, [txt, state], [chatbot, state]) demo.launch(server_name="0.0.0.0", server_port=8080) diff --git a/chatbot-ui/requirements.txt b/chatbot-ui/requirements.txt index 732b77c8..6e1cce4f 100644 --- a/chatbot-ui/requirements.txt +++ b/chatbot-ui/requirements.txt @@ -1,6 +1,7 @@ -gradio==4.39.0 +gradio==4.43.0 async_timeout==4.0.3 loguru==0.7.2 asyncio==3.4.3 httpx==0.27.0 python-dotenv==1.0.1 +pydantic==2.8.2 diff --git a/demos/function-calling/Bolt-FC-1B-Q3_K_L.model_file b/demos/function-calling/Bolt-FC-1B-Q3_K_L.model_file new file mode 100644 index 00000000..d58a6a17 --- /dev/null +++ b/demos/function-calling/Bolt-FC-1B-Q3_K_L.model_file @@ -0,0 +1,25 @@ +FROM Bolt-Function-Calling-1B-Q3_K_L.gguf + +# Set the size of the context window used to generate the next token +# PARAMETER num_ctx 16384 +PARAMETER num_ctx 4096 + +# Set parameters for response generation +PARAMETER num_predict 1024 +PARAMETER temperature 0.1 +PARAMETER top_p 0.5 +PARAMETER top_k 32022 +PARAMETER repeat_penalty 1.0 +PARAMETER stop "<|EOT|>" + 
+# Set the random number seed to use for generation +PARAMETER seed 42 + +# Set the prompt template to be passed into the model +TEMPLATE """{{ if .System }}<|begin▁of▁sentence|> +{{ .System }} +{{ end }}{{ if .Prompt }}### Instruction: +{{ .Prompt }} +{{ end }}### Response: +{{ .Response }} +<|EOT|>""" diff --git a/demos/function-calling/Bolt-FC-1B-Q4_K_M.model_file b/demos/function-calling/Bolt-FC-1B-Q4_K_M.model_file new file mode 100644 index 00000000..1def85b1 --- /dev/null +++ b/demos/function-calling/Bolt-FC-1B-Q4_K_M.model_file @@ -0,0 +1,24 @@ +FROM Bolt-Function-Calling-1B-Q4_K_M.gguf + +# Set the size of the context window used to generate the next token +PARAMETER num_ctx 4096 + +# Set parameters for response generation +PARAMETER num_predict 1024 +PARAMETER temperature 0.1 +PARAMETER top_p 0.5 +PARAMETER top_k 32022 +PARAMETER repeat_penalty 1.0 +PARAMETER stop "<|EOT|>" + +# Set the random number seed to use for generation +PARAMETER seed 42 + +# Set the prompt template to be passed into the model +TEMPLATE """{{ if .System }}<|begin▁of▁sentence|> +{{ .System }} +{{ end }}{{ if .Prompt }}### Instruction: +{{ .Prompt }} +{{ end }}### Response: +{{ .Response }} +<|EOT|>""" diff --git a/demos/function-calling/README.md b/demos/function-calling/README.md new file mode 100644 index 00000000..351b6a26 --- /dev/null +++ b/demos/function-calling/README.md @@ -0,0 +1,24 @@ +# Function calling +This demo shows how you can use intelligent prompt gateway to do function calling. This demo assumes you are using ollama running natively. If you want to run ollama running inside docker then please update ollama endpoint in docker-compose file. + +# Starting the demo +1. Ensure that submodule is up to date + ```sh + git submodule sync --recursive + ``` +1. Create `.env` file and set OpenAI key using env var `OPENAI_API_KEY` +1. Start services + ```sh + docker compose up + ``` +1. Download Bolt-FC model. 
This demo assumes we have downloaded `Bolt-Function-Calling-1B:Q4_K_M` to local folder +2. Create model file in ollama repository + ```sh + ollama create Bolt-Function-Calling-1B:Q4_K_M -f Bolt-FC-1B-Q4_K_M.model_file + ``` +3. Navigate to http://localhost:18080/ +4. You can type in queries like "how is the weather in Seattle" + 1. You can also ask follow up questions like "show me sunny days" +5. To see metrics navigate to "http://localhost:3000/" (use admin/grafana for login) + 1. Open up dashboard named "Intelligent Gateway Overview" + 2. On this dashboard you can see request latency and number of requests diff --git a/demos/function-calling/docker-compose.yaml b/demos/function-calling/docker-compose.yaml new file mode 100644 index 00000000..541a5859 --- /dev/null +++ b/demos/function-calling/docker-compose.yaml @@ -0,0 +1,115 @@ + +services: + + config-generator: + build: + context: ../../ + dockerfile: config_generator/Dockerfile + volumes: + - ../../envoyfilter/envoy.template.yaml:/usr/src/app/envoy.template.yaml + - ./katanemo-config.yaml:/usr/src/app/katanemo-config.yaml + - ./generated:/usr/src/app/out + + envoy: + build: + context: ../../ + dockerfile: envoyfilter/Dockerfile + hostname: envoy + ports: + - "10000:10000" + - "19901:9901" + volumes: + - ./generated/envoy.yaml:/etc/envoy/envoy.yaml + - /etc/ssl/cert.pem:/etc/ssl/cert.pem + depends_on: + config-generator: + condition: service_completed_successfully + embeddingserver: + condition: service_healthy + environment: + - LOG_LEVEL=debug + + embeddingserver: + build: + context: ../../embedding-server + dockerfile: Dockerfile + ports: + - "18081:80" + healthcheck: + test: ["CMD", "curl" ,"http://localhost:80/healthz"] + interval: 5s + retries: 20 + volumes: + - ~/.cache/huggingface:/root/.cache/huggingface + + functionresolver: + build: + context: ../../function_resolver + dockerfile: Dockerfile + ports: + - "18082:80" + healthcheck: + test: ["CMD", "curl" ,"http://localhost:80/healthz"] + interval: 5s 
+ retries: 20 + volumes: + - ~/.cache/huggingface:/root/.cache/huggingface + environment: + # use ollama endpoint that is hosted by host machine (no virtualization) + - OLLAMA_ENDPOINT=host.docker.internal + # uncomment following line to use ollama endpoint that is hosted by docker + # - OLLAMA_ENDPOINT=ollama + + ollama: + image: ollama/ollama + container_name: ollama + volumes: + - ./ollama:/root/.ollama + restart: unless-stopped + ports: + - '11434:11434' + profiles: + - manual + + qdrant: + image: qdrant/qdrant + hostname: vector-db + ports: + - 16333:6333 + - 16334:6334 + + chatbot-ui: + build: + context: ../../chatbot-ui + dockerfile: Dockerfile + ports: + - "18080:8080" + environment: + - OPENAI_API_KEY=${OPENAI_API_KEY} + - CHAT_COMPLETION_ENDPOINT=http://envoy:10000/v1/chat/completions + + prometheus: + image: prom/prometheus + container_name: prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yaml' + ports: + - 9090:9090 + restart: unless-stopped + volumes: + - ./prometheus:/etc/prometheus + - ./prom_data:/prometheus + + grafana: + image: grafana/grafana + container_name: grafana + ports: + - 3000:3000 + restart: unless-stopped + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD=grafana + volumes: + - ./grafana:/etc/grafana/provisioning/datasources + - ./grafana/dashboard.yaml:/etc/grafana/provisioning/dashboards/main.yaml + - ./grafana/dashboards:/var/lib/grafana/dashboards diff --git a/demos/function-calling/grafana/dashboard.yaml b/demos/function-calling/grafana/dashboard.yaml new file mode 100644 index 00000000..fd66a479 --- /dev/null +++ b/demos/function-calling/grafana/dashboard.yaml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: "Dashboard provider" + orgId: 1 + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: false + options: + path: /var/lib/grafana/dashboards + foldersFromFilesStructure: true diff --git 
a/demos/function-calling/grafana/dashboards/envoy_overview.json b/demos/function-calling/grafana/dashboards/envoy_overview.json new file mode 100644 index 00000000..51bff777 --- /dev/null +++ b/demos/function-calling/grafana/dashboards/envoy_overview.json @@ -0,0 +1,355 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": 
"avg(rate(envoy_cluster_internal_upstream_rq_time_sum[1m]) / rate(envoy_cluster_internal_upstream_rq_time_count[1m])) by (envoy_cluster_name)", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "request latency - internal (ms)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "avg(rate(envoy_cluster_external_upstream_rq_time_sum[1m]) / rate(envoy_cluster_external_upstream_rq_time_count[1m])) by (envoy_cluster_name)", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": 
false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "request latency - external (ms)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "avg(rate(envoy_cluster_internal_upstream_rq_completed[1m])) by (envoy_cluster_name)", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "avg(rate(envoy_cluster_external_upstream_rq_completed[1m])) by 
(envoy_cluster_name)", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Upstream request count", + "type": "timeseries" + } + ], + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Intelligent Gateway Overview", + "uid": "adt6uhx5lk8aob", + "version": 3, + "weekStart": "" +} diff --git a/demos/function-calling/grafana/datasource.yaml b/demos/function-calling/grafana/datasource.yaml new file mode 100644 index 00000000..4870174e --- /dev/null +++ b/demos/function-calling/grafana/datasource.yaml @@ -0,0 +1,9 @@ +apiVersion: 1 + +datasources: +- name: Prometheus + type: prometheus + url: http://prometheus:9090 + isDefault: true + access: proxy + editable: true diff --git a/demos/function-calling/katanemo-config.yaml b/demos/function-calling/katanemo-config.yaml new file mode 100644 index 00000000..03bf869b --- /dev/null +++ b/demos/function-calling/katanemo-config.yaml @@ -0,0 +1,41 @@ +default_prompt_endpoint: "127.0.0.1" +load_balancing: "round_robin" +timeout_ms: 5000 + + +# should not be here +embedding_provider: + name: "bge-large-en-v1.5" + model: "BAAI/bge-large-en-v1.5" + +llm_providers: + + - name: open-ai-gpt-4 + api_key: $OPEN_AI_API_KEY + model: gpt-4 + default: true + +prompt_targets: + + - type: function_resolver + name: weather_forecast + description: This function resolver provides weather forecast information for a given city. + few_shot_examples: + - what is the weather in New York? + - how is the weather in San Francisco? + - what is the forecast in Chicago? + parameters: + - name: city + required: true + description: The city for which the weather forecast is requested. + - name: days + description: The number of days for which the weather forecast is requested. 
+ - name: units + description: The units in which the weather forecast is requested. + endpoint: + cluster: weatherhost + path: /weather + system_prompt: | + You are a helpful weather forecaster. Use weather data that is provided to you. Please follow the following guidelines when responding to user queries: + - Use Fahrenheit for temperature + - Use miles per hour for wind speed diff --git a/demos/function-calling/prometheus/prometheus.yaml b/demos/function-calling/prometheus/prometheus.yaml new file mode 100644 index 00000000..5aa25e0d --- /dev/null +++ b/demos/function-calling/prometheus/prometheus.yaml @@ -0,0 +1,23 @@ +global: + scrape_interval: 15s + scrape_timeout: 10s + evaluation_interval: 15s +alerting: + alertmanagers: + - static_configs: + - targets: [] + scheme: http + timeout: 10s + api_version: v1 +scrape_configs: +- job_name: envoy + honor_timestamps: true + scrape_interval: 15s + scrape_timeout: 10s + metrics_path: /stats + scheme: http + static_configs: + - targets: + - envoy:9901 + params: + format: ['prometheus'] diff --git a/demos/weather-forecast-local-llm/docker-compose.yaml b/demos/weather-forecast-local-llm/docker-compose.yaml index 5b6611a8..369511df 100644 --- a/demos/weather-forecast-local-llm/docker-compose.yaml +++ b/demos/weather-forecast-local-llm/docker-compose.yaml @@ -4,6 +4,7 @@ services: context: ../../ dockerfile: config_generator/Dockerfile volumes: + - ../../envoyfilter/envoy.template.yaml:/usr/src/app/envoy.template.yaml - ./katanemo-config.yaml:/usr/src/app/katanemo-config.yaml - ./generated:/usr/src/app/out envoy: diff --git a/demos/weather-forecast/docker-compose.yaml b/demos/weather-forecast/docker-compose.yaml index 0d862933..28480588 100644 --- a/demos/weather-forecast/docker-compose.yaml +++ b/demos/weather-forecast/docker-compose.yaml @@ -4,6 +4,7 @@ services: context: ../../ dockerfile: config_generator/Dockerfile volumes: + - ../../envoyfilter/envoy.template.yaml:/usr/src/app/envoy.template.yaml - 
./katanemo-config.yaml:/usr/src/app/katanemo-config.yaml - ./generated:/usr/src/app/out envoy: diff --git a/envoyfilter/Cargo.lock b/envoyfilter/Cargo.lock index 80cfbddc..dbf8ae79 100644 --- a/envoyfilter/Cargo.lock +++ b/envoyfilter/Cargo.lock @@ -70,9 +70,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.86" +version = "1.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +checksum = "10f00e1f6e58a40e807377c75c6a7f97bf9044fab57816f2414e6f5f4499d7b8" [[package]] name = "arbitrary" @@ -82,13 +82,13 @@ checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" [[package]] name = "async-trait" -version = "0.1.81" +version = "0.1.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107" +checksum = "a27b8a3a6e1a44fa4c8baf1f653e4172e81486d4941f2237e20dc2d0cf4ddff1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -202,18 +202,19 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.6.1" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a12916984aab3fa6e39d655a33e09c0071eb36d6ab3aea5c2d78551f1df6d952" +checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" [[package]] name = "cc" -version = "1.1.6" +version = "1.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aba8f4e9906c7ce3c73463f62a7f0c65183ada1a2d47e397cc8810827f9694f" +checksum = "a93fe60e2fc87b6ba2c117f67ae14f66e3fc7d6a1e612a25adb238cc980eadb3" dependencies = [ "jobserver", "libc", + "shlex", ] [[package]] @@ -261,24 +262,24 @@ dependencies = [ [[package]] name = "core-foundation-sys" -version = "0.8.6" +version = "0.8.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cpp_demangle" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e8227005286ec39567949b33df9896bcadfa6051bccca2488129f108ca23119" +checksum = "96e58d342ad113c2b878f16d5d034c03be492ae460cdbc02b7f0f2284d310c7d" dependencies = [ "cfg-if 1.0.0", ] [[package]] name = "cpufeatures" -version = "0.2.12" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" dependencies = [ "libc", ] @@ -550,9 +551,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" [[package]] name = "fnv" @@ -737,9 +738,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab" +checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" dependencies = [ "atomic-waker", "bytes", @@ -876,9 +877,9 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.2" +version = "0.27.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155" +checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" dependencies = [ "futures-util", "http", @@ -909,9 +910,9 @@ dependencies = [ 
[[package]] name = "hyper-util" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ab92f4f49ee4fb4f997c784b7a2e0fa70050211e0b6a287f898c3c9785ca956" +checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9" dependencies = [ "bytes", "futures-channel", @@ -945,9 +946,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.6" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" dependencies = [ "equivalent", "hashbrown 0.14.5", @@ -1025,9 +1026,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.69" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" +checksum = "1868808506b929d7b0cfa8f75951347aa71bb21144b7791bae35d9bccfcfe37a" dependencies = [ "wasm-bindgen", ] @@ -1046,9 +1047,9 @@ checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" [[package]] name = "libc" -version = "0.2.155" +version = "0.2.158" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439" [[package]] name = "libm" @@ -1145,9 +1146,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4569e456d394deccd22ce1c1913e6ea0e54519f577285001215d33557431afe4" +checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" dependencies = [ "hermit-abi 0.3.9", "libc", @@ -1195,9 +1196,9 @@ checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21" [[package]] name = "object" 
-version = "0.36.2" +version = "0.36.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f203fa8daa7bb185f760ae12bd8e097f63d17041dcdcaf675ac54cdf863170e" +checksum = "084f1a5821ac4c651660a94a7153d27ac9d8a53736203f58b31945ded098070a" dependencies = [ "crc32fast", "hashbrown 0.14.5", @@ -1245,7 +1246,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -1286,7 +1287,7 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-targets 0.52.6", + "windows-targets", ] [[package]] @@ -1318,7 +1319,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -1359,9 +1360,12 @@ dependencies = [ [[package]] name = "ppv-lite86" -version = "0.2.17" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] [[package]] name = "proc-macro-error" @@ -1422,9 +1426,9 @@ dependencies = [ [[package]] name = "psm" -version = "0.1.21" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" +checksum = "aa37f80ca58604976033fae9515a8a2989fc13797d953f7c04fb8fa36a11f205" dependencies = [ "cc", ] @@ -1439,9 +1443,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] @@ -1507,9 +1511,9 @@ dependencies = [ [[package]] 
name = "redox_users" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd283d9651eeda4b2a83a43c1c91b266c40fd76ecd39a50a8c630ae69dc72891" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ "getrandom", "libredox", @@ -1560,9 +1564,9 @@ checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" [[package]] name = "reqwest" -version = "0.12.5" +version = "0.12.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7d6d2a27d57148378eb5e111173f4276ad26340ecc5c49a4a2152167a2d6a37" +checksum = "f8f4955649ef5c38cc7f9e8aa41761d48fb9677197daea9984dc54f56aad5e63" dependencies = [ "base64 0.22.1", "bytes", @@ -1599,7 +1603,7 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "winreg", + "windows-registry", ] [[package]] @@ -1631,9 +1635,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustix" -version = "0.38.34" +version = "0.38.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +checksum = "3f55e80d50763938498dd5ebb18647174e0c76dc38c5505294bb224624f30f36" dependencies = [ "bitflags 2.6.0", "errno", @@ -1657,9 +1661,9 @@ dependencies = [ [[package]] name = "rustls-pemfile" -version = "2.1.2" +version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" +checksum = "196fe16b00e106300d3e45ecfcb764fa292a535d7326a29a5875c579c7417425" dependencies = [ "base64 0.22.1", "rustls-pki-types", @@ -1667,15 +1671,15 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" +checksum = 
"fc0a2ce646f8655401bb81e7927b812614bd5d91dbc968696be50603510fcaf0" [[package]] name = "rustls-webpki" -version = "0.102.6" +version = "0.102.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e6b52d4fda176fd835fdc55a835d4a89b8499cad995885a21149d5ad62f852e" +checksum = "84678086bd54edf2b415183ed7a94d0efb049f1b646a33e22a36f3794be6ae56" dependencies = [ "ring", "rustls-pki-types", @@ -1690,9 +1694,9 @@ checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" [[package]] name = "scc" -version = "2.1.5" +version = "2.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fadf67e3cf23f8b11a6c8c48a16cb2437381503615acd91094ec7b4686a5a53" +checksum = "aeb7ac86243095b70a7920639507b71d51a63390d1ba26c4f60a552fbb914a37" dependencies = [ "sdd", ] @@ -1714,9 +1718,9 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "sdd" -version = "1.7.0" +version = "3.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85f05a494052771fc5bd0619742363b5e24e5ad72ab3111ec2e27925b8edc5f3" +checksum = "0495e4577c672de8254beb68d01a9b62d0e8a13c099edecdbedccce3223cd29f" [[package]] name = "security-framework" @@ -1752,31 +1756,32 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.204" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.204" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] 
[[package]] name = "serde_json" -version = "1.0.120" +version = "1.0.128" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" +checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] @@ -1837,7 +1842,7 @@ checksum = "82fe9db325bcef1fbcde82e078a5cc4efdf787e96b3b9cf45b50b529f2083d67" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -1851,6 +1856,12 @@ dependencies = [ "digest", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "slab" version = "0.4.9" @@ -1961,9 +1972,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.72" +version = "2.0.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" +checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" dependencies = [ "proc-macro2", "quote", @@ -1975,23 +1986,26 @@ name = "sync_wrapper" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" +dependencies = [ + "futures-core", +] [[package]] name = "system-configuration" -version = "0.5.1" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.6.0", "core-foundation", "system-configuration-sys", ] [[package]] name = "system-configuration-sys" -version = "0.5.0" +version = "0.6.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" dependencies = [ "core-foundation-sys", "libc", @@ -1999,20 +2013,21 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.12.15" +version = "0.12.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4873307b7c257eddcb50c9bedf158eb669578359fb28428bef438fec8e6ba7c2" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.10.1" +version = "3.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" dependencies = [ "cfg-if 1.0.0", "fastrand", + "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -2050,7 +2065,7 @@ checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -2085,9 +2100,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.39.1" +version = "1.40.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d040ac2b29ab03b09d4129c2f5bbd012a3ac2f79d38ff506a4bf8dd34b0eac8a" +checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" dependencies = [ "backtrace", "bytes", @@ -2121,9 +2136,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.11" +version = "0.7.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" dependencies = [ 
"bytes", "futures-core", @@ -2134,9 +2149,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.16" +version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81967dd0dd2c1ab0bc3468bd7caecc32b8a4aa47d0c8c695d8c2b2108168d62c" +checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e" dependencies = [ "serde", "serde_spanned", @@ -2146,18 +2161,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.7" +version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8fb9f64314842840f1d940ac544da178732128f1c78c21772e876579e0da1db" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.22.17" +version = "0.22.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d9f8729f5aea9562aac1cc0441f5d6de3cff1ee0c5d67293eeca5eb36ee7c16" +checksum = "583c44c02ad26b0c3f3066fe629275e50627026c51ac2e595cca4c230ce1ce1d" dependencies = [ "indexmap", "serde", @@ -2183,15 +2198,15 @@ dependencies = [ [[package]] name = "tower-layer" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" [[package]] name = "tower-service" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" @@ -2268,9 +2283,9 @@ checksum = "0336d538f7abc86d282a4189614dfaa90810dfc2c6f6427eaf88e16311dd225d" [[package]] name = "unicode-xid" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" +checksum = "229730647fbc343e3a80e463c1db7f78f3855d3f3739bee0dda773c9a037c90a" [[package]] name = "unsafe-libyaml" @@ -2340,34 +2355,35 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" +checksum = "a82edfc16a6c469f5f44dc7b571814045d60404b55a0ee849f9bcfa2e63dd9b5" dependencies = [ "cfg-if 1.0.0", + "once_cell", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" +checksum = "9de396da306523044d3302746f1208fa71d7532227f15e347e2d93e4145dd77b" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76bc14366121efc8dbb487ab05bcc9d346b3b5ec0eaa76e46594cabbe51762c0" +checksum = "61e9300f63a621e96ed275155c108eb6f843b6a26d053f122ab69724559dc8ed" dependencies = [ "cfg-if 1.0.0", "js-sys", @@ -2377,9 +2393,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" +checksum = "585c4c91a46b072c92e908d99cb1dcdf95c5218eeb6f3bf1efa991ee7a68cccf" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -2387,22 +2403,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.92" +version = "0.2.93" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" +checksum = "afc340c74d9005395cf9dd098506f7f44e38f2b4a21c6aaacf9a105ea5e1e836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.92" +version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" [[package]] name = "wasm-encoder" @@ -2415,9 +2431,9 @@ dependencies = [ [[package]] name = "wasm-encoder" -version = "0.214.0" +version = "0.216.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff694f02a8d7a50b6922b197ae03883fbf18cdb2ae9fbee7b6148456f5f44041" +checksum = "04c23aebea22c8a75833ae08ed31ccc020835b12a41999e58c31464271b94a88" dependencies = [ "leb128", ] @@ -2541,7 +2557,7 @@ dependencies = [ "anyhow", "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", "wasmtime-component-util", "wasmtime-wit-bindgen", "wit-parser", @@ -2671,7 +2687,7 @@ checksum = "99c02af2e9dbeb427304d1a08787d70ed0dbfec1af2236616f84c9f1f03e7969" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -2705,31 +2721,31 @@ dependencies = [ [[package]] name = "wast" -version = "214.0.0" +version = "216.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "694bcdb24c49c8709bd8713768b71301a11e823923eee355d530f1d8d0a7f8e9" +checksum = "f7eb1f2eecd913fdde0dc6c3439d0f24530a98ac6db6cb3d14d92a5328554a08" dependencies = [ "bumpalo", "leb128", "memchr", "unicode-width", - "wasm-encoder 0.214.0", + "wasm-encoder 0.216.0", ] [[package]] name = "wat" -version = "1.214.0" +version = "1.216.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"347249eb56773fa728df2656cfe3a8c19437ded61a922a0b5e0839d9790e278e" +checksum = "ac0409090fb5154f95fb5ba3235675fd9e579e731524d63b6a2f653e1280c82a" dependencies = [ "wast", ] [[package]] name = "web-sys" -version = "0.3.69" +version = "0.3.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" +checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" dependencies = [ "js-sys", "wasm-bindgen", @@ -2757,7 +2773,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] @@ -2784,12 +2800,33 @@ dependencies = [ ] [[package]] -name = "windows-sys" -version = "0.48.0" +name = "windows-registry" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +checksum = "e400001bb720a623c1c69032f8e3e4cf09984deec740f007dd2b03ec864804b0" dependencies = [ - "windows-targets 0.48.5", + "windows-result", + "windows-strings", + "windows-targets", +] + +[[package]] +name = "windows-result" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-strings" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +dependencies = [ + "windows-result", + "windows-targets", ] [[package]] @@ -2798,22 +2835,16 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 
0.52.6", + "windows-targets", ] [[package]] -name = "windows-targets" -version = "0.48.5" +name = "windows-sys" +version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ - "windows_aarch64_gnullvm 0.48.5", - "windows_aarch64_msvc 0.48.5", - "windows_i686_gnu 0.48.5", - "windows_i686_msvc 0.48.5", - "windows_x86_64_gnu 0.48.5", - "windows_x86_64_gnullvm 0.48.5", - "windows_x86_64_msvc 0.48.5", + "windows-targets", ] [[package]] @@ -2822,46 +2853,28 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm 0.52.6", - "windows_aarch64_msvc 0.52.6", - "windows_i686_gnu 0.52.6", + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", "windows_i686_gnullvm", - "windows_i686_msvc 0.52.6", - "windows_x86_64_gnu 0.52.6", - "windows_x86_64_gnullvm 0.52.6", - "windows_x86_64_msvc 0.52.6", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", ] -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -2874,48 +2887,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" - [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -2924,23 +2913,13 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.6.16" +version = "0.6.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b480ae9340fc261e6be3e95a1ba86d54ae3f9171132a73ce8d4bbaf68339507c" +checksum = "68a9bda4691f099d435ad181000724da8e5899daa10713c2d432552b9ccd3a6f" dependencies = [ "memchr", ] -[[package]] -name = "winreg" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" -dependencies = [ - "cfg-if 1.0.0", - "windows-sys 0.48.0", -] - [[package]] name = "wit-parser" version = "0.212.0" @@ -2965,6 +2944,7 @@ version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ + "byteorder", "zerocopy-derive", ] @@ -2976,7 +2956,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.72", + "syn 2.0.77", ] [[package]] @@ -2996,18 +2976,18 @@ dependencies = [ [[package]] name = "zstd-safe" -version = "7.2.0" +version = "7.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa556e971e7b568dc775c136fc9de8c779b1c2fc3a63defaafadffdbd3181afa" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" dependencies = [ "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.12+zstd.1.5.6" +version = "2.0.13+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" dependencies = [ "cc", 
"pkg-config", diff --git a/envoyfilter/Dockerfile b/envoyfilter/Dockerfile index 2fa3a064..9d48f20b 100644 --- a/envoyfilter/Dockerfile +++ b/envoyfilter/Dockerfile @@ -5,6 +5,7 @@ COPY envoyfilter/src /envoyfilter/src COPY envoyfilter/Cargo.toml /envoyfilter/ COPY envoyfilter/Cargo.lock /envoyfilter/ COPY open-message-format /open-message-format +COPY public-types /public-types RUN rustup -v target add wasm32-wasi RUN cargo build --release --target wasm32-wasi @@ -14,3 +15,4 @@ FROM envoyproxy/envoy:v1.30-latest COPY --from=builder /envoyfilter/target/wasm32-wasi/release/intelligent_prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm COPY envoyfilter/envoy.yaml /etc/envoy.yaml CMD ["envoy", "-c", "/etc/envoy/envoy.yaml"] +# CMD ["envoy", "-c", "/etc/envoy/envoy.yaml", "--log-level", "debug"] diff --git a/envoyfilter/docker-compose.yaml b/envoyfilter/docker-compose.yaml index c53fb440..3953a1b1 100644 --- a/envoyfilter/docker-compose.yaml +++ b/envoyfilter/docker-compose.yaml @@ -15,7 +15,6 @@ services: embeddingserver: condition: service_healthy - embeddingserver: build: context: ../embedding-server diff --git a/envoyfilter/envoy.template.yaml b/envoyfilter/envoy.template.yaml index c71cefe2..50610eb0 100644 --- a/envoyfilter/envoy.template.yaml +++ b/envoyfilter/envoy.template.yaml @@ -32,6 +32,19 @@ static_resources: domains: - "*" routes: + - match: + prefix: "/mistral/v1/chat/completions" + route: + auto_host_rewrite: true + cluster: mistral_7b_instruct + timeout: 60s + - match: + prefix: "/bolt_fc_1b/v1/chat/completions" + route: + prefix_rewrite: /v1/chat/completions + auto_host_rewrite: true + cluster: bolt_fc_1b + timeout: 120s - match: prefix: "/v1/chat/completions" headers: @@ -180,3 +193,17 @@ static_resources: address: mistral_7b_instruct port_value: 10001 hostname: "mistral_7b_instruct" + - name: bolt_fc_1b + connect_timeout: 5s + type: STRICT_DNS + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: bolt_fc_1b + 
endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: functionresolver + port_value: 80 + hostname: "bolt_fc_1b" diff --git a/envoyfilter/src/consts.rs b/envoyfilter/src/consts.rs index b2fa445e..732a2bc5 100644 --- a/envoyfilter/src/consts.rs +++ b/envoyfilter/src/consts.rs @@ -1,8 +1,9 @@ pub const DEFAULT_EMBEDDING_MODEL: &str = "BAAI/bge-large-en-v1.5"; pub const DEFAULT_COLLECTION_NAME: &str = "prompt_vector_store"; -pub const DEFAULT_NER_MODEL: &str = "urchade/gliner_large-v2.1"; pub const DEFAULT_PROMPT_TARGET_THRESHOLD: f64 = 0.6; -pub const DEFAULT_NER_THRESHOLD: f64 = 0.6; pub const RATELIMIT_SELECTOR_HEADER_KEY: &str = "x-katanemo-ratelimit-selector"; pub const SYSTEM_ROLE: &str = "system"; pub const USER_ROLE: &str = "user"; +pub const GPT_35_TURBO: &str = "gpt-3.5-turbo"; +pub const BOLT_FC_CLUSTER: &str = "bolt_fc_1b"; +pub const BOLT_FC_REQUEST_TIMEOUT_MS: u64 = 120000; // 2 minutes diff --git a/envoyfilter/src/filter_context.rs b/envoyfilter/src/filter_context.rs index 7ceafff3..cbe24ecd 100644 --- a/envoyfilter/src/filter_context.rs +++ b/envoyfilter/src/filter_context.rs @@ -2,7 +2,7 @@ use crate::consts::DEFAULT_EMBEDDING_MODEL; use crate::ratelimit; use crate::stats::{Counter, Gauge, RecordingMetric}; use crate::stream_context::StreamContext; -use log::{debug, info}; +use log::debug; use md5::Digest; use open_message_format_embeddings::models::{ CreateEmbeddingRequest, CreateEmbeddingRequestInput, CreateEmbeddingResponse, @@ -72,6 +72,8 @@ impl FilterContext { (":path", "/embeddings"), (":authority", "embeddingserver"), ("content-type", "application/json"), + ("x-envoy-max-retries", "3"), + ("x-envoy-upstream-rq-timeout-ms", "20000"), ], Some(json_data.as_bytes()), vec![], @@ -87,6 +89,10 @@ impl FilterContext { // Need to clone prompt target to leave config string intact. 
prompt_target: prompt_target.clone(), }; + debug!( + "dispatched HTTP call to embedding server token_id={}", + token_id + ); if self .callouts .insert(token_id, { @@ -112,7 +118,16 @@ impl FilterContext { if let Some(body) = self.get_http_call_response_body(0, body_size) { if !body.is_empty() { let mut embedding_response: CreateEmbeddingResponse = - serde_json::from_slice(&body).unwrap(); + match serde_json::from_slice(&body) { + Ok(response) => response, + Err(e) => { + panic!( + "Error deserializing embedding response. body: {:?}: {:?}", + String::from_utf8(body).unwrap(), + e + ); + } + }; let mut payload: HashMap = HashMap::new(); payload.insert( @@ -168,13 +183,15 @@ impl FilterContext { .active_http_calls .record(self.callouts.len().try_into().unwrap()); } + } else { + panic!("No body in response"); } } fn create_vector_store_points_handler(&self, body_size: usize) { if let Some(body) = self.get_http_call_response_body(0, body_size) { if !body.is_empty() { - info!( + debug!( "response body: len {:?}", String::from_utf8(body).unwrap().len() ); @@ -225,7 +242,10 @@ impl Context for FilterContext { body_size: usize, _num_trailers: usize, ) { - let callout_data = self.callouts.remove(&token_id).expect("invalid token_id"); + let callout_data = self + .callouts + .remove(&token_id) + .expect("invalid token_id: {}"); self.metrics .active_http_calls @@ -250,7 +270,7 @@ impl Context for FilterContext { http_status_code.clone_from(v); } }); - info!("CreateVectorCollection response: {}", http_status_code); + debug!("CreateVectorCollection response: {}", http_status_code); } } } diff --git a/envoyfilter/src/stream_context.rs b/envoyfilter/src/stream_context.rs index 0a751ec5..c96f1a4f 100644 --- a/envoyfilter/src/stream_context.rs +++ b/envoyfilter/src/stream_context.rs @@ -1,6 +1,7 @@ use crate::consts::{ - DEFAULT_COLLECTION_NAME, DEFAULT_EMBEDDING_MODEL, DEFAULT_NER_MODEL, DEFAULT_NER_THRESHOLD, - DEFAULT_PROMPT_TARGET_THRESHOLD, RATELIMIT_SELECTOR_HEADER_KEY, 
SYSTEM_ROLE, USER_ROLE, + BOLT_FC_CLUSTER, BOLT_FC_REQUEST_TIMEOUT_MS, DEFAULT_COLLECTION_NAME, DEFAULT_EMBEDDING_MODEL, + DEFAULT_PROMPT_TARGET_THRESHOLD, GPT_35_TURBO, RATELIMIT_SELECTOR_HEADER_KEY, SYSTEM_ROLE, + USER_ROLE, }; use crate::filter_context::WasmMetrics; use crate::ratelimit; @@ -16,9 +17,12 @@ use proxy_wasm::traits::*; use proxy_wasm::types::*; use public_types::common_types::{ open_ai::{ChatCompletions, Message}, - NERRequest, NERResponse, SearchPointsRequest, SearchPointsResponse, + SearchPointsRequest, SearchPointsResponse, }; -use public_types::configuration::{Entity, PromptTarget}; +use public_types::common_types::{ + BoltFCResponse, BoltFCToolsCall, ToolParameter, ToolParameters, ToolsDefinition, +}; +use public_types::configuration::{PromptTarget, PromptType}; use std::collections::HashMap; use std::num::NonZero; use std::rc::Rc; @@ -27,8 +31,8 @@ use std::time::Duration; enum RequestType { GetEmbedding, SearchPoints, - Ner, - ContextResolver, + FunctionResolver, + FunctionCallResponse, } pub struct CallContext { @@ -153,8 +157,23 @@ impl StreamContext { } info!("similarity score: {}", search_results[0].score); + // Check to see who responded to user message. This will help us identify if control should be passed to Bolt FC or not. + // If the last message was from Bolt FC, then Bolt FC is handling the conversation (possibly for parameter collection). 
+ let mut bolt_assistant = false; + let messages = &callout_context.request_body.messages; + if messages.len() >= 2 { + let latest_assistant_message = &messages[messages.len() - 2]; + if let Some(model) = latest_assistant_message.model.as_ref() { + if model.starts_with("Bolt") { + info!("Bolt assistant message found"); + bolt_assistant = true; + } + } + } else { + info!("no assistant message found, probably first interaction"); + } - if search_results[0].score < DEFAULT_PROMPT_TARGET_THRESHOLD { + if search_results[0].score < DEFAULT_PROMPT_TARGET_THRESHOLD && !bolt_assistant { info!( "prompt target below threshold: {}", DEFAULT_PROMPT_TARGET_THRESHOLD @@ -172,177 +191,237 @@ impl StreamContext { return; } }; - info!("prompt_target name: {:?}", prompt_target.name); + info!( + "prompt_target name: {:?}, type: {:?}", + prompt_target.name, prompt_target.prompt_type + ); - // only extract entity names - let entity_names: Vec = match prompt_target.entities { - // Clone is unavoidable here because we don't want to move the values out of the prompt target struct. - Some(ref entities) => entities.iter().map(|entity| entity.name.clone()).collect(), - None => vec![], - }; + match prompt_target.prompt_type { + PromptType::FunctionResolver => { + // only extract entity names + let properties: HashMap = match prompt_target.parameters { + // Clone is unavoidable here because we don't want to move the values out of the prompt target struct. 
+ Some(ref entities) => { + let mut properties: HashMap = HashMap::new(); + for entity in entities.iter() { + let param = ToolParameter { + parameter_type: entity.parameter_type.clone(), + description: entity.description.clone(), + required: entity.required, + }; + properties.insert(entity.name.clone(), param); + } + properties + } + None => HashMap::new(), + }; + let tools_parameters = ToolParameters { + parameters_type: "dict".to_string(), + properties, + }; - let ner_request = NERRequest { - input: callout_context.user_message.take().unwrap(), - labels: entity_names, - model: DEFAULT_NER_MODEL.to_string(), - }; + let tools_defintion: ToolsDefinition = ToolsDefinition { + name: prompt_target.name.clone(), + description: prompt_target.description.clone().unwrap_or("".to_string()), + parameters: tools_parameters, + }; - let json_data: String = match serde_json::to_string(&ner_request) { - Ok(json_data) => json_data, - Err(e) => { - warn!("Error serializing ner_request: {:?}", e); - self.resume_http_request(); - return; + let chat_completions = ChatCompletions { + model: GPT_35_TURBO.to_string(), + messages: callout_context.request_body.messages.clone(), + tools: Some(vec![tools_defintion]), + }; + + let msg_body = match serde_json::to_string(&chat_completions) { + Ok(msg_body) => { + debug!("msg_body: {}", msg_body); + msg_body + } + Err(e) => { + warn!("Error serializing request_params: {:?}", e); + self.resume_http_request(); + return; + } + }; + + let token_id = match self.dispatch_http_call( + BOLT_FC_CLUSTER, + vec![ + (":method", "POST"), + (":path", "/v1/chat/completions"), + (":authority", BOLT_FC_CLUSTER), + ("content-type", "application/json"), + ("x-envoy-max-retries", "3"), + ( + "x-envoy-upstream-rq-timeout-ms", + BOLT_FC_REQUEST_TIMEOUT_MS.to_string().as_str(), + ), + ], + Some(msg_body.as_bytes()), + vec![], + Duration::from_secs(5), + ) { + Ok(token_id) => token_id, + Err(e) => { + panic!("Error dispatching HTTP call for function-call: {:?}", e); + 
} + }; + + debug!( + "dispatched call to function {} token_id={}", + BOLT_FC_CLUSTER, token_id + ); + + callout_context.request_type = RequestType::FunctionResolver; + callout_context.prompt_target = Some(prompt_target); + if self.callouts.insert(token_id, callout_context).is_some() { + panic!("duplicate token_id") + } } - }; - - let token_id = match self.dispatch_http_call( - "nerhost", - vec![ - (":method", "POST"), - (":path", "/ner"), - (":authority", "nerhost"), - ("content-type", "application/json"), - ("x-envoy-max-retries", "3"), - ], - Some(json_data.as_bytes()), - vec![], - Duration::from_secs(5), - ) { - Ok(token_id) => token_id, - Err(e) => { - panic!("Error dispatching HTTP call for get-embeddings: {:?}", e); - } - }; - callout_context.request_type = RequestType::Ner; - callout_context.prompt_target = Some(prompt_target); - if self.callouts.insert(token_id, callout_context).is_some() { - panic!("duplicate token_id") } self.metrics.active_http_calls.increment(1); } - fn ner_handler(&mut self, body: Vec, mut callout_context: CallContext) { - let ner_response: NERResponse = match serde_json::from_slice(&body) { - Ok(ner_response) => ner_response, + fn function_resolver_handler(&mut self, body: Vec, mut callout_context: CallContext) { + debug!("response received for function resolver"); + + let body_str = String::from_utf8(body).unwrap(); + debug!("function_resolver response str: {:?}", body_str); + + let mut boltfc_response: BoltFCResponse = serde_json::from_str(&body_str).unwrap(); + + let boltfc_response_str = boltfc_response.message.content.as_ref().unwrap(); + + let tools_call_response: BoltFCToolsCall = match serde_json::from_str(boltfc_response_str) { + Ok(fc_resp) => fc_resp, Err(e) => { - warn!("Error deserializing ner_response: {:?}", e); - self.resume_http_request(); - return; - } - }; - info!("ner_response: {:?}", ner_response); + // This means that Bolt FC did not have enough information to resolve the function call + // Bolt FC probably 
responded with a message asking for more information. + // Let's send the response back to the user to initalize lightweight dialog for parameter collection - let mut request_params: HashMap = HashMap::new(); - for entity in ner_response.data.into_iter() { - if entity.score < DEFAULT_NER_THRESHOLD { - warn!( - "score of entity was too low entity name: {}, score: {}", - entity.label, entity.score + // add resolver name to the response so the client can send the response back to the correct resolver + boltfc_response.resolver_name = Some(callout_context.prompt_target.unwrap().name); + info!("some requred parameters are missing, sending response from Bolt FC back to user for parameter collection: {}", e); + let bolt_fc_dialogue_message = serde_json::to_string(&boltfc_response).unwrap(); + self.send_http_response( + StatusCode::OK.as_u16().into(), + vec![("Powered-By", "Katanemo")], + Some(bolt_fc_dialogue_message.as_bytes()), ); - continue; - } - request_params.insert(entity.label, entity.text); - } - - let prompt_target = callout_context.prompt_target.as_ref().unwrap(); - - let empty_vec: Vec = vec![]; - for entity in prompt_target.entities.as_ref().unwrap_or(&empty_vec) { - if entity.required.unwrap_or(false) && !request_params.contains_key(&entity.name) { - warn!( - "required entity missing or score of entity was too low: {}", - entity.name - ); - self.resume_http_request(); - return; - } - } - - let req_param_bytes = match serde_json::to_string(&request_params) { - Ok(req_param_str) => req_param_str.as_bytes().to_owned(), - Err(e) => { - warn!("Error serializing request_params: {:?}", e); - self.resume_http_request(); return; } }; - let endpoint = callout_context + // verify required parameters are present + callout_context .prompt_target .as_ref() .unwrap() - .endpoint + .parameters .as_ref() - .unwrap(); + .unwrap() + .iter() + .for_each(|param| match param.required { + None => {} + Some(required) => { + if required + && !tools_call_response.tool_calls[0] + 
.arguments + .contains_key(¶m.name) + { + warn!("boltfc did not extract required parameter: {}", param.name); + return self.send_http_response( + StatusCode::BAD_REQUEST.as_u16().into(), + vec![], + Some("missing required parameter".as_bytes()), + ); + } + } + }); - let http_path = match &endpoint.path { - Some(path) => path, - None => "/", - }; + debug!("tool_call_details: {:?}", tools_call_response); + let tool_name = &tools_call_response.tool_calls[0].name; + let tool_params = &tools_call_response.tool_calls[0].arguments; + debug!("tool_name: {:?}", tool_name); + debug!("tool_params: {:?}", tool_params); + let prompt_target = callout_context.prompt_target.as_ref().unwrap(); + debug!("prompt_target: {:?}", prompt_target); - let http_method = match &endpoint.method { - Some(method) => method, - None => http::Method::POST.as_str(), - }; + let tool_params_json_str = serde_json::to_string(&tool_params).unwrap(); + let endpoint = prompt_target.endpoint.as_ref().unwrap(); let token_id = match self.dispatch_http_call( &endpoint.cluster, vec![ - (":method", http_method), - (":path", http_path), + (":method", "POST"), + (":path", endpoint.path.as_ref().unwrap_or(&"/".to_string())), (":authority", endpoint.cluster.as_str()), ("content-type", "application/json"), ("x-envoy-max-retries", "3"), ], - Some(&req_param_bytes), + Some(tool_params_json_str.as_bytes()), vec![], Duration::from_secs(5), ) { Ok(token_id) => token_id, Err(e) => { - panic!("Error dispatching HTTP call for context_resolver: {:?}", e); + panic!("Error dispatching HTTP call for function_resolver: {:?}", e); } }; - callout_context.request_type = RequestType::ContextResolver; + + callout_context.request_type = RequestType::FunctionCallResponse; if self.callouts.insert(token_id, callout_context).is_some() { panic!("duplicate token_id") } self.metrics.active_http_calls.increment(1); } - fn context_resolver_handler(&mut self, body: Vec, callout_context: CallContext) { - debug!("response received for 
context_resolver"); - let body_string = String::from_utf8(body); - let prompt_target = callout_context.prompt_target.unwrap(); - let mut request_body = callout_context.request_body; - match prompt_target.system_prompt { + fn function_call_response_handler(&mut self, body: Vec, callout_context: CallContext) { + debug!("response received for function call response"); + let body_str: String = String::from_utf8(body).unwrap(); + debug!("function_call_response response str: {:?}", body_str); + let prompt_target = callout_context.prompt_target.as_ref().unwrap(); + + let mut messages: Vec = callout_context.request_body.messages.clone(); + + // add system prompt + match prompt_target.system_prompt.as_ref() { None => {} Some(system_prompt) => { - let system_prompt_message: Message = Message { + let system_prompt_message = Message { role: SYSTEM_ROLE.to_string(), - content: Some(system_prompt), + content: Some(system_prompt.clone()), + model: None, }; - request_body.messages.push(system_prompt_message); - } - } - match body_string { - Ok(body_string) => { - info!("context_resolver response: {}", body_string); - let context_resolver_response = Message { - role: USER_ROLE.to_string(), - content: Some(body_string), - }; - request_body.messages.push(context_resolver_response); - } - Err(e) => { - warn!("Error converting response to string: {:?}", e); - self.resume_http_request(); - return; + messages.push(system_prompt_message); } } - let json_string = match serde_json::to_string(&request_body) { + // add data from function call response + messages.push({ + Message { + role: USER_ROLE.to_string(), + content: Some(body_str), + model: None, + } + }); + + // add original user prompt + messages.push({ + Message { + role: USER_ROLE.to_string(), + content: Some(callout_context.user_message.unwrap()), + model: None, + } + }); + + let request_message: ChatCompletions = ChatCompletions { + model: GPT_35_TURBO.to_string(), + messages, + tools: None, + }; + + let json_string = match 
serde_json::to_string(&request_message) { Ok(json_string) => json_string, Err(e) => { warn!("Error serializing request_body: {:?}", e); @@ -350,6 +429,12 @@ impl StreamContext { return; } }; + debug!( + "function_calling sending request to openai: msg {}", + json_string + ); + + let request_body = callout_context.request_body; // Tokenize and Ratelimit. if let Some(selector) = self.ratelimit_selector.take() { @@ -405,8 +490,7 @@ impl HttpContext for StreamContext { // Deserialize body into spec. // Currently OpenAI API. - let mut deserialized_body: ChatCompletions = match self.get_http_request_body(0, body_size) - { + let deserialized_body: ChatCompletions = match self.get_http_request_body(0, body_size) { Some(body_bytes) => match serde_json::from_slice(&body_bytes) { Ok(deserialized) => deserialized, Err(msg) => { @@ -434,12 +518,12 @@ impl HttpContext for StreamContext { let user_message = match deserialized_body .messages - .pop() - .and_then(|last_message| last_message.content) + .last() + .and_then(|last_message| last_message.content.clone()) { Some(content) => content, None => { - info!("No messages in the request body"); + warn!("No messages in the request body"); return Action::Continue; } }; @@ -468,6 +552,7 @@ impl HttpContext for StreamContext { (":authority", "embeddingserver"), ("content-type", "application/json"), ("x-envoy-max-retries", "3"), + ("x-envoy-upstream-rq-timeout-ms", "60000"), ], Some(json_data.as_bytes()), vec![], @@ -481,6 +566,11 @@ impl HttpContext for StreamContext { ); } }; + debug!( + "dispatched HTTP call to embedding server token_id={}", + token_id + ); + let call_context = CallContext { request_type: RequestType::GetEmbedding, user_message: Some(user_message), @@ -530,8 +620,10 @@ impl Context for StreamContext { match callout_context.request_type { RequestType::GetEmbedding => self.embeddings_handler(body, callout_context), RequestType::SearchPoints => self.search_points_handler(body, callout_context), - RequestType::Ner => 
self.ner_handler(body, callout_context), - RequestType::ContextResolver => self.context_resolver_handler(body, callout_context), + RequestType::FunctionResolver => self.function_resolver_handler(body, callout_context), + RequestType::FunctionCallResponse => { + self.function_call_response_handler(body, callout_context) + } } } } diff --git a/envoyfilter/tests/integration.rs b/envoyfilter/tests/integration.rs index 71cd550e..724e9b6b 100644 --- a/envoyfilter/tests/integration.rs +++ b/envoyfilter/tests/integration.rs @@ -8,9 +8,14 @@ use proxy_wasm_test_framework::tester::{self, Tester}; use proxy_wasm_test_framework::types::{ Action, BufferType, LogLevel, MapType, MetricType, ReturnType, }; -use public_types::configuration::{self, Endpoint, PromptTarget}; use public_types::{ - common_types::{self, NERResponse, SearchPointResult, SearchPointsResponse}, + common_types::{ + open_ai::Message, BoltFCResponse, BoltFCToolsCall, IntOrString, ToolCallDetail, + }, + configuration::{self, Endpoint, PromptTarget}, +}; +use public_types::{ + common_types::{SearchPointResult, SearchPointsResponse}, configuration::Configuration, }; use serial_test::serial; @@ -87,6 +92,7 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) { .expect_http_call(Some("embeddingserver"), None, None, None, None) .returning(Some(1)) .expect_metric_increment("active_http_calls", 1) + .expect_log(Some(LogLevel::Debug), None) .execute_and_expect(ReturnType::Action(Action::Pause)) .unwrap(); @@ -120,12 +126,14 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) { let prompt_target = PromptTarget { name: String::from("test-prompt-target"), - prompt_type: String::from("test-prompt-type"), + description: None, + prompt_type: configuration::PromptType::FunctionResolver, few_shot_examples: vec![], - entities: Some(vec![configuration::Entity { + parameters: Some(vec![configuration::Parameter { name: String::from("test-entity"), + parameter_type: 
Some(String::from("string")), + description: String::from("test-description"), required: Some(true), - description: None, }]), endpoint: Some(Endpoint { cluster: String::from("test-endpoint-cluster"), @@ -159,33 +167,13 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) { .returning(Some(&search_points_response_buffer)) .expect_log(Some(LogLevel::Info), None) .expect_log(Some(LogLevel::Info), None) - .expect_http_call(Some("nerhost"), None, None, None, None) + .expect_log(Some(LogLevel::Debug), None) + .expect_http_call(Some("bolt_fc_1b"), None, None, None, None) .returning(Some(3)) + .expect_log(Some(LogLevel::Debug), None) .expect_metric_increment("active_http_calls", 1) .execute_and_expect(ReturnType::None) .unwrap(); - - let ner_reponse = NERResponse { - model: String::from("test-model"), - data: vec![common_types::Entity { - score: 0.7, - text: String::from("test-text"), - label: String::from("test-entity"), - }], - }; - let ner_response_buffer = serde_json::to_string(&ner_reponse).unwrap(); - let upstream_name = prompt_target.endpoint.unwrap().cluster.leak(); - module - .call_proxy_on_http_call_response(http_context, 3, 0, ner_response_buffer.len() as i32, 0) - .expect_metric_increment("active_http_calls", -1) - .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) - .returning(Some(&ner_response_buffer)) - .expect_log(Some(LogLevel::Info), None) - .expect_http_call(Some(upstream_name), None, None, None, None) - .returning(Some(4)) - .expect_metric_increment("active_http_calls", 1) - .execute_and_expect(ReturnType::None) - .unwrap() } fn default_config() -> Configuration { @@ -209,7 +197,7 @@ system_prompt: | - Use miles per hour for wind speed prompt_targets: - - type: context_resolver + - type: function_resolver name: weather_forecast few_shot_examples: - what is the weather in New York? 
@@ -221,7 +209,7 @@ prompt_targets: required: true description: "The location for which the weather is requested" - - type: context_resolver + - type: function_resolver name: weather_forecast_2 few_shot_examples: - what is the weather in New York? @@ -327,6 +315,7 @@ fn successful_request_to_open_ai_chat_completions() { .returning(Some(chat_completions_request_body)) // TODO: assert that the model field was added. .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) + .expect_log(Some(LogLevel::Debug), None) .expect_metric_increment("active_http_calls", 1) .execute_and_expect(ReturnType::Action(Action::Pause)) .unwrap(); @@ -460,14 +449,57 @@ fn request_ratelimited() { normal_flow(&mut module, filter_context, http_context); - let test_body = "test body"; + let tool_call_detail = vec![ToolCallDetail { + name: String::from("test-tool"), + arguments: HashMap::from([( + String::from("test-entity"), + IntOrString::Text(String::from("test-value")), + )]), + }]; + + let boltfc_tools_call = BoltFCToolsCall { + tool_calls: tool_call_detail, + }; + + let bolt_fc_resp = BoltFCResponse { + model: String::from("test"), + message: Message { + role: String::from("system"), + content: Some(serde_json::to_string(&boltfc_tools_call).unwrap()), + model: None, + }, + done_reason: String::from("test"), + done: true, + resolver_name: None, + }; + + let bolt_fc_resp_str = serde_json::to_string(&bolt_fc_resp).unwrap(); module - .call_proxy_on_http_call_response(http_context, 4, 0, test_body.len() as i32, 0) + .call_proxy_on_http_call_response(http_context, 3, 0, bolt_fc_resp_str.len() as i32, 0) .expect_metric_increment("active_http_calls", -1) .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) - .returning(Some(test_body)) + .returning(Some(&bolt_fc_resp_str)) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + 
.expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_http_call(Some("test-endpoint-cluster"), None, None, None, None) + .returning(Some(4)) + .expect_metric_increment("active_http_calls", 1) + .execute_and_expect(ReturnType::None) + .unwrap(); + + let body_text = String::from("test body"); + module + .call_proxy_on_http_call_response(http_context, 4, 0, body_text.len() as i32, 0) + .expect_metric_increment("active_http_calls", -1) + .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) + .returning(Some(&body_text)) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Info), None) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) .expect_send_local_response( @@ -522,18 +554,61 @@ fn request_not_ratelimited() { normal_flow(&mut module, filter_context, http_context); - let test_body = "test body"; + let tool_call_detail = vec![ToolCallDetail { + name: String::from("test-tool"), + arguments: HashMap::from([( + String::from("test-entity"), + IntOrString::Text(String::from("test-value")), + )]), + }]; + + let boltfc_tools_call = BoltFCToolsCall { + tool_calls: tool_call_detail, + }; + + let bolt_fc_resp = BoltFCResponse { + model: String::from("test"), + message: Message { + role: String::from("system"), + content: Some(serde_json::to_string(&boltfc_tools_call).unwrap()), + model: None, + }, + done_reason: String::from("test"), + done: true, + resolver_name: None, + }; + + let bolt_fc_resp_str = serde_json::to_string(&bolt_fc_resp).unwrap(); module - .call_proxy_on_http_call_response(http_context, 4, 0, test_body.len() as i32, 0) + .call_proxy_on_http_call_response(http_context, 3, 0, bolt_fc_resp_str.len() as i32, 0) .expect_metric_increment("active_http_calls", -1) .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) - .returning(Some(test_body)) + 
.returning(Some(&bolt_fc_resp_str)) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_log(Some(LogLevel::Debug), None) + .expect_http_call(Some("test-endpoint-cluster"), None, None, None, None) + .returning(Some(4)) + .expect_metric_increment("active_http_calls", 1) + .execute_and_expect(ReturnType::None) + .unwrap(); + + let body_text = String::from("test body"); + module + .call_proxy_on_http_call_response(http_context, 4, 0, body_text.len() as i32, 0) + .expect_metric_increment("active_http_calls", -1) + .expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody)) + .returning(Some(&body_text)) + .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) - .expect_log(Some(LogLevel::Info), None) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) .expect_log(Some(LogLevel::Debug), None) .expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None) + .expect_log(Some(LogLevel::Debug), None) .execute_and_expect(ReturnType::None) .unwrap(); } diff --git a/function_resolver/.vscode/launch.json b/function_resolver/.vscode/launch.json new file mode 100644 index 00000000..0de58feb --- /dev/null +++ b/function_resolver/.vscode/launch.json @@ -0,0 +1,16 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. 
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "function resolver server", + "cwd": "${workspaceFolder}/app", + "type": "debugpy", + "request": "launch", + "module": "uvicorn", + "args": ["main:app","--reload", "--host", "0.0.0.0", "--port", "8001", "--log-config", "logger.yaml"], + } + ] +} diff --git a/function_resolver/Dockerfile b/function_resolver/Dockerfile new file mode 100644 index 00000000..fc2400c8 --- /dev/null +++ b/function_resolver/Dockerfile @@ -0,0 +1,30 @@ +FROM python:3 AS base + +# +# builder +# +FROM base AS builder + +WORKDIR /src + +COPY requirements.txt /src/ +RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt + +COPY . /src + +# +# output +# + +FROM python:3-slim AS output + +COPY --from=builder /runtime /usr/local + +COPY /app /app +WORKDIR /app + +RUN apt-get update && apt-get install -y \ + curl \ + && rm -rf /var/lib/apt/lists/* + +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80", "--log-config", "logger.yaml"] diff --git a/function_resolver/app/bolt_handler.py b/function_resolver/app/bolt_handler.py new file mode 100644 index 00000000..bd544803 --- /dev/null +++ b/function_resolver/app/bolt_handler.py @@ -0,0 +1,225 @@ +import json +from typing import Any, Dict, List + + +SYSTEM_PROMPT = """ +[BEGIN OF TASK INSTRUCTION] +You are a function calling assistant with access to the following tools. You task is to assist users as best as you can. +For each user query, you may need to call one or more functions to to better generate responses. +If none of the functions are relevant, you should point it out. +If the given query lacks the parameters required by the function, you should ask users for clarification. +The users may execute functions and return results as `Observation` to you. In the case, you MUST generate responses by summarizing it. 
+[END OF TASK INSTRUCTION] +""".strip() + +TOOL_PROMPT = """ +[BEGIN OF AVAILABLE TOOLS] +{tool_text} +[END OF AVAILABLE TOOLS] +""".strip() + +FORMAT_PROMPT = """ +[BEGIN OF FORMAT INSTRUCTION] +You MUST use the following JSON format if using tools. +The example format is as follows. DO NOT use this format if no function call is needed. +``` +{ + "tool_calls": [ + {"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}}, + ... (more tool calls as required) + ] +} +``` +[END OF FORMAT INSTRUCTION] +""".strip() + + +class BoltHandler: + def _format_system(self, tools: List[Dict[str, Any]]): + tool_text = self._format_tools(tools=tools) + return ( + SYSTEM_PROMPT + + "\n\n" + + TOOL_PROMPT.format(tool_text=tool_text) + + "\n\n" + + FORMAT_PROMPT + + "\n" + ) + + def _format_tools(self, tools: List[Dict[str, Any]]): + TOOL_DESC = "> Tool Name: {name}\nTool Description: {desc}\nTool Args:\n{args}" + + tool_text = [] + for tool in tools: + param_text = self.get_param_text(tool.parameters) + tool_text.append( + TOOL_DESC.format( + name=tool.name, desc=tool.description, args=param_text + ) + ) + + return "\n".join(tool_text) + + def extract_tools(self, content, executable=False): + # retrieve `tool_calls` from model responses + try: + content_json = json.loads(content) + except Exception: + fixed_content = self.fix_json_string(content) + try: + content_json = json.loads(fixed_content) + except json.JSONDecodeError: + return content + + if isinstance(content_json, list): + tool_calls = content_json + elif isinstance(content_json, dict): + tool_calls = content_json.get("tool_calls", []) + else: + tool_calls = [] + + if not isinstance(tool_calls, list): + return content + + # process and extract tools from `tool_calls` + extracted = [] + + for tool_call in tool_calls: + if isinstance(tool_call, dict): + try: + if not executable: + extracted.append({tool_call["name"]: tool_call["arguments"]}) + else: + name, arguments = ( + tool_call.get("name", 
""), + tool_call.get("arguments", {}), + ) + + for key, value in arguments.items(): + if value == "False" or value == "false": + arguments[key] = False + elif value == "True" or value == "true": + arguments[key] = True + + args_str = ", ".join( + [f"{key}={repr(value)}" for key, value in arguments.items()] + ) + + extracted.append(f"{name}({args_str})") + + except Exception: + continue + + return extracted + + def get_param_text(self, parameter_dict, prefix=""): + param_text = "" + + for name, param in parameter_dict["properties"].items(): + param_type = param.get("type", "") + + required, default, param_format, properties, enum, items = ( + "", + "", + "", + "", + "", + "", + ) + + if name in parameter_dict.get("required", []): + required = ", required" + + required_param = parameter_dict.get("required", []) + + if isinstance(required_param, bool): + required = ", required" if required_param else "" + elif isinstance(required_param, list) and name in required_param: + required = ", required" + else: + required = ", optional" + + default_param = param.get("default", None) + if default_param: + default = f", default: {default_param}" + + format_in = param.get("format", None) + if format_in: + param_format = f", format: {format_in}" + + desc = param.get("description", "") + + if "properties" in param: + arg_properties = self.get_param_text(param, prefix + " ") + properties += "with the properties:\n{}".format(arg_properties) + + enum_param = param.get("enum", None) + if enum_param: + enum = "should be one of [{}]".format(", ".join(enum_param)) + + item_param = param.get("items", None) + if item_param: + item_type = item_param.get("type", None) + if item_type: + items += "each item should be the {} type ".format(item_type) + + item_properties = item_param.get("properties", None) + if item_properties: + item_properties = self.get_param_text(item_param, prefix + " ") + items += "with the properties:\n{}".format(item_properties) + + illustration = ", ".join( + [x for x 
in [desc, properties, enum, items] if len(x)] + ) + + param_text += ( + prefix + + "- {name} ({param_type}{required}{param_format}{default}): {illustration}\n".format( + name=name, + param_type=param_type, + required=required, + param_format=param_format, + default=default, + illustration=illustration, + ) + ) + + return param_text + + def fix_json_string(self, json_str): + # Remove any leading or trailing whitespace or newline characters + json_str = json_str.strip() + + # Stack to keep track of brackets + stack = [] + + # Clean string to collect valid characters + fixed_str = "" + + # Dictionary for matching brackets + matching_bracket = {")": "(", "}": "{", "]": "["} + + # Dictionary for the opposite of matching_bracket + opening_bracket = {v: k for k, v in matching_bracket.items()} + + for char in json_str: + if char in "{[(": + stack.append(char) + fixed_str += char + elif char in "}])": + if stack and stack[-1] == matching_bracket[char]: + stack.pop() + fixed_str += char + else: + # Ignore the unmatched closing brackets + continue + else: + fixed_str += char + + # If there are unmatched opening brackets left in the stack, add corresponding closing brackets + while stack: + unmatched_opening = stack.pop() + fixed_str += opening_bracket[unmatched_opening] + + # Attempt to parse the corrected string to ensure it’s valid JSON + return fixed_str diff --git a/function_resolver/app/common.py b/function_resolver/app/common.py new file mode 100644 index 00000000..3b44863e --- /dev/null +++ b/function_resolver/app/common.py @@ -0,0 +1,14 @@ +from pydantic import BaseModel + +class Tool(BaseModel): + name: str + description: str + parameters: dict + +class Message(BaseModel): + role: str + content: str + +class ChatMessage(BaseModel): + messages: list[Message] + tools: list[Tool] diff --git a/function_resolver/app/logger.yaml b/function_resolver/app/logger.yaml new file mode 100644 index 00000000..f900363d --- /dev/null +++ b/function_resolver/app/logger.yaml @@ -0,0 
+1,14 @@ +version: 1 +disable_existing_loggers: False +formatters: + timestamped: + format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s' +handlers: + console: + class: logging.StreamHandler + level: INFO + formatter: timestamped + stream: ext://sys.stdout +root: + level: INFO + handlers: [console] diff --git a/function_resolver/app/main.py b/function_resolver/app/main.py new file mode 100644 index 00000000..8296b128 --- /dev/null +++ b/function_resolver/app/main.py @@ -0,0 +1,40 @@ +from fastapi import FastAPI, Response +from bolt_handler import BoltHandler +from common import ChatMessage +import logging +from ollama import Client +import os + +ollama_endpoint = os.getenv("OLLAMA_ENDPOINT", "localhost") +ollama_model = os.getenv("OLLAMA_MODEL", "Bolt-Function-Calling-1B:Q4_K_M") +logger = logging.getLogger('uvicorn.error') + +logger.info(f"using model: {ollama_model}") +logger.info(f"using ollama endpoint: {ollama_endpoint}") + +app = FastAPI() +handler = BoltHandler() + +ollama_client = Client(host=ollama_endpoint) + + +@app.get("/healthz") +async def healthz(): + return { + "status": "ok" + } + + +@app.post("/v1/chat/completions") +async def chat_completion(req: ChatMessage, res: Response): + logger.info("starting request") + tools_encoded = handler._format_system(req.tools) + messages = [] + messages.append( + {"role": "system", "content": tools_encoded} + ) + messages.append({"role": "user", "content": req.messages[-1].content}) + + resp = ollama_client.chat(messages=messages, model=ollama_model, stream=False) + logger.info(f"response: {resp}") + return resp diff --git a/function_resolver/requirements.txt b/function_resolver/requirements.txt new file mode 100644 index 00000000..240ae61d --- /dev/null +++ b/function_resolver/requirements.txt @@ -0,0 +1,4 @@ +fastapi +uvicorn +ollama +PyYAML diff --git a/function_resolver/test/test.sh b/function_resolver/test/test.sh new file mode 100644 index 00000000..618b5136 --- /dev/null +++ 
b/function_resolver/test/test.sh @@ -0,0 +1,5 @@ +PORT="${PORT:-8001}" + +echo localhost:$PORT/v1/chat/completions + +curl -H "content-type: application/json" -XPOST localhost:$PORT/v1/chat/completions -d @test_payload.json diff --git a/function_resolver/test/test_envoy_cluster.sh b/function_resolver/test/test_envoy_cluster.sh new file mode 100644 index 00000000..3aea11e5 --- /dev/null +++ b/function_resolver/test/test_envoy_cluster.sh @@ -0,0 +1,5 @@ +PORT="${PORT:-8001}" + +echo localhost:$PORT/bolt_fc_1b/v1/chat/completions + +curl -v -H "content-type: application/json" -XPOST localhost:$PORT/bolt_fc_1b/v1/chat/completions -d @test_payload.json diff --git a/function_resolver/test/test_missing_param.sh b/function_resolver/test/test_missing_param.sh new file mode 100644 index 00000000..67d40c5c --- /dev/null +++ b/function_resolver/test/test_missing_param.sh @@ -0,0 +1 @@ +curl -H "content-type: application/json" -XPOST localhost:8001/v1/chat/completions -d @test_payload_missing_param.json diff --git a/function_resolver/test/test_payload.json b/function_resolver/test/test_payload.json new file mode 100644 index 00000000..66e3d202 --- /dev/null +++ b/function_resolver/test/test_payload.json @@ -0,0 +1,33 @@ +{ + "messages": [ + { + "role": "user", + "content": "Find the area of a triangle with a base of 10 units and height of 5 units." 
+ } + ], + "tools": [ + { + "name": "calculate_triangle_area", + "description": "Calculate the area of a triangle given its base and height.", + "parameters": { + "type": "dict", + "properties": { + "base": { + "type": "integer", + "description": "The base of the triangle.", + "required": true + }, + "height": { + "type": "integer", + "description": "The height of the triangle.", + "required": true + }, + "unit": { + "type": "string", + "description": "The unit of measure (defaults to 'units' if not specified)" + } + } + } + } + ] +} diff --git a/function_resolver/test/test_payload_missing_param.json b/function_resolver/test/test_payload_missing_param.json new file mode 100644 index 00000000..7f23f27c --- /dev/null +++ b/function_resolver/test/test_payload_missing_param.json @@ -0,0 +1,32 @@ +{ + "messages": [ + { + "role": "user", + "content": "Find the area of a triangle" + } + ], + "tools": [ + { + "name": "calculate_triangle_area", + "description": "Calculate the area of a triangle given its base and height.", + "parameters": { + "type": "dict", + "properties": { + "base": { + "type": "integer", + "description": "The base of the triangle." + }, + "height": { + "type": "integer", + "description": "The height of the triangle." + }, + "unit": { + "type": "string", + "description": "The unit of measure (defaults to 'units' if not specified)" + } + }, + "required": ["base", "height"] + } + } + ] +} diff --git a/function_resolver/test/test_weather.json b/function_resolver/test/test_weather.json new file mode 100644 index 00000000..d26e717e --- /dev/null +++ b/function_resolver/test/test_weather.json @@ -0,0 +1,29 @@ +{ + "messages": [ + { + "role": "user", + "content": "how is the weather in San Francisco for next 5 days?" 
+ } + ], + "tools": [ + { + "name": "weather_forecast", + "description": "This function resolver provides weather forecast information for a given city.", + "parameters": { + "type": "dict", + "properties": { + "days": { + "description": "The number of days for which the weather forecast is requested." + }, + "units": { + "description": "The units in which the weather forecast is requested." + }, + "city": { + "description": "The city for which the weather forecast is requested.", + "required": true + } + } + } + } + ] +} diff --git a/gateway.code-workspace b/gateway.code-workspace index b4f6b9e2..109f42d7 100644 --- a/gateway.code-workspace +++ b/gateway.code-workspace @@ -12,6 +12,10 @@ "name": "embedding-server", "path": "embedding-server" }, + { + "name": "function_resolver", + "path": "function_resolver" + }, { "name": "chatbot-ui", "path": "chatbot-ui" @@ -21,13 +25,9 @@ "path": "open-message-format" }, { - "name": "demos/weather-forecast", - "path": "./demos/weather-forecast", + "name": "demos/function-calling", + "path": "./demos/function-calling", }, - { - "name": "demos/weather-forecast-local-llm", - "path": "./demos/weather-forecast-local-llm", - } ], "settings": {} } diff --git a/public-types/src/common_types.rs b/public-types/src/common_types.rs index 4882e1bb..838aa2b8 100644 --- a/public-types/src/common_types.rs +++ b/public-types/src/common_types.rs @@ -53,37 +53,75 @@ pub struct SearchPointsResponse { } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct NERRequest { - pub input: String, - pub labels: Vec, - pub model: String, +pub struct ToolParameter { + #[serde(rename = "type")] + #[serde(skip_serializing_if = "Option::is_none")] + pub parameter_type: Option, + pub description: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub required: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Entity { - pub text: String, - pub label: String, - pub score: f64, +pub struct ToolParameters { + #[serde(rename = 
"type")] + pub parameters_type: String, + pub properties: HashMap, } + #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct NERResponse { - pub data: Vec, +pub struct ToolsDefinition { + pub name: String, + pub description: String, + pub parameters: ToolParameters, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BoltFCResponse { pub model: String, + pub message: open_ai::Message, + pub done_reason: String, + pub done: bool, + pub resolver_name: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum IntOrString { + Integer(i32), + Text(String), +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ToolCallDetail { + pub name: String, + pub arguments: HashMap, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BoltFCToolsCall { + pub tool_calls: Vec, } pub mod open_ai { use serde::{Deserialize, Serialize}; + use super::ToolsDefinition; + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ChatCompletions { #[serde(default)] pub model: String, pub messages: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub tools: Option>, } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Message { pub role: String, pub content: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub model: Option, } } diff --git a/public-types/src/configuration.rs b/public-types/src/configuration.rs index a888d56b..36e6eb8f 100644 --- a/public-types/src/configuration.rs +++ b/public-types/src/configuration.rs @@ -80,19 +80,28 @@ pub struct Endpoint { } #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Entity { +pub struct Parameter { pub name: String, + #[serde(rename = "type")] + pub parameter_type: Option, + pub description: String, pub required: Option, - pub description: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum PromptType { + #[serde(rename = "function_resolver")] + FunctionResolver, } #[derive(Debug, Clone, Serialize, 
Deserialize)] pub struct PromptTarget { #[serde(rename = "type")] - pub prompt_type: String, + pub prompt_type: PromptType, pub name: String, + pub description: Option, pub few_shot_examples: Vec, - pub entities: Option>, + pub parameters: Option>, pub endpoint: Option, pub system_prompt: Option, } @@ -119,27 +128,29 @@ system_prompt: | - Use miles per hour for wind speed prompt_targets: - - type: context_resolver + + - type: function_resolver name: weather_forecast few_shot_examples: - what is the weather in New York? endpoint: cluster: weatherhost path: /weather - entities: + parameters: - name: location required: true description: "The location for which the weather is requested" - - type: context_resolver + - type: function_resolver name: weather_forecast_2 few_shot_examples: - what is the weather in New York? endpoint: cluster: weatherhost path: /weather - entities: + parameters: - name: city + description: "The location for which the weather is requested" ratelimits: - provider: open-ai-gpt-4