Merge branch 'main' into adil/add_acm_demo

This commit is contained in:
Adil Hafeez 2025-02-12 10:39:08 -08:00
commit ca766f81fa
No known key found for this signature in database
GPG key ID: 9B18EF7691369645
130 changed files with 1716 additions and 5580 deletions

View file

@ -1,40 +1,97 @@
name: Publish Docker image
env:
DOCKER_IMAGE: katanemo/archgw
on:
release:
types: [published]
push:
branches:
- main
jobs:
push_to_registry:
name: Push Docker image to Docker Hub
runs-on: ubuntu-latest
permissions:
packages: write
contents: read
attestations: write
id-token: write
# Build ARM64 image on native ARM64 runner
build-arm64:
runs-on: [linux-arm64]
steps:
- name: Check out the repo
- name: Checkout Repository
uses: actions/checkout@v4
- name: Log in to Docker Hub
uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
uses: docker/metadata-action@v5
with:
images: katanemo/archgw
images: ${{ env.DOCKER_IMAGE }}
tags: |
type=raw,value=latest # Force the tag to be "latest"
- name: Build and push Docker image
id: push
uses: docker/build-push-action@3b5e8027fcad23fda98b2e3ac259d8d67585f671
- name: Build and Push ARM64 Image
uses: docker/build-push-action@v5
with:
context: .
file: ./arch/Dockerfile
platforms: linux/arm64
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
tags: ${{ steps.meta.outputs.tags }}-arm64
# Build AMD64 image on GitHub's AMD64 runner
build-amd64:
runs-on: ubuntu-latest
steps:
- name: Checkout Repository
uses: actions/checkout@v4
- name: Log in to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.DOCKER_IMAGE }}
tags: |
type=raw,value=latest # Force the tag to be "latest"
- name: Build and Push AMD64 Image
uses: docker/build-push-action@v5
with:
context: .
file: ./arch/Dockerfile
platforms: linux/amd64
push: true
tags: ${{ steps.meta.outputs.tags }}-amd64
# Combine ARM64 and AMD64 images into a multi-arch manifest
create-manifest:
runs-on: ubuntu-latest
needs: [build-arm64, build-amd64] # Wait for both builds
steps:
- name: Log in to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.DOCKER_IMAGE }}
tags: |
type=raw,value=latest # Force the tag to be "latest"
- name: Create Multi-Arch Manifest
run: |
# Combine the architecture-specific images into a "latest" manifest
docker buildx imagetools create -t ${{ steps.meta.outputs.tags }} \
${{ env.DOCKER_IMAGE }}:latest-arm64 \
${{ env.DOCKER_IMAGE }}:latest-amd64

View file

@ -7,7 +7,7 @@ on:
pull_request:
jobs:
test:
e2e_archgw_tests:
runs-on: ubuntu-latest-m
defaults:
run:

View file

@ -7,7 +7,7 @@ on:
pull_request:
jobs:
test:
e2e_model_server_tests:
runs-on: ubuntu-latest-m
defaults:
run:

View file

@ -7,7 +7,7 @@ on:
pull_request:
jobs:
test:
e2e_demo_tests:
runs-on: ubuntu-latest-m
steps:
@ -37,7 +37,7 @@ jobs:
source venv/bin/activate
cd model_server/ && echo "installing model server" && poetry install
cd ../arch/tools && echo "installing archgw cli" && poetry install
cd ../../demos/test_runner && echo "installing test dependencies" && poetry install
cd ../../demos/shared/test_runner && echo "installing test dependencies" && poetry install
- name: run demo tests
env:
@ -45,4 +45,4 @@ jobs:
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
run: |
source venv/bin/activate
cd demos/test_runner && sh run_demo_tests.sh
cd demos/shared/test_runner && sh run_demo_tests.sh

View file

@ -7,7 +7,7 @@ on:
pull_request:
jobs:
test:
e2e_tests:
runs-on: ubuntu-latest
steps:

View file

@ -25,9 +25,7 @@ repos:
name: cargo-test
language: system
types: [file, rust]
# --lib is to only test the library, since when integration tests are made,
# they will be in a separate tests directory
entry: bash -c "cd crates/llm_gateway && cargo test --lib"
entry: bash -c "cd crates && cargo test --lib"
- repo: https://github.com/psf/black
rev: 23.1.0

View file

@ -1,30 +1,43 @@
Focus on what matters most. Arch is an **intelligent proxy server designed for prompts** - to help you protect, observe, and build agentic apps by simply connecting (existing) APIs.
<div align="center">
<img src="docs/source/_static/img/arch-logo.png" alt="Arch Logo" width="75%" height="auto">
</div>
<div align="center">
Built by the contributors of [Envoy Proxy](https://www.envoyproxy.io/) with the belief that:
>Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests including secure handling, intelligent routing, robust observability, and integration with backend (API) systems for personalization outside core business logic.*
![alt text](docs/source/_static/img/arch-logo.png)
<a href="https://www.producthunt.com/posts/arch-3?embed=true&utm_source=badge-top-post-badge&utm_medium=badge&utm_souce=badge-arch&#0045;3" target="_blank"><img src="https://api.producthunt.com/widgets/embed-image/v1/top-post-badge.svg?post_id=565761&theme=light&period=daily" alt="Arch - Build&#0032;fast&#0044;&#0032;hyper&#0045;personalized&#0032;agents&#0032;with&#0032;intelligent&#0032;infra | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" /></a>
_Arch is an intelligent (edge and LLM) proxy designed for agentic applications - to help you protect, observe, and build agentic tasks by simply connecting (existing) APIs._
[Quickstart](#Quickstart) •
[Demos](#Demos) •
[Build agentic apps with Arch](#Build-AI-Agent-with-Arch-Gateway) •
[Use Arch as an LLM router](#Use-Arch-Gateway-as-LLM-Router) •
[Documentation](https://docs.archgw.com) •
[Contact](#Contact)
[![pre-commit](https://github.com/katanemo/arch/actions/workflows/pre-commit.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/pre-commit.yml)
[![rust tests (prompt and llm gateway)](https://github.com/katanemo/arch/actions/workflows/rust_tests.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/rust_tests.yml)
[![e2e tests](https://github.com/katanemo/arch/actions/workflows/e2e_tests.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/e2e_tests.yml)
[![Build and Deploy Documentation](https://github.com/katanemo/arch/actions/workflows/static.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/static.yml)
</div>
# Overview
<a href="https://www.producthunt.com/posts/arch-3?embed=true&utm_source=badge-top-post-badge&utm_medium=badge&utm_souce=badge-arch&#0045;3" target="_blank"><img src="https://api.producthunt.com/widgets/embed-image/v1/top-post-badge.svg?post_id=565761&theme=light&period=daily" alt="Arch - Build&#0032;fast&#0044;&#0032;hyper&#0045;personalized&#0032;agents&#0032;with&#0032;intelligent&#0032;infra | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" /></a>
Arch is engineered with purpose-built LLMs to handle critical but undifferentiated tasks related to the handling and processing of prompts. This includes detecting and rejecting [jailbreak](https://github.com/verazuo/jailbreak_llms) attempts, intelligent task routing for improved accuracy, mapping user request into "backend" functions, and managing the observability of prompts and LLM API calls in a centralized way.
Arch Gateway was built by the contributors of [Envoy Proxy](https://www.envoyproxy.io/) with the belief that:
>Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests including secure handling, intelligent routing, robust observability, and integration with backend (API) systems for personalization outside core business logic.
Arch is engineered with purpose-built LLMs to handle critical but pesky tasks related to the handling and processing of prompts. This includes detecting and rejecting [jailbreak](https://github.com/verazuo/jailbreak_llms) attempts, intent-based routing for improved task accuracy, mapping user requests into "backend" functions, and managing the observability of prompts and LLM API calls in a centralized way.
**Core Features**:
- Built on [Envoy](https://envoyproxy.io): Arch runs alongside application servers as a separate containerized process, and builds on top of Envoy's proven HTTP management and scalability features to handle ingress and egress traffic related to prompts and LLMs.
- Task Routing & Fast Function Calling. Engineered with purpose-built [LLMs](https://huggingface.co/collections/katanemo/arch-function-66f209a693ea8df14317ad68) to handle fast, cost-effective, and accurate prompt-based tasks like function/API calling, and parameter extraction from prompts to build more task-accurate agentic applications.
- Prompt [Guard](https://huggingface.co/collections/katanemo/arch-guard-6702bdc08b889e4bce8f446d): Arch centralizes guardrails to prevent jailbreak attempts and ensure safe user interactions without writing a single line of code.
- Routing & Traffic Management: Arch centralizes calls to LLMs used by your applications, offering smart retries, automatic cutover, and resilient upstream connections for continuous availability.
- Observability: Arch uses the W3C Trace Context standard to enable complete request tracing across applications, ensuring compatibility with observability tools, and provides metrics to monitor latency, token usage, and error rates, helping optimize AI application performance.
**High-Level Network Flow**:
- **Intent-based prompt routing & fast ⚡ function-calling via APIs**. Engineered with purpose-built [LLMs](https://huggingface.co/collections/katanemo/arch-function-66f209a693ea8df14317ad68) to handle fast, cost-effective, and accurate prompt-based tasks like function/API calling, and parameter extraction from prompts to build more task-accurate agentic applications.
- **Prompt [Guard](https://huggingface.co/collections/katanemo/arch-guard-6702bdc08b889e4bce8f446d)**: Arch centralizes guardrails to prevent jailbreak attempts and ensure safe user interactions without writing a single line of code.
- **LLM Routing & Traffic Management**: Arch centralizes calls to LLMs used by your applications, offering smart retries, automatic cutover, and resilient upstream connections for continuous availability.
- **Observability**: Arch uses the W3C Trace Context standard to enable complete request tracing across applications, ensuring compatibility with observability tools, and provides metrics to monitor latency, token usage, and error rates, helping optimize AI application performance.
- **Built on [Envoy](https://envoyproxy.io)**: Arch runs alongside application servers as a separate containerized process, and builds on top of Envoy's proven HTTP management and scalability features to handle ingress and egress traffic related to prompts and LLMs.
**High-Level Sequence Diagram**:
![alt text](docs/source/_static/img/arch_network_diagram_high_level.png)
**Jump to our [docs](https://docs.archgw.com)** to learn how you can use Arch to improve the speed, security and personalization of your GenAI apps.
@ -36,9 +49,9 @@ Arch is engineered with purpose-built LLMs to handle critical but undifferentiat
To get in touch with us, please join our [discord server](https://discord.gg/pGZf2gcwEc). We will be monitoring that actively and offering support there.
## Demos
* [Weather Forecast](demos/weather_forecast/README.md) - Walk through of the core function calling capabilities of arch gateway using weather forecasting service
* [Insurance Agent](demos/insurance_agent/README.md) - Build a full insurance agent with Arch
* [Network Agent](demos/network_agent/README.md) - Build a networking co-pilot/agent agent with Arch
* [Sample App: Weather Forecast Agent](demos/samples_python/weather_forecast/README.md) - A sample agentic weather forecasting app that highlights core function calling capabilities of Arch.
* [Sample App: Network Operator Agent](demos/samples_python/network_switch_operator_agent/README.md) - A simple network device switch operator agent that can retrieve device statistics and reboot devices.
* [Use Case: Connecting to SaaS APIs](demos/use_cases/spotify_bearer_auth) - Connect 3rd party SaaS APIs to your agentic chat experience.
## Quickstart
@ -60,7 +73,7 @@ Arch's CLI allows you to manage and interact with the Arch gateway efficiently.
```console
$ python -m venv venv
$ source venv/bin/activate # On Windows, use: venv\Scripts\activate
$ pip install archgw==0.2.0
$ pip install archgw==0.2.1
```
### Build AI Agent with Arch Gateway

View file

@ -68,6 +68,8 @@ properties:
enum:
- http
- https
http_host:
type: string
additionalProperties: false
required:
- name
@ -77,6 +79,8 @@ properties:
properties:
prompt_target_intent_matching_threshold:
type: number
optimize_context_window:
type: boolean
system_prompt:
type: string
prompt_targets:
@ -133,6 +137,10 @@ properties:
enum:
- GET
- POST
http_headers:
type: object
additionalProperties:
type: string
additionalProperties: false
required:
- name

View file

@ -8,7 +8,7 @@ services:
- "12000:12000"
- "19901:9901"
volumes:
- ${ARCH_CONFIG_FILE:-../demos/weather_forecast/arch_config.yaml}:/app/arch_config.yaml
- ${ARCH_CONFIG_FILE:-../demos/samples_python/weather_forecast/arch_config.yaml}:/app/arch_config.yaml
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
- ./envoy.template.yaml:/app/envoy.template.yaml
- ./arch_config_schema.yaml:/app/arch_config_schema.yaml

View file

@ -570,7 +570,11 @@ static_resources:
socket_address:
address: {{ local_llm_provider.endpoint }}
port_value: {{ local_llm_provider.port }}
{% if local_llm_provider.http_host %}
hostname: {{ local_llm_provider.http_host }}
{% else %}
hostname: {{ local_llm_provider.endpoint }}
{% endif %}
{% if local_llm_provider.protocol == "https" %}
transport_socket:
name: envoy.transport_sockets.tls

View file

@ -19,7 +19,7 @@ source venv/bin/activate
### Step 3: Run the build script
```bash
pip install archgw==0.2.0
pip install archgw==0.2.1
```
## Uninstall Instructions: archgw CLI

View file

@ -16,6 +16,20 @@ ARCH_CONFIG_SCHEMA_FILE = os.getenv(
)
def get_endpoint_and_port(endpoint, protocol):
    """Split an ``endpoint`` string into a ``(host, port)`` pair.

    Args:
        endpoint: Either ``"host"`` or ``"host:port"``. Only the text between
            the first and second ``:`` is used as the port; anything after a
            second ``:`` is ignored.
        protocol: Used only when ``endpoint`` carries no port. ``"http"``
            selects port 80; any other value selects port 443.

    Returns:
        A ``(host, port)`` tuple where ``port`` is an ``int``.

    Raises:
        ValueError: If a port is present but is not a valid integer
            (for example ``"host:"`` or ``"host:abc"``).
    """
    host, separator, remainder = endpoint.partition(":")

    if not separator:
        # No explicit port: fall back to the protocol's default port.
        return endpoint, 80 if protocol == "http" else 443

    # Only the token right after the first ':' is treated as the port.
    port_text = remainder.split(":")[0]
    try:
        port = int(port_text)
    except ValueError:
        # Name the offending endpoint instead of surfacing int()'s generic
        # "invalid literal" message.
        raise ValueError(
            f"invalid port {port_text!r} in endpoint {endpoint!r}"
        ) from None
    return host, port
def validate_and_render_schema():
env = Environment(loader=FileSystemLoader("./"))
template = env.get_template("envoy.template.yaml")
@ -42,9 +56,11 @@ def validate_and_render_schema():
for name, endpoint_details in endpoints.items():
inferred_clusters[name] = endpoint_details
endpoint = inferred_clusters[name]["endpoint"]
if len(endpoint.split(":")) > 1:
inferred_clusters[name]["endpoint"] = endpoint.split(":")[0]
inferred_clusters[name]["port"] = int(endpoint.split(":")[1])
protocol = inferred_clusters[name].get("protocol", "http")
(
inferred_clusters[name]["endpoint"],
inferred_clusters[name]["port"],
) = get_endpoint_and_port(endpoint, protocol)
print("defined clusters from arch_config.yaml: ", json.dumps(inferred_clusters))
@ -77,9 +93,10 @@ def validate_and_render_schema():
if llm_provider.get("endpoint", None):
endpoint = llm_provider["endpoint"]
if len(endpoint.split(":")) > 1:
llm_provider["endpoint"] = endpoint.split(":")[0]
llm_provider["port"] = int(endpoint.split(":")[1])
protocol = llm_provider.get("protocol", "http")
llm_provider["endpoint"], llm_provider["port"] = get_endpoint_and_port(
endpoint, protocol
)
llms_with_endpoint.append(llm_provider)
config_yaml["llm_providers"] = updated_llm_providers

View file

@ -89,6 +89,18 @@ def get_llm_provider_access_keys(arch_config_file):
if acess_key is not None:
access_key_list.append(acess_key)
for prompt_target in arch_config_yaml.get("prompt_targets", []):
for k, v in prompt_target.get("endpoint", {}).get("http_headers", {}).items():
if k.lower() == "authorization":
print(
f"found auth header: {k} for prompt_target: {prompt_target.get('name')}/{prompt_target.get('endpoint').get('name')}"
)
auth_tokens = v.split(" ")
if len(auth_tokens) > 1:
access_key_list.append(auth_tokens[1])
else:
access_key_list.append(v)
return access_key_list

22
arch/tools/poetry.lock generated
View file

@ -2,7 +2,7 @@
[[package]]
name = "archgw_modelserver"
version = "0.2.0"
version = "0.2.1"
description = "A model server for serving models"
optional = false
python-versions = "*"
@ -15,13 +15,13 @@ url = "../../model_server"
[[package]]
name = "attrs"
version = "24.3.0"
version = "25.1.0"
description = "Classes Without Boilerplate"
optional = false
python-versions = ">=3.8"
files = [
{file = "attrs-24.3.0-py3-none-any.whl", hash = "sha256:ac96cd038792094f438ad1f6ff80837353805ac950cd2aa0e0625ef19850c308"},
{file = "attrs-24.3.0.tar.gz", hash = "sha256:8f5c07333d543103541ba7be0e2ce16eeee8130cb0b3f9238ab904ce1e85baff"},
{file = "attrs-25.1.0-py3-none-any.whl", hash = "sha256:c75a69e28a550a7e93789579c22aa26b0f5b83b75dc4e08fe092980051e1090a"},
{file = "attrs-25.1.0.tar.gz", hash = "sha256:1c97078a80c814273a76b2a298a932eb681c87415c11dee0a6921de7f1b02c3e"},
]
[package.extras]
@ -34,13 +34,13 @@ tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"]
[[package]]
name = "certifi"
version = "2024.12.14"
version = "2025.1.31"
description = "Python package for providing Mozilla's CA Bundle."
optional = false
python-versions = ">=3.6"
files = [
{file = "certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56"},
{file = "certifi-2024.12.14.tar.gz", hash = "sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db"},
{file = "certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe"},
{file = "certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651"},
]
[[package]]
@ -370,13 +370,13 @@ files = [
[[package]]
name = "referencing"
version = "0.36.1"
version = "0.36.2"
description = "JSON Referencing + Python"
optional = false
python-versions = ">=3.9"
files = [
{file = "referencing-0.36.1-py3-none-any.whl", hash = "sha256:363d9c65f080d0d70bc41c721dce3c7f3e77fc09f269cd5c8813da18069a6794"},
{file = "referencing-0.36.1.tar.gz", hash = "sha256:ca2e6492769e3602957e9b831b94211599d2aade9477f5d44110d2530cf9aade"},
{file = "referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0"},
{file = "referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa"},
]
[package.dependencies]
@ -568,4 +568,4 @@ zstd = ["zstandard (>=0.18.0)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "59543baf4d462d4830e7228ba9eda8ae865416fdabd8ede129492ac45f1926f2"
content-hash = "6b29791896ec1680e2c841ac42e835c1bada672b056d8208ab24388f70f9badb"

View file

@ -1,6 +1,6 @@
[tool.poetry]
name = "archgw"
version = "0.2.0"
version = "0.2.1"
description = "Python-based CLI tool to manage Arch Gateway."
authors = ["Katanemo Labs, Inc."]
packages = [
@ -10,7 +10,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = "^3.10"
archgw_modelserver = "^0.2.0"
archgw_modelserver = "^0.2.1"
click = "^8.1.7"
jinja2 = "^3.1.4"
jsonschema = "^4.23.0"

302
crates/Cargo.lock generated
View file

@ -234,6 +234,8 @@ dependencies = [
"serde_yaml",
"thiserror",
"tiktoken-rs",
"url",
"urlencoding",
]
[[package]]
@ -477,6 +479,17 @@ dependencies = [
"winapi",
]
[[package]]
name = "displaydoc"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.79",
]
[[package]]
name = "duration-string"
version = "0.3.0"
@ -557,6 +570,15 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2"
[[package]]
name = "form_urlencoded"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
dependencies = [
"percent-encoding",
]
[[package]]
name = "futures"
version = "0.3.31"
@ -782,12 +804,151 @@ dependencies = [
"itoa",
]
[[package]]
name = "icu_collections"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526"
dependencies = [
"displaydoc",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_locid"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637"
dependencies = [
"displaydoc",
"litemap",
"tinystr",
"writeable",
"zerovec",
]
[[package]]
name = "icu_locid_transform"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e"
dependencies = [
"displaydoc",
"icu_locid",
"icu_locid_transform_data",
"icu_provider",
"tinystr",
"zerovec",
]
[[package]]
name = "icu_locid_transform_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e"
[[package]]
name = "icu_normalizer"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f"
dependencies = [
"displaydoc",
"icu_collections",
"icu_normalizer_data",
"icu_properties",
"icu_provider",
"smallvec",
"utf16_iter",
"utf8_iter",
"write16",
"zerovec",
]
[[package]]
name = "icu_normalizer_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516"
[[package]]
name = "icu_properties"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5"
dependencies = [
"displaydoc",
"icu_collections",
"icu_locid_transform",
"icu_properties_data",
"icu_provider",
"tinystr",
"zerovec",
]
[[package]]
name = "icu_properties_data"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569"
[[package]]
name = "icu_provider"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9"
dependencies = [
"displaydoc",
"icu_locid",
"icu_provider_macros",
"stable_deref_trait",
"tinystr",
"writeable",
"yoke",
"zerofrom",
"zerovec",
]
[[package]]
name = "icu_provider_macros"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.79",
]
[[package]]
name = "id-arena"
version = "2.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25a2bc672d1148e28034f176e01fffebb08b35768468cc954630da77a1449005"
[[package]]
name = "idna"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e"
dependencies = [
"idna_adapter",
"smallvec",
"utf8_iter",
]
[[package]]
name = "idna_adapter"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71"
dependencies = [
"icu_normalizer",
"icu_properties",
]
[[package]]
name = "indexmap"
version = "2.6.0"
@ -883,6 +1044,12 @@ version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
[[package]]
name = "litemap"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
[[package]]
name = "llm_gateway"
version = "0.1.0"
@ -1028,6 +1195,12 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
[[package]]
name = "percent-encoding"
version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "pin-project-lite"
version = "0.2.14"
@ -1547,6 +1720,17 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "synstructure"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.79",
]
[[package]]
name = "target-lexicon"
version = "0.12.16"
@ -1606,6 +1790,16 @@ dependencies = [
"rustc-hash",
]
[[package]]
name = "tinystr"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f"
dependencies = [
"displaydoc",
"zerovec",
]
[[package]]
name = "toml"
version = "0.8.19"
@ -1676,6 +1870,35 @@ version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861"
[[package]]
name = "url"
version = "2.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60"
dependencies = [
"form_urlencoded",
"idna",
"percent-encoding",
]
[[package]]
name = "urlencoding"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
[[package]]
name = "utf16_iter"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
[[package]]
name = "utf8_iter"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]]
name = "uuid"
version = "1.11.0"
@ -2189,12 +2412,48 @@ dependencies = [
"wasmparser 0.212.0",
]
[[package]]
name = "write16"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
[[package]]
name = "writeable"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
[[package]]
name = "yansi"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
[[package]]
name = "yoke"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40"
dependencies = [
"serde",
"stable_deref_trait",
"yoke-derive",
"zerofrom",
]
[[package]]
name = "yoke-derive"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.79",
"synstructure",
]
[[package]]
name = "zerocopy"
version = "0.7.35"
@ -2216,6 +2475,49 @@ dependencies = [
"syn 2.0.79",
]
[[package]]
name = "zerofrom"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e"
dependencies = [
"zerofrom-derive",
]
[[package]]
name = "zerofrom-derive"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.79",
"synstructure",
]
[[package]]
name = "zerovec"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079"
dependencies = [
"yoke",
"zerofrom",
"zerovec-derive",
]
[[package]]
name = "zerovec-derive"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.79",
]
[[package]]
name = "zstd"
version = "0.13.2"

View file

@ -16,6 +16,8 @@ tiktoken-rs = "0.5.9"
rand = "0.8.5"
serde_json = "1.0"
hex = "0.4.3"
urlencoding = "2.1.3"
url = "2.5.4"
[dev-dependencies]
pretty_assertions = "1.4.1"

View file

@ -25,6 +25,7 @@ pub struct Configuration {
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct Overrides {
pub prompt_target_intent_matching_threshold: Option<f64>,
pub optimize_context_window: Option<bool>,
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
@ -242,6 +243,7 @@ pub struct EndpointDetails {
pub path: Option<String>,
#[serde(rename = "http_method")]
pub method: Option<HttpMethod>,
pub http_headers: Option<HashMap<String, String>>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]

View file

@ -1,21 +1,30 @@
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use url::Url;
use urlencoding;
use crate::configuration::Parameter;
pub fn replace_params_in_path(
path: &str,
params: &HashMap<String, String>,
) -> Result<String, String> {
let mut result = String::new();
let mut in_param = false;
tool_params: &HashMap<String, String>,
prompt_target_params: &[Parameter],
) -> Result<(String, String, HashMap<String, String>), String> {
let mut query_string_replaced = String::new();
let mut current_param = String::new();
let mut vars_replaced = HashSet::new();
let mut params: HashMap<String, String> = HashMap::new();
let mut in_param = false;
for c in path.chars() {
if c == '{' {
in_param = true;
} else if c == '}' {
in_param = false;
let param_name = current_param.clone();
if let Some(value) = params.get(&param_name) {
result.push_str(value);
if let Some(value) = tool_params.get(&param_name) {
let value = urlencoding::encode(value);
query_string_replaced.push_str(value.into_owned().as_str());
vars_replaced.insert(param_name.clone());
} else {
return Err(format!("Missing value for parameter `{}`", param_name));
}
@ -23,31 +32,106 @@ pub fn replace_params_in_path(
} else if in_param {
current_param.push(c);
} else {
result.push(c);
query_string_replaced.push(c);
}
}
Ok(result)
// add the remaining params in path
for (param_name, value) in tool_params.iter() {
let value = urlencoding::encode(value).into_owned();
if !vars_replaced.contains(param_name) {
vars_replaced.insert(param_name.clone());
params.insert(param_name.clone(), value.clone());
if query_string_replaced.contains("?") {
query_string_replaced.push_str(&format!("&{}={}", param_name, value));
} else {
query_string_replaced.push_str(&format!("?{}={}", param_name, value));
}
}
}
// add default values
for param in prompt_target_params.iter() {
if !vars_replaced.contains(&param.name) && param.default.is_some() {
params.insert(param.name.clone(), param.default.clone().unwrap());
if query_string_replaced.contains("?") {
query_string_replaced.push_str(&format!(
"&{}={}",
param.name,
param.default.as_ref().unwrap()
));
} else {
query_string_replaced.push_str(&format!(
"?{}={}",
param.name,
param.default.as_ref().unwrap()
));
}
}
}
let parsed_uri = Url::parse("http://dummy.com").unwrap();
let parsed_uri = parsed_uri
.join(&query_string_replaced)
.map_err(|e| e.to_string())?;
let query_string = parsed_uri.query().unwrap_or("");
let path_uri = parsed_uri.path();
Ok((path_uri.to_string(), query_string.to_string(), params))
}
#[cfg(test)]
mod test {
use std::collections::HashMap;
use crate::configuration::Parameter;
#[test]
fn test_replace_path() {
let path = "/cluster.open-cluster-management.io/v1/managedclusters/{cluster_name}";
let params = vec![("cluster_name".to_string(), "test1".to_string())]
.into_iter()
.collect();
let params = vec![
("cluster_name".to_string(), "test1".to_string()),
("hello".to_string(), "hello world".to_string()),
]
.into_iter()
.collect();
let prompt_target_params = vec![Parameter {
name: "country".to_string(),
parameter_type: None,
description: "test target".to_string(),
required: None,
enum_values: None,
default: Some("US".to_string()),
in_path: None,
format: None,
}];
let out_params: HashMap<String, String> = vec![
("country".to_string(), "US".to_string()),
("hello".to_string(), "hello%20world".to_string()),
]
.into_iter()
.collect();
assert_eq!(
super::replace_params_in_path(path, &params),
Ok("/cluster.open-cluster-management.io/v1/managedclusters/test1".to_string())
super::replace_params_in_path(path, &params, &prompt_target_params),
Ok((
"/cluster.open-cluster-management.io/v1/managedclusters/test1".to_string(),
"hello=hello%20world&country=US".to_string(),
out_params.clone()
))
);
let out_params = HashMap::new();
let prompt_target_params = vec![];
let path = "/cluster.open-cluster-management.io/v1/managedclusters";
let params = vec![].into_iter().collect();
assert_eq!(
super::replace_params_in_path(path, &params),
Ok("/cluster.open-cluster-management.io/v1/managedclusters".to_string())
super::replace_params_in_path(path, &params, &prompt_target_params),
Ok((
"/cluster.open-cluster-management.io/v1/managedclusters".to_string(),
"".to_string(),
out_params
))
);
let path = "/foo/{bar}/baz";
@ -55,8 +139,8 @@ mod test {
.into_iter()
.collect();
assert_eq!(
super::replace_params_in_path(path, &params),
Ok("/foo/qux/baz".to_string())
super::replace_params_in_path(path, &params, &prompt_target_params),
Ok(("/foo/qux/baz".to_string(), "".to_string(), HashMap::new()))
);
let path = "/foo/{bar}/baz/{qux}";
@ -67,8 +151,45 @@ mod test {
.into_iter()
.collect();
assert_eq!(
super::replace_params_in_path(path, &params),
Ok("/foo/qux/baz/quux".to_string())
super::replace_params_in_path(path, &params, &prompt_target_params),
Ok((
"/foo/qux/baz/quux".to_string(),
"".to_string(),
HashMap::new()
))
);
let path = "/foo/{bar}/baz/{qux}?hello=world";
let params = vec![
("bar".to_string(), "qux".to_string()),
("qux".to_string(), "quux".to_string()),
]
.into_iter()
.collect();
assert_eq!(
super::replace_params_in_path(path, &params, &prompt_target_params),
Ok((
"/foo/qux/baz/quux".to_string(),
"hello=world".to_string(),
HashMap::new()
))
);
let path = "/foo/{bar}/baz/{qux}?hello={hello}";
let params = vec![
("bar".to_string(), "qux".to_string()),
("qux".to_string(), "quux".to_string()),
("hello".to_string(), "hello world".to_string()),
]
.into_iter()
.collect();
assert_eq!(
super::replace_params_in_path(path, &params, &prompt_target_params),
Ok((
"/foo/qux/baz/quux".to_string(),
"hello=hello%20world".to_string(),
HashMap::new()
))
);
let path = "/foo/{bar}/baz/{qux}";
@ -76,7 +197,7 @@ mod test {
.into_iter()
.collect();
assert_eq!(
super::replace_params_in_path(path, &params),
super::replace_params_in_path(path, &params, &prompt_target_params),
Err("Missing value for parameter `qux`".to_string())
);
}

File diff suppressed because it is too large Load diff

View file

@ -79,6 +79,11 @@ impl RootContext for FilterContext {
}
fn create_http_context(&self, context_id: u32) -> Option<Box<dyn HttpContext>> {
trace!(
"||| create_http_context called with context_id: {:?} |||",
context_id
);
Some(Box::new(StreamContext::new(
context_id,
Rc::clone(&self.metrics),

View file

@ -87,7 +87,7 @@ impl StreamContext {
));
debug!(
"llm provider hint: {:?}, selected llm: {}",
"request received: llm provider hint: {:?}, selected llm: {}",
self.get_http_request_header(ARCH_PROVIDER_HINT_HEADER),
self.llm_provider.as_ref().unwrap().name
);
@ -309,6 +309,12 @@ impl HttpContext for StreamContext {
}
fn on_http_response_headers(&mut self, _num_headers: usize, _end_of_stream: bool) -> Action {
trace!(
"on_http_response_headers [S={}] end_stream={}",
self.context_id,
_end_of_stream
);
self.set_property(
vec!["metadata", "filter_metadata", "llm_filter", "user_prompt"],
Some("hello world from filter".as_bytes()),
@ -318,6 +324,13 @@ impl HttpContext for StreamContext {
}
fn on_http_response_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
trace!(
"on_http_response_body [S={}] bytes={} end_stream={}",
self.context_id,
body_size,
end_of_stream
);
if !self.is_chat_completions_request {
debug!("non-chatcompletion request");
return Action::Continue;
@ -517,8 +530,11 @@ impl HttpContext for StreamContext {
let chat_completions_response: ChatCompletionsResponse =
match serde_json::from_str(body_utf8.as_str()) {
Ok(de) => de,
Err(_e) => {
debug!("invalid response: {}", body_utf8);
Err(err) => {
debug!(
"non chat-completion compliant response received err: {}, body: {}",
err, body_utf8
);
return Action::Continue;
}
};

View file

@ -22,12 +22,8 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
Some(MapType::HttpRequestHeaders),
Some("x-arch-llm-provider-hint"),
)
.returning(Some("default"))
.expect_log(
Some(LogLevel::Debug),
Some("llm provider hint: Some(Default)"),
)
.expect_log(Some(LogLevel::Debug), Some("selected llm: open-ai-gpt-4"))
.returning(None)
.expect_log(Some(LogLevel::Debug), Some("request received: llm provider hint: Some(\"default\"), selected llm: open-ai-gpt-4"))
.expect_add_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("x-arch-llm-provider"),
@ -38,7 +34,11 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
Some("Authorization"),
Some("Bearer secret_key"),
)
.expect_remove_header_map_value(Some(MapType::HttpRequestHeaders), Some("content-length"))
.expect_get_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("x-arch-llm-provider-hint"),
)
.returning(Some("default"))
.expect_get_header_map_value(
Some(MapType::HttpRequestHeaders),
Some("x-arch-ratelimit-selector"),
@ -50,7 +50,6 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
.returning(None)
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some(":path"))
.returning(Some("/v1/chat/completions"))
.expect_log(Some(LogLevel::Debug), None)
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("x-request-id"))
.returning(None)
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("traceparent"))
@ -62,7 +61,7 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
module
.call_proxy_on_context_create(http_context, filter_context)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.execute_and_expect(ReturnType::None)
.unwrap();
@ -187,7 +186,10 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
module
.call_proxy_on_context_create(http_context, filter_context)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(
Some(LogLevel::Trace),
Some("||| create_http_context called with context_id: 2 |||"),
)
.execute_and_expect(ReturnType::None)
.unwrap();
@ -218,9 +220,9 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_metric_record("input_sequence_length", 21)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
@ -251,7 +253,7 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
module
.call_proxy_on_context_create(http_context, filter_context)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.execute_and_expect(ReturnType::None)
.unwrap();
@ -339,9 +341,9 @@ fn llm_gateway_request_ratelimited() {
.returning(Some(chat_completions_request_body))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_metric_record("input_sequence_length", 107)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
@ -405,9 +407,9 @@ fn llm_gateway_request_not_ratelimited() {
.returning(Some(chat_completions_request_body))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_metric_record("input_sequence_length", 29)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)

File diff suppressed because it is too large Load diff

View file

@ -27,21 +27,34 @@ impl Context for StreamContext {
.get_http_call_response_body(0, body_size)
.unwrap_or_default();
let http_status = self
.get_http_call_response_header(":status")
.unwrap_or(StatusCode::OK.as_str().to_string());
if http_status != StatusCode::OK.as_str() {
let server_error = ServerError::Upstream {
host: callout_context.upstream_cluster.unwrap(),
path: callout_context.upstream_cluster_path.unwrap(),
status: http_status.clone(),
body: String::from_utf8(body).unwrap(),
};
warn!("filter received non 2xx code: {:?}", server_error);
return self.send_server_error(
server_error,
Some(StatusCode::from_str(http_status.as_str()).unwrap()),
);
// Inspect the `:status` pseudo-header of the callout response and bail out early when the
// upstream did not answer with a 2xx code.
if let Some(http_status) = self.get_http_call_response_header(":status") {
    match StatusCode::from_str(http_status.as_str()) {
        Ok(status_code) => {
            if !status_code.is_success() {
                let server_error = ServerError::Upstream {
                    host: callout_context.upstream_cluster.unwrap(),
                    path: callout_context.upstream_cluster_path.unwrap(),
                    status: http_status.clone(),
                    // The error body is only used for reporting, so tolerate invalid UTF-8
                    // instead of panicking on it.
                    body: String::from_utf8_lossy(&body).into_owned(),
                };
                warn!("received non 2xx code: {:?}", server_error);
                // Reuse the status that was already parsed above.
                return self.send_server_error(server_error, Some(status_code));
            }
        }
        Err(_) => {
            // The status string could not be parsed, so parsing it a second time (as the
            // previous code did) can only panic. Report the bad upstream answer with a
            // fixed 502 instead.
            return self.send_server_error(
                ServerError::LogicError(format!("invalid status code: {}", http_status)),
                Some(StatusCode::BAD_GATEWAY),
            );
        }
    }
} else {
    // :status header not found; processing continues as if the call succeeded.
    warn!("missing :status header");
}
#[cfg_attr(any(), rustfmt::skip)]

View file

@ -3,6 +3,7 @@ use crate::stream_context::StreamContext;
use common::configuration::{Configuration, Overrides, PromptGuards, PromptTarget, Tracing};
use common::http::Client;
use common::stats::Gauge;
use log::trace;
use proxy_wasm::traits::*;
use proxy_wasm::types::*;
use std::cell::RefCell;
@ -83,6 +84,11 @@ impl RootContext for FilterContext {
}
fn create_http_context(&self, context_id: u32) -> Option<Box<dyn HttpContext>> {
trace!(
"||| create_http_context called with context_id: {:?} |||",
context_id
);
Some(Box::new(StreamContext::new(
context_id,
Rc::clone(&self.metrics),

View file

@ -78,10 +78,7 @@ impl HttpContext for StreamContext {
}
};
debug!(
"developer => archgw: {}",
String::from_utf8_lossy(&body_bytes)
);
trace!("request body: {}", String::from_utf8_lossy(&body_bytes));
// Deserialize body into spec.
// Currently OpenAI API.
@ -133,9 +130,23 @@ impl HttpContext for StreamContext {
.map(|(_, pt)| pt.into())
.collect();
let mut metadata = deserialized_body.metadata.clone();
if let Some(overrides) = self.overrides.as_ref() {
if overrides.optimize_context_window.unwrap_or_default() {
if metadata.is_none() {
metadata = Some(HashMap::new());
}
metadata
.as_mut()
.unwrap()
.insert("optimize_context_window".to_string(), "true".to_string());
}
}
let arch_fc_chat_completion_request = ChatCompletionsRequest {
messages: deserialized_body.messages.clone(),
metadata: deserialized_body.metadata.clone(),
metadata,
stream: deserialized_body.stream,
model: "--".to_string(),
stream_options: deserialized_body.stream_options.clone(),
@ -152,7 +163,8 @@ impl HttpContext for StreamContext {
}
};
debug!("archgw => archfc: {}", json_data);
debug!("sending request to model server");
trace!("request body: {}", json_data);
let mut headers = vec![
(ARCH_UPSTREAM_HOST_HEADER, MODEL_SERVER_NAME),

View file

@ -7,6 +7,7 @@ mod filter_context;
mod http_context;
mod metrics;
mod stream_context;
mod tools;
proxy_wasm::main! {{
proxy_wasm::set_log_level(LogLevel::Trace);

View file

@ -1,4 +1,5 @@
use crate::metrics::Metrics;
use crate::tools::compute_request_path_body;
use common::api::open_ai::{
to_server_events, ArchState, ChatCompletionStreamResponse, ChatCompletionsRequest,
ChatCompletionsResponse, Message, ModelServerResponse, ToolCall,
@ -14,9 +15,8 @@ use common::http::{CallArgs, Client};
use common::stats::Gauge;
use derivative::Derivative;
use http::StatusCode;
use log::{debug, warn};
use log::{debug, trace, warn};
use proxy_wasm::traits::*;
use serde_yaml::Value;
use std::cell::RefCell;
use std::collections::HashMap;
use std::rc::Rc;
@ -46,7 +46,7 @@ pub struct StreamCallContext {
pub struct StreamContext {
system_prompt: Rc<Option<String>>,
pub prompt_targets: Rc<HashMap<String, PromptTarget>>,
_overrides: Rc<Option<Overrides>>,
pub overrides: Rc<Option<Overrides>>,
pub metrics: Rc<Metrics>,
pub callouts: RefCell<HashMap<u32, StreamCallContext>>,
pub context_id: u32,
@ -89,7 +89,7 @@ impl StreamContext {
streaming_response: false,
user_prompt: None,
is_chat_completions_request: false,
_overrides: overrides,
overrides: overrides,
request_id: None,
traceparent: None,
_tracing: tracing,
@ -125,13 +125,14 @@ impl StreamContext {
mut callout_context: StreamCallContext,
) {
let body_str = String::from_utf8(body).unwrap();
debug!("archgw <= archfc response: {}", body_str);
debug!("model server response received");
trace!("response body: {}", body_str);
let model_server_response: ModelServerResponse = match serde_json::from_str(&body_str) {
Ok(arch_fc_response) => arch_fc_response,
Err(e) => {
warn!(
"error deserializing archfc response: {}, body: {}",
"error deserializing modelserver response: {}, body: {}",
e, body_str
);
return self.send_server_error(ServerError::Deserialization(e), None);
@ -141,7 +142,7 @@ impl StreamContext {
let arch_fc_response = match model_server_response {
ModelServerResponse::ChatCompletionsResponse(response) => response,
ModelServerResponse::ModelServerErrorResponse(response) => {
debug!("archgw <= archfc error response: {}", response.result);
debug!("archgw <= modelserver error response: {}", response.result);
if response.result == "No intent matched" {
if let Some(default_prompt_target) = self
.prompt_targets
@ -272,85 +273,76 @@ impl StreamContext {
fn schedule_api_call_request(&mut self, mut callout_context: StreamCallContext) {
let tools_call_name = self.tool_calls.as_ref().unwrap()[0].function.name.clone();
let prompt_target = self.prompt_targets.get(&tools_call_name).unwrap();
let tool_params = &self.tool_calls.as_ref().unwrap()[0].function.arguments;
let endpoint_details = prompt_target.endpoint.as_ref().unwrap();
let endpoint_path: String = endpoint_details
.path
.as_ref()
.unwrap_or(&String::from("/"))
.to_string();
let prompt_target = self.prompt_targets.get(&tools_call_name).unwrap().clone();
let http_method = endpoint_details.method.clone().unwrap_or_default();
let prompt_target_params = prompt_target.parameters.clone().unwrap_or_default();
let mut tool_params = self.tool_calls.as_ref().unwrap()[0]
.function
.arguments
.clone();
tool_params.insert(
String::from(MESSAGES_KEY),
serde_yaml::to_value(&callout_context.request_body.messages).unwrap(),
);
let tool_params_json_str = serde_json::to_string(&tool_params).unwrap();
let endpoint = prompt_target.endpoint.unwrap();
let path: String = endpoint.path.unwrap_or(String::from("/"));
// only add params that are of string, number and bool type
let url_params = tool_params
.iter()
.filter(|(_, value)| value.is_number() || value.is_string() || value.is_bool())
.map(|(key, value)| match value {
Value::Number(n) => (key.clone(), n.to_string()),
Value::String(s) => (key.clone(), s.clone()),
Value::Bool(b) => (key.clone(), b.to_string()),
Value::Null => todo!(),
Value::Sequence(_) => todo!(),
Value::Mapping(_) => todo!(),
Value::Tagged(_) => todo!(),
})
.collect::<HashMap<String, String>>();
let path = match common::path::replace_params_in_path(&path, &url_params) {
Ok(path) => path,
let (path, body) = match compute_request_path_body(
&endpoint_path,
tool_params,
&prompt_target_params,
&http_method,
) {
Ok((path, body)) => (path, body),
Err(e) => {
return self.send_server_error(
ServerError::BadRequest {
why: format!("error replacing params in path: {}", e),
why: format!("error computing api request path or body: {}", e),
},
Some(StatusCode::BAD_REQUEST),
);
}
};
let http_method = endpoint.method.unwrap_or_default().to_string();
let mut headers = vec![
(ARCH_UPSTREAM_HOST_HEADER, endpoint.name.as_str()),
(":method", &http_method),
let http_method_str = http_method.to_string();
let mut headers: HashMap<_, _> = [
(ARCH_UPSTREAM_HOST_HEADER, endpoint_details.name.as_str()),
(":method", &http_method_str),
(":path", &path),
(":authority", endpoint.name.as_str()),
(":authority", endpoint_details.name.as_str()),
("content-type", "application/json"),
("x-envoy-max-retries", "3"),
];
]
.into_iter()
.collect();
if self.request_id.is_some() {
headers.push((REQUEST_ID_HEADER, self.request_id.as_ref().unwrap()));
headers.insert(REQUEST_ID_HEADER, self.request_id.as_ref().unwrap());
}
if self.traceparent.is_some() {
headers.push((TRACE_PARENT_HEADER, self.traceparent.as_ref().unwrap()));
headers.insert(TRACE_PARENT_HEADER, self.traceparent.as_ref().unwrap());
}
// override http headers that are set in the prompt target
let http_headers = endpoint_details.http_headers.clone().unwrap_or_default();
for (key, value) in http_headers.iter() {
headers.insert(key.as_str(), value.as_str());
}
let call_args = CallArgs::new(
ARCH_INTERNAL_CLUSTER_NAME,
&path,
headers,
Some(tool_params_json_str.as_bytes()),
headers.into_iter().collect(),
body.as_deref().map(|s| s.as_bytes()),
vec![],
Duration::from_secs(5),
);
debug!(
"archgw => api call, endpoint: {}{}, body: {}",
endpoint.name.as_str(),
path,
tool_params_json_str
"dispatching api call to developer endpoint: {}, path: {}, method: {}",
endpoint_details.name, path, http_method_str
);
callout_context.upstream_cluster = Some(endpoint.name.to_owned());
callout_context.upstream_cluster = Some(endpoint_details.name.to_owned());
callout_context.upstream_cluster_path = Some(path.to_owned());
callout_context.response_handler_type = ResponseHandlerType::FunctionCall;
@ -363,6 +355,10 @@ impl StreamContext {
let http_status = self
.get_http_call_response_header(":status")
.unwrap_or(StatusCode::OK.as_str().to_string());
debug!(
"developer api call response received: status code: {}",
http_status
);
if http_status != StatusCode::OK.as_str() {
warn!(
"api server responded with non 2xx status code: {}",
@ -379,8 +375,8 @@ impl StreamContext {
);
}
self.tool_call_response = Some(String::from_utf8(body).unwrap());
debug!(
"archgw <= api call response: {}",
trace!(
"response body: {}",
self.tool_call_response.as_ref().unwrap()
);
@ -430,7 +426,8 @@ impl StreamContext {
return self.send_server_error(ServerError::Serialization(e), None);
}
};
debug!("archgw => llm request: {}", llm_request_str);
debug!("sending request to upstream llm");
trace!("request body: {}", llm_request_str);
self.start_upstream_llm_request_time = SystemTime::now()
.duration_since(UNIX_EPOCH)

View file

@ -0,0 +1,157 @@
use common::configuration::{HttpMethod, Parameter};
use std::collections::HashMap;
use serde_yaml::Value;
/// Flattens tool-call arguments into plain string pairs that can be placed in a URL.
///
/// Only scalar arguments are kept: strings are copied as-is, numbers and booleans are
/// rendered with `to_string()`. Every other kind of value (null, sequence, mapping,
/// tagged) is silently dropped.
///
/// NOTE(review): serde_yaml's `is_string`/`is_number`/`is_bool` appear to look through
/// `Value::Tagged` wrappers, which would have let a tagged scalar reach the former
/// `todo!()` arm and panic; confirm against the pinned serde_yaml version. Matching on
/// the variant directly removes every panic path from this function.
pub fn filter_tool_params(tool_params: &HashMap<String, Value>) -> HashMap<String, String> {
    tool_params
        .iter()
        .filter_map(|(key, value)| {
            let rendered = match value {
                Value::Number(n) => n.to_string(),
                Value::String(s) => s.clone(),
                Value::Bool(b) => b.to_string(),
                // Non-scalar values have no URL representation here; skip them.
                _ => return None,
            };
            Some((key.clone(), rendered))
        })
        .collect()
}
/// Builds the request path and optional JSON body for a developer API call.
///
/// The scalar tool arguments are substituted into `endpoint_path` by
/// `common::path::replace_params_in_path`, which also returns the resulting query string
/// and the parameters that were not consumed by the path template.
///
/// * `HttpMethod::Get`  - returns `path?query` (or just `path` when there is no query
///   string) and no body.
/// * `HttpMethod::Post` - returns the bare path and a JSON object body made of the
///   additional parameters plus every `key=value` pair found in the query string.
///
/// NOTE(review): values coming back from `replace_params_in_path` look percent-encoded,
/// so POST bodies carry the encoded form (e.g. `hello%20world`); confirm whether the
/// receiving endpoints expect that.
///
/// # Errors
/// Returns the error string from `replace_params_in_path` (for example a missing path
/// parameter) or from JSON serialization of the body.
pub fn compute_request_path_body(
    endpoint_path: &str,
    tool_params: &HashMap<String, Value>,
    prompt_target_params: &[Parameter],
    http_method: &HttpMethod,
) -> Result<(String, Option<String>), String> {
    let tool_url_params = filter_tool_params(tool_params);
    let (path_with_params, query_string, mut additional_params) =
        common::path::replace_params_in_path(
            endpoint_path,
            &tool_url_params,
            prompt_target_params,
        )?;

    match http_method {
        HttpMethod::Get => {
            // Avoid emitting a dangling `?` when there is nothing to put after it.
            let path = if query_string.is_empty() {
                path_with_params
            } else {
                format!("{}?{}", path_with_params, query_string)
            };
            Ok((path, None))
        }
        HttpMethod::Post => {
            // Fold the query string into the body. `split_once` keeps any further `=`
            // inside the value, and a fragment without `=` becomes a key with an empty
            // value instead of panicking. Empty fragments (e.g. from `a=1&&b=2`) are ignored.
            for pair in query_string.split('&').filter(|pair| !pair.is_empty()) {
                let (key, value) = pair.split_once('=').unwrap_or((pair, ""));
                additional_params.insert(key.to_string(), value.to_string());
            }
            let body = serde_json::to_string(&additional_params).map_err(|e| e.to_string())?;
            Ok((path_with_params, Some(body)))
        }
    }
}
#[cfg(test)]
mod test {
    use common::configuration::{HttpMethod, Parameter};

    /// Prompt-target parameters shared by every test: a single `country` parameter
    /// whose default value is "US".
    fn country_defaulting_to_us() -> Vec<Parameter> {
        vec![Parameter {
            name: "country".to_string(),
            parameter_type: None,
            description: "test target".to_string(),
            required: None,
            enum_values: None,
            default: Some("US".to_string()),
            in_path: None,
            format: None,
        }]
    }

    /// Parses `tool_params_yaml` and runs `compute_request_path_body` as a GET request
    /// against `endpoint_path`, unwrapping the result.
    fn compute_for_get(endpoint_path: &str, tool_params_yaml: &str) -> (String, Option<String>) {
        let tool_params = serde_yaml::from_str(tool_params_yaml).unwrap();
        let prompt_target_params = country_defaulting_to_us();
        let http_method = HttpMethod::Get;
        super::compute_request_path_body(
            endpoint_path,
            &tool_params,
            &prompt_target_params,
            &http_method,
        )
        .unwrap()
    }

    /// Path placeholders are substituted, leftover arguments and defaults go to the query.
    #[test]
    fn test_compute_request_path_body() {
        let (path, body) = compute_for_get(
            "/cluster.open-cluster-management.io/v1/managedclusters/{cluster_name}",
            r#"
cluster_name: test1
hello: hello world
"#,
        );
        assert_eq!(
            path,
            "/cluster.open-cluster-management.io/v1/managedclusters/test1?hello=hello%20world&country=US"
        );
        assert_eq!(body, None);
    }

    /// With no tool arguments only the default value ends up in the query string.
    #[test]
    fn test_compute_request_path_body_empty_params() {
        let (path, body) = compute_for_get(
            "/cluster.open-cluster-management.io/v1/managedclusters/",
            r#"{}"#,
        );
        assert_eq!(
            path,
            "/cluster.open-cluster-management.io/v1/managedclusters/?country=US"
        );
        assert_eq!(body, None);
    }

    /// An explicit tool argument wins over the parameter's default value.
    #[test]
    fn test_compute_request_path_body_override_default_val() {
        let (path, body) = compute_for_get(
            "/cluster.open-cluster-management.io/v1/managedclusters/",
            r#"
country: UK
"#,
        );
        assert_eq!(
            path,
            "/cluster.open-cluster-management.io/v1/managedclusters/?country=UK"
        );
        assert_eq!(body, None);
    }
}

View file

@ -41,7 +41,7 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
module
.call_proxy_on_context_create(http_context, filter_context)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.execute_and_expect(ReturnType::None)
.unwrap();
@ -87,8 +87,9 @@ fn normal_flow(module: &mut Tester, filter_context: i32, http_context: i32) {
None,
)
.returning(Some(1))
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_metric_increment("active_http_calls", 1)
.execute_and_expect(ReturnType::Action(Action::Pause))
@ -203,7 +204,7 @@ fn prompt_gateway_successful_request_to_open_ai_chat_completions() {
module
.call_proxy_on_context_create(http_context, filter_context)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.execute_and_expect(ReturnType::None)
.unwrap();
@ -234,8 +235,9 @@ fn prompt_gateway_successful_request_to_open_ai_chat_completions() {
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_http_call(Some("arch_internal"), None, None, None, None)
.returning(Some(4))
@ -267,7 +269,7 @@ fn prompt_gateway_bad_request_to_open_ai_chat_completions() {
module
.call_proxy_on_context_create(http_context, filter_context)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.execute_and_expect(ReturnType::None)
.unwrap();
@ -302,7 +304,7 @@ fn prompt_gateway_bad_request_to_open_ai_chat_completions() {
None,
None,
)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.execute_and_expect(ReturnType::Action(Action::Pause))
.unwrap();
}
@ -363,29 +365,30 @@ fn prompt_gateway_request_to_llm_gateway() {
metadata: None,
};
let expected_body = "{\"city\":\"seattle\"}";
let arch_fc_resp_str = serde_json::to_string(&arch_fc_resp).unwrap();
module
.call_proxy_on_http_call_response(http_context, 1, 0, arch_fc_resp_str.len() as i32, 0)
.expect_metric_increment("active_http_calls", -1)
.expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody))
.returning(Some(&arch_fc_resp_str))
.expect_log(Some(LogLevel::Warn), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_http_call(
Some("arch_internal"),
Some(vec![
("x-arch-upstream", "api_server"),
(":method", "POST"),
(":path", "/weather"),
(":authority", "api_server"),
("content-type", "application/json"),
("x-arch-upstream", "api_server"),
(":authority", "api_server"),
("x-envoy-max-retries", "3"),
(":path", "/weather"),
]),
None,
Some(expected_body),
None,
None,
)
@ -401,13 +404,12 @@ fn prompt_gateway_request_to_llm_gateway() {
.expect_get_buffer_bytes(Some(BufferType::HttpCallResponseBody))
.returning(Some(&body_text))
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Trace), None)
.expect_get_header_map_value(Some(MapType::HttpCallResponseHeaders), Some(":status"))
.returning(Some("200"))
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_set_buffer_bytes(Some(BufferType::HttpRequestBody), None)
.execute_and_expect(ReturnType::None)
.unwrap();

View file

@ -1,19 +0,0 @@
# Multi-stage build for the insurance agent demo service.
# Common base image for the build stage.
FROM python:3 AS base
# Build stage: installs the Python dependencies into an isolated prefix.
FROM base AS builder
WORKDIR /src
# Copy only the requirements file before installing, ahead of the full source copy below.
COPY requirements.txt /src/
# Install into /runtime so the result can be copied into the final image as one directory.
RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt
COPY . /src
# Final stage: slim image that receives the installed packages and the application code.
FROM python:3-slim AS output
# Place the packages installed under /runtime into /usr/local of the final image.
COPY --from=builder /runtime /usr/local
COPY . /app
WORKDIR /app
# Serve the `app` object from insurance_agent_main with uvicorn on all interfaces, port 80.
CMD ["uvicorn", "insurance_agent_main:app", "--host", "0.0.0.0", "--port", "80", "--log-level", "info"]

View file

@ -1,58 +0,0 @@
# Insurance Agent Demo
This demo showcases how **Arch** can be used to manage insurance-related tasks such as policy inquiries, initiating policies, and updating claims or deductibles. In this demo, the assistant provides factual information related to insurance policies (e.g., car, boat, house, motorcycle).
The system can perform a variety of tasks, such as answering insurance-related questions, retrieving policy coverage details, initiating policies, and updating claims or deductibles.
## Available Functions:
- **Policy Q/A**: Handles general Q&A related to insurance policies.
- **Endpoint**: `/policy/qa`
- This function answers general inquiries related to insurance, such as coverage details or policy types. It is the default target for insurance-related queries.
- **Get Policy Coverage**: Retrieves the coverage details for a given policy type (car, boat, house, motorcycle).
- **Endpoint**: `/policy/coverage`
- Parameters:
- `policy_type` (required): The type of policy. Available options: `car`, `boat`, `house`, `motorcycle`. Defaults to `car`.
- **Initiate Policy**: Starts a policy coverage for car, boat, motorcycle, or house.
- **Endpoint**: `/policy/initiate`
- Parameters:
- `policy_type` (required): The type of policy. Available options: `car`, `boat`, `house`, `motorcycle`. Defaults to `car`.
- `deductible` (required): The deductible amount set for the policy.
- **Update Claim**: Updates the notes on a specific insurance claim.
- **Endpoint**: `/policy/claim`
- Parameters:
- `claim_id` (required): The claim number.
- `notes` (optional): Notes about the claim number for the adjustor to see.
- **Update Deductible**: Updates the deductible amount for a specific policy coverage.
- **Endpoint**: `/policy/deductible`
- Parameters:
- `policy_id` (required): The ID of the policy.
- `deductible` (required): The deductible amount to be set for the policy.
**Arch** is designed to intelligently route prompts to the appropriate functions based on the target, allowing for seamless interaction with various insurance-related services.
# Starting the demo
1. Please make sure the [pre-requisites](https://github.com/katanemo/arch/?tab=readme-ov-file#prerequisites) are installed correctly
2. Start Arch
```sh
sh run_demo.sh
```
3. Navigate to http://localhost:18080/
4. Ask the assistant: "Tell me what can you do for me?"
# Observability
Arch gateway publishes a stats endpoint at http://localhost:19901/stats. In this demo we use Prometheus to pull stats from Arch and Grafana to visualize them in a dashboard. To see the Grafana dashboard, follow the instructions below:
1. Start grafana and prometheus using following command
```sh
docker compose --profile monitoring up
```
1. Navigate to http://localhost:3000/ to open grafana UI (use admin/grafana as credentials)
1. From grafana left nav click on dashboards and select "Intelligent Gateway Overview" to view arch gateway stats
Here is a sample interaction:
<img width="575" alt="image" src="https://github.com/user-attachments/assets/25d40f46-616e-41ea-be8e-1623055c84ec">

View file

@ -1,105 +0,0 @@
version: v0.1
listener:
address: 127.0.0.1
port: 8080 #If you configure port 443, you'll need to update the listener with tls_certificates
message_format: huggingface
system_prompt: |
You are an insurance assistant that just offers guidance related to car, boat, rental and home insurance only. Please be precise and summarize based on the context provided.
llm_providers:
- name: OpenAI
provider_interface: openai
access_key: $OPENAI_API_KEY
model: gpt-4o
default: true
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
endpoints:
app_server:
# value could be ip address or a hostname with port
# this could also be a list of endpoints for load balancing
# for example endpoint: [ ip1:port, ip2:port ]
endpoint: host.docker.internal:18083
# max time to wait for a connection to be established
connect_timeout: 0.05s
prompt_targets:
- name: policy_qa
endpoint:
name: app_server
path: /policy/qa
http_method: POST
description: Handle general Q/A related to insurance.
default: true
- name: get_policy_coverage
description: Retrieve the coverage details for an insurance policy.
endpoint:
name: app_server
path: /policy/coverage
http_method: POST
parameters:
- name: policy_type
type: str
description: The type of policy
default: car
required: true
- name: initiate_policy
endpoint:
name: app_server
path: /policy/initiate
http_method: POST
description: Start a policy coverage for an insurance policy
parameters:
- name: policy_type
type: str
description: The type of policy
default: car
required: true
- name: deductible
type: float
description: the deductible amount set of the policy
required: true
- name: update_claim
endpoint:
name: app_server
path: /policy/claim
http_method: POST
description: Update the notes on the claim
parameters:
- name: claim_id
type: str
description: the claim number
required: true
- name: notes
type: str
description: notes about the claim number for your adjustor to see
required: false
- name: update_deductible
endpoint:
name: app_server
path: /policy/deductible
http_method: POST
description: Update the deductible amount for a specific insurance policy coverage.
parameters:
- name: policy_id
type: str
description: The id of the insurance policy
required: true
- name: deductible
type: float
description: the deductible amount set of the policy
required: true
ratelimits:
- model: gpt-4
selector:
key: selector-key
value: selector-value
limit:
tokens: 1
unit: minute

View file

@ -1,140 +0,0 @@
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel, Field
app = FastAPI()
class Conversation(BaseModel):
    # Request body accepted by the /policy/qa route: a single, untyped list
    # of messages. The element shape is not constrained by this model.
    arch_messages: list
class PolicyCoverageRequest(BaseModel):
    # Request body for the coverage lookup. `policy_type` has no default
    # (Ellipsis marks the field as required).
    policy_type: str = Field(
        default=...,
        description="The type of a policy held by the customer For, e.g. car, boat, house, motorcycle)",
    )
class PolicyInitiateRequest(PolicyCoverageRequest):
    # Inherits `policy_type` from PolicyCoverageRequest and adds a required
    # deductible amount.
    deductible: float = Field(
        default=...,
        description="The deductible amount set of the policy",
    )
class ClaimUpdate(BaseModel):
    # Request body for the claim-update route.
    # Identifier of the claim being updated (required).
    claim_id: str
    # Status or details of the claim. The demo's README and prompt-target
    # config both declare this parameter as optional, so it defaults to an
    # empty string instead of making the request fail validation when the
    # caller omits it.
    notes: str = ""
class DeductibleUpdate(BaseModel):
    # Request body for the deductible-update route; both fields are required.
    # Identifier of the policy whose deductible changes.
    policy_id: str
    # New deductible amount.
    deductible: float
class CoverageResponse(BaseModel):
    # Response model of the coverage lookup route.
    # Policy type echoed back from the request.
    policy_type: str
    # Description of coverage.
    coverage: str
    # The premium cost.
    premium: float
# Get information about policy coverage
@app.post("/policy/coverage", response_model=CoverageResponse)
async def get_policy_coverage(req: PolicyCoverageRequest):
    """
    Return the canned coverage description and premium for ``req.policy_type``.

    Known types are car, boat, house and motorcycle; any other value is
    answered with HTTP 404 ("Policy type not found").
    """
    # Static catalogue: policy type -> (coverage description, premium).
    catalogue = {
        "car": ("Full car coverage with collision, liability", 500.0),
        "boat": ("Full boat coverage including theft and storm damage", 700.0),
        "house": ("Full house coverage including fire, theft, flood", 1000.0),
        "motorcycle": ("Full motorcycle coverage with liability", 400.0),
    }

    entry = catalogue.get(req.policy_type)
    if entry is None:
        raise HTTPException(status_code=404, detail="Policy type not found")

    coverage_text, premium_cost = entry
    return CoverageResponse(
        policy_type=req.policy_type,
        coverage=coverage_text,
        premium=premium_cost,
    )
# Initiate policy coverage
@app.post("/policy/initiate")
async def initiate_policy(policy_request: PolicyInitiateRequest):
    """
    Acknowledge the start of a policy for a car, boat, house, or motorcycle.

    Returns a confirmation message plus the requested deductible; any other
    policy type is rejected with HTTP 400 ("Invalid policy type").
    """
    supported_types = ("car", "boat", "house", "motorcycle")
    requested_type = policy_request.policy_type

    if requested_type in supported_types:
        return {
            "message": f"Policy initiated for {requested_type}",
            "deductible": policy_request.deductible,
        }

    raise HTTPException(status_code=400, detail="Invalid policy type")
# Update claim details
@app.post("/policy/claim")
async def update_claim(req: ClaimUpdate):
    """
    Update the status or details of a claim (mock implementation).

    Nothing is persisted: the handler only echoes the claim id and the notes
    it received.
    """
    # The body previously referenced an undefined name `claim_update`, which
    # raised NameError on every request; the parameter is called `req`.
    # NOTE(review): the message prints the claim id in the "policy" slot as
    # well because ClaimUpdate carries no policy id - confirm intended wording.
    return {
        "message": f"Claim {req.claim_id} for policy {req.claim_id} has been updated",
        "update": req.notes,
    }
# Update deductible amount
@app.post("/policy/deductible")
async def update_deductible(deductible_update: DeductibleUpdate):
    """
    Acknowledge a deductible change for one policy (mock implementation).

    Nothing is stored; the response repeats the policy id and the new amount.
    """
    policy_id = deductible_update.policy_id
    confirmation = f"Deductible for policy {policy_id} has been updated"
    return {
        "message": confirmation,
        "new_deductible": deductible_update.deductible,
    }
# Post method for policy Q/A
@app.post("/policy/qa")
async def policy_qa(conversation: Conversation):
    """
    Answer general insurance Q/A with a fixed, chat-completion-shaped reply.

    The incoming conversation is validated but not inspected; the same canned
    assistant message is returned for every request.
    """
    assistant_message = {
        "role": "assistant",
        "content": "I am a helpful insurance agent, and can only help with insurance things",
    }
    only_choice = {
        "message": assistant_message,
        "finish_reason": "completed",
        "index": 0,
    }
    return {
        "choices": [only_choice],
        "model": "insurance_agent",
        "usage": {"completion_tokens": 0},
    }
# Run the app using:
# uvicorn main:app --reload

View file

@ -1,4 +0,0 @@
fastapi
uvicorn
pydantic
openai

View file

@ -1,12 +0,0 @@
apiVersion: 1
providers:
- name: "Dashboard provider"
orgId: 1
type: file
disableDeletion: false
updateIntervalSeconds: 10
allowUiUpdates: false
options:
path: /var/lib/grafana/dashboards
foldersFromFilesStructure: true

View file

@ -1,355 +0,0 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"links": [],
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "avg(rate(envoy_cluster_internal_upstream_rq_time_sum[1m]) / rate(envoy_cluster_internal_upstream_rq_time_count[1m])) by (envoy_cluster_name)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "request latency - internal (ms)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "avg(rate(envoy_cluster_external_upstream_rq_time_sum[1m]) / rate(envoy_cluster_external_upstream_rq_time_count[1m])) by (envoy_cluster_name)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "request latency - external (ms)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "avg(rate(envoy_cluster_internal_upstream_rq_completed[1m])) by (envoy_cluster_name)",
"fullMetaSearch": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
},
{
"datasource": {
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "avg(rate(envoy_cluster_external_upstream_rq_completed[1m])) by (envoy_cluster_name)",
"fullMetaSearch": false,
"hide": false,
"includeNullMetadata": true,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "B",
"useBackend": false
}
],
"title": "Upstream request count",
"type": "timeseries"
}
],
"schemaVersion": 39,
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-15m",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Intelligent Gateway Overview",
"uid": "adt6uhx5lk8aob",
"version": 3,
"weekStart": ""
}

View file

@ -1,9 +0,0 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
url: http://prometheus:9090
isDefault: true
access: proxy
editable: true

View file

@ -1,253 +0,0 @@
import logging
import random
import re
import sqlite3
from datetime import datetime, timedelta, timezone
import pandas as pd
from dateparser import parse
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)
def loadsql():
    """Create an in-memory SQLite database filled with the synthetic tables.

    Returns:
        The open ``sqlite3`` connection after the device, interface-stats and
        flow generators have each written their table.
    """
    db = sqlite3.connect(":memory:")

    # The device frame is generated first because the other two generators
    # sample from it.
    devices = generate_device_data(db)
    generate_interface_stats_data(db, devices)
    generate_flow_data(db, devices)

    return db
# Function to convert natural language time expressions to "X {time} ago" format
def convert_to_ago_format(expression):
    """Rewrite a phrase containing "<number> <unit>" as "<number> <unit> ago".

    Units are tried in a fixed priority order and the first unit that matches
    anywhere in ``expression`` wins, so the order below is significant.

    Args:
        expression: Free-form text such as "for the past 7 days".

    Returns:
        The "<number> <unit> ago" string, or None when no known unit is
        preceded by a number and a single space.
    """
    # (spelling searched for, spelling emitted), in priority order.
    unit_spellings = (
        ("seconds", "seconds"),
        ("minutes", "minutes"),
        ("mins", "mins"),
        ("hrs", "hrs"),
        ("hours", "hours"),
        ("hour", "hour"),
        ("hr", "hour"),
        ("days", "days"),
        ("day", "day"),
        ("weeks", "weeks"),
        ("week", "week"),
        ("months", "months"),
        ("month", "month"),
        ("years", "years"),
        ("yrs", "years"),
        ("year", "year"),
        ("yr", "year"),
    )

    for searched, emitted in unit_spellings:
        found = re.search(r"(\d+) " + searched, expression)
        if found is not None:
            return "{} {} ago".format(found.group(1), emitted)

    # No recognised "<number> <unit>" pair in the text.
    return None
# Function to generate random MAC addresses
def random_mac():
    """Return a random MAC address string with a fixed ``AA:BB:CC:DD`` prefix.

    The result has six colon-separated, upper-case, two-digit hex octets. The
    previous prefix had five octets and two random ones were appended, which
    produced seven octets.
    """
    random_octets = [f"{random.randint(0, 255):02X}" for _ in range(2)]
    return "AA:BB:CC:DD:" + ":".join(random_octets)
# Function to generate random IP addresses
def random_ip():
    """Return a random dotted-quad string whose four octets are each 1-255.

    The address is built on a single line; the earlier triple-quoted f-string
    leaked its line breaks (and source indentation) into the returned value.
    """
    return ".".join(str(random.randint(1, 255)) for _ in range(4))
# Generate synthetic data for the device table
def generate_device_data(
    conn,
    n=1000,
):
    """Create ``n`` synthetic devices, write them to the ``device`` table and
    return them.

    Args:
        conn: Database connection handed to ``DataFrame.to_sql``.
        n: Number of device rows to generate.

    Returns:
        The pandas DataFrame that was written (one row per device).
    """

    def _random_uptime():
        # Single-line "<d> days <h>:<m>:<s>"; the earlier triple-quoted
        # f-string embedded a newline and indentation in the value.
        return (
            f"{random.randint(0, 10)} days {random.randint(0, 23)}"
            f":{random.randint(0, 59)}:{random.randint(0, 59)}"
        )

    device_data = {
        "switchip": [random_ip() for _ in range(n)],
        "hwsku": [f"HW{i+1}" for i in range(n)],
        "hostname": [f"switch{i+1}" for i in range(n)],
        "osversion": [f"v{i+1}" for i in range(n)],
        # Even row indices are L2, odd ones are L3.
        "layer": ["L2" if i % 2 == 0 else "L3" for i in range(n)],
        "region": [random.choice(["US", "EU", "ASIA"]) for _ in range(n)],
        "uptime": [_random_uptime() for _ in range(n)],
        "device_mac_address": [random_mac() for _ in range(n)],
    }
    df = pd.DataFrame(device_data)
    df.to_sql("device", conn, index=False)
    return df
# Generate synthetic data for the interfacestats table
def generate_interface_stats_data(conn, device_df, n=1000):
    """Write ``n`` synthetic interface-counter rows to the ``interfacestats`` table.

    Args:
        conn: Database connection handed to ``DataFrame.to_sql``.
        device_df: Frame with a ``device_mac_address`` column to sample from.
        n: Number of rows to generate.
    """
    mac_pool = device_df["device_mac_address"]
    port_names = ["eth0", "eth1", "eth2", "eth3"]
    max_age_minutes = 1440 * 5  # timestamps fall within the past 5 days

    def _sample_row():
        # Dict values are evaluated top to bottom, one random draw per field.
        return {
            "device_mac_address": random.choice(mac_pool),
            "ifname": random.choice(port_names),
            "time": datetime.now(timezone.utc)
            - timedelta(minutes=random.randint(0, max_age_minutes)),
            "in_discards": random.randint(0, 1000),
            "in_errors": random.randint(0, 500),
            "out_discards": random.randint(0, 800),
            "out_errors": random.randint(0, 400),
            "in_octets": random.randint(1000, 100000),
            "out_octets": random.randint(1000, 100000),
        }

    rows = [_sample_row() for _ in range(n)]
    pd.DataFrame(rows).to_sql("interfacestats", conn, index=False)
# Generate synthetic data for the ts_flow table
def generate_flow_data(conn, device_df, n=1000):
    """Write ``n`` synthetic flow records to the ``ts_flow`` table.

    Args:
        conn: Database connection handed to ``DataFrame.to_sql``.
        device_df: Frame with a ``switchip`` column to sample sampler
            addresses from.
        n: Number of rows to generate.
    """
    flow_data = []
    for _ in range(n):
        sampler_address = random.choice(device_df["switchip"])
        proto = random.choice(["TCP", "UDP"])
        src_addr = random_ip()
        dst_addr = random_ip()
        src_port = random.randint(1024, 65535)
        dst_port = random.randint(1024, 65535)
        in_if = random.randint(1, 10)
        out_if = random.randint(1, 10)

        # Both endpoints lie 1-30 days in the past. They used to be drawn
        # independently, so a flow could end before it started; ordering the
        # two offsets guarantees flow_start <= flow_end.
        now = datetime.now()
        first_offset = random.randint(1, 30)
        second_offset = random.randint(1, 30)
        start_days_ago = max(first_offset, second_offset)
        end_days_ago = min(first_offset, second_offset)
        flow_start = int((now - timedelta(days=start_days_ago)).timestamp())
        flow_end = int((now - timedelta(days=end_days_ago)).timestamp())

        bytes_transferred = random.randint(1000, 100000)
        packets = random.randint(1, 1000)
        # Record timestamp: random point within the past 5 days (UTC).
        flow_time = datetime.now(timezone.utc) - timedelta(
            minutes=random.randint(0, 1440 * 5)
        )
        flow_data.append(
            {
                "sampler_address": sampler_address,
                "proto": proto,
                "src_addr": src_addr,
                "dst_addr": dst_addr,
                "src_port": src_port,
                "dst_port": dst_port,
                "in_if": in_if,
                "out_if": out_if,
                "flow_start": flow_start,
                "flow_end": flow_end,
                "bytes": bytes_transferred,
                "packets": packets,
                "time": flow_time,
            }
        )
    df = pd.DataFrame(flow_data)
    df.to_sql("ts_flow", conn, index=False)
def load_params(req):
    """Translate a request into SQL bind parameters and WHERE-clause fragments.

    Args:
        req: Object exposing ``from_time``, ``ifname``, ``region`` and the
            ``min_*`` / ``max_*`` error and discard thresholds as attributes.

    Returns:
        On success a ``(params, filters)`` tuple: ``params`` maps bind names
        to values (always including ``from_time``) and ``filters`` is a list
        of SQL conditions using those bind names.
        When ``from_time`` is given but cannot be parsed, a dict with a single
        ``"error"`` key is returned instead - callers must check for it.
    """
    # Step 1: resolve the natural-language from_time into a datetime.
    if req.from_time:
        logger.info("%s\n\nCaptured from time: %s\n\n", "* " * 50, req.from_time)
        parsed_time = parse(req.from_time, settings={"RELATIVE_BASE": datetime.now()})
        if not parsed_time:
            # Retry with the phrase rewritten as "<n> <unit> ago".
            conv_time = convert_to_ago_format(req.from_time)
            if conv_time:
                parsed_time = parse(
                    conv_time, settings={"RELATIVE_BASE": datetime.now()}
                )
        if not parsed_time:
            # Covers both "no rewrite possible" and "rewrite still not
            # parseable"; the latter used to fall through with from_time=None.
            return {
                "error": (
                    "Invalid from_time format. Please provide a valid time "
                    "description such as 'past 7 days' or 'since last month'."
                )
            }
        logger.info("\n\nConverted from time: %s\n\n%s\n\n", parsed_time, "* " * 50)
        from_time = parsed_time
        # %s, not %f: from_time is a datetime and %f makes logging fail to
        # format the record.
        logger.info("Using parsed from_time: %s", from_time)
    else:
        # No from_time supplied: default to the past 7 days.
        from_time = datetime.now() - timedelta(days=7)
        logger.info("Using default from_time: %s", from_time)

    # Step 2: build the dynamic filters from whichever options are set.
    filters = []
    params = {"from_time": from_time}

    # Equality filters apply only when the value is truthy.
    for attr, column in (("ifname", "i.ifname"), ("region", "d.region")):
        value = getattr(req, attr)
        if value:
            filters.append(f"{column} = :{attr}")
            params[attr] = value

    # Range filters apply whenever the bound is not None (0 is a valid bound).
    for counter in ("in_errors", "out_errors", "in_discards", "out_discards"):
        for bound, operator in (("min", ">="), ("max", "<=")):
            attr = f"{bound}_{counter}"
            value = getattr(req, attr)
            if value is not None:
                filters.append(f"i.{counter} {operator} :{attr}")
                params[attr] = value

    return params, filters

View file

@ -0,0 +1,18 @@
# Stage 1: Build the application using Maven
# NOTE(review): this stage compiles with a JDK 18 image while the runtime
# stage below uses JDK 17 - confirm the compiler target set by the POM is
# not newer than 17.
FROM maven:3.8.7-openjdk-18-slim AS build
WORKDIR /app
# Copy pom.xml and download dependencies first so this layer stays cached
# until pom.xml changes
COPY pom.xml .
RUN mvn dependency:go-offline
# Copy the source code and build the application (tests are skipped)
COPY src ./src
RUN mvn clean package -DskipTests
# Stage 2: Run the application using a slim JDK image
FROM openjdk:17-jdk-slim
WORKDIR /app
# Copy the built jar from the previous stage; the file name must match the
# artifactId and version declared in pom.xml
COPY --from=build /app/target/weather-forecast-service-0.0.1-SNAPSHOT.jar app.jar
# Document the container port as 8081. Spring Boot's default is 8080, so
# NOTE(review): confirm the app sets server.port=8081 elsewhere.
EXPOSE 8081
ENTRYPOINT ["java", "-jar", "app.jar"]

View file

@ -0,0 +1,45 @@
version: v0.1
listener:
address: 127.0.0.1
port: 10000 #If you configure port 443, you'll need to update the listener with tls_certificates
message_format: huggingface
# Centralized way to manage LLMs, manage keys, retry logic, failover and limits in a central way
llm_providers:
- name: OpenAI
provider_interface: openai
access_key: $OPENAI_API_KEY
model: gpt-4o-mini
default: true
# Arch creates a round-robin load balancing between different endpoints, managed via the cluster subsystem.
endpoints:
weather_forecast_service:
# value could be ip address or a hostname with port
# this could also be a list of endpoints for load balancing
# for example endpoint: [ ip1:port, ip2:port ]
endpoint: host.docker.internal:18081
# max time to wait for a connection to be established
connect_timeout: 0.005s
# default system prompt used by all prompt targets
system_prompt: |
You are a helpful weather assistant.
prompt_targets:
- name: weather_forecast
description: get the weather forecast
parameters:
- name: location
description: the location for which to get the weather forecast
required: true
type: string
format: City, State
- name: days
description: the number of days for the forecast
required: true
type: int
endpoint:
name: weather_forecast_service
path: /weather
http_method: POST

View file

@ -1,18 +1,14 @@
services:
api_server:
weather_forecast_service:
build:
context: .
dockerfile: Dockerfile
ports:
- "18083:80"
healthcheck:
test: ["CMD", "curl" ,"http://localhost:80/healthz"]
interval: 5s
retries: 20
- "18081:8081"
chatbot_ui:
build:
context: ../shared/chatbot_ui
context: ../../shared/chatbot_ui
dockerfile: Dockerfile
ports:
- "18080:8080"

View file

@ -0,0 +1,40 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>weather</groupId>
<artifactId>weather-forecast-service</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.7.10</version>
<relativePath/>
</parent>
<dependencies>
<!-- Spring Boot Starter Web -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Spring Boot Maven Plugin -->
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>

View file

@ -0,0 +1,12 @@
// File: src/main/java/weather/WeatherForecastApplication.java
package weather;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Entry point of the weather forecast service.
 */
@SpringBootApplication
public class WeatherForecastApplication {

    /**
     * Hands control to Spring Boot, using this class as the primary source.
     *
     * @param args command-line arguments passed through to Spring Boot
     */
    public static void main(String... args) {
        SpringApplication.run(WeatherForecastApplication.class, args);
    }
}

View file

@ -0,0 +1,54 @@
package weather.controller;
import weather.model.DayForecast;
import weather.model.WeatherForecastResponse;
import weather.model.WeatherRequest;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RestController;
import java.time.Instant;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
/**
 * REST controller that serves randomly generated weather forecasts.
 */
@RestController
public class WeatherController {

    private final Random random = new Random();

    /**
     * Builds a forecast of {@code req.getDays()} consecutive days starting today.
     *
     * <p>Temperatures are drawn in Fahrenheit and converted to Celsius when the
     * requested units are "celsius" or "c" (case-insensitive). The location and
     * units are echoed back unchanged.
     *
     * @param req request body carrying location, number of days and units
     * @return the forecast; the daily list is empty when days is zero or negative
     */
    @PostMapping("/weather")
    public WeatherForecastResponse getRandomWeatherForecast(@RequestBody WeatherRequest req) {
        final String units = req.getUnits();
        // Constant-first comparison: a null units value (e.g. an explicit JSON null)
        // no longer throws NullPointerException and is treated as "not Celsius".
        final boolean useCelsius = "celsius".equalsIgnoreCase(units) || "c".equalsIgnoreCase(units);
        // Read the date once so every entry is offset from the same day.
        final LocalDate today = LocalDate.now();

        WeatherForecastResponse response = new WeatherForecastResponse();
        response.setLocation(req.getLocation());
        response.setUnits(units);

        List<DayForecast> forecasts = new ArrayList<>();
        for (int i = 0; i < req.getDays(); i++) {
            // Random min temperature between 50 and 89 (inclusive)
            int minTemp = random.nextInt(90 - 50) + 50;
            // Max temperature between (minTemp + 5) and (minTemp + 19)
            int maxTemp = random.nextInt(15) + (minTemp + 5);

            double finalMinTemp = minTemp;
            double finalMaxTemp = maxTemp;
            if (useCelsius) {
                finalMinTemp = (minTemp - 32) * 5.0 / 9.0;
                finalMaxTemp = (maxTemp - 32) * 5.0 / 9.0;
            }

            DayForecast dayForecast = new DayForecast();
            dayForecast.setDate(today.plusDays(i).toString());
            dayForecast.setMin(finalMinTemp);
            dayForecast.setMax(finalMaxTemp);
            dayForecast.setUnits(units);
            forecasts.add(dayForecast);
        }

        response.setDailyForecast(forecasts);
        return response;
    }
}

View file

@ -0,0 +1,40 @@
package weather.model;
/**
 * Forecast for a single day: a date string, the temperature units and the
 * minimum / maximum temperature. Plain mutable bean with a no-arg constructor.
 */
public class DayForecast {

    private String date;
    private String units;
    private double min;
    private double max;

    /** Creates an empty forecast; all values are set through the setters. */
    public DayForecast() {
    }

    /** @return the date string, or {@code null} if never set */
    public String getDate() {
        return this.date;
    }

    /** @param value the date string to store */
    public void setDate(String value) {
        this.date = value;
    }

    /** @return the units label, or {@code null} if never set */
    public String getUnits() {
        return this.units;
    }

    /** @param value the units label to store */
    public void setUnits(String value) {
        this.units = value;
    }

    /** @return the minimum temperature */
    public double getMin() {
        return this.min;
    }

    /** @param value the minimum temperature to store */
    public void setMin(double value) {
        this.min = value;
    }

    /** @return the maximum temperature */
    public double getMax() {
        return this.max;
    }

    /** @param value the maximum temperature to store */
    public void setMax(double value) {
        this.max = value;
    }
}

View file

@ -0,0 +1,37 @@
package weather.model;
import java.util.List;
/**
 * Response payload of the weather endpoint: the requested location, the units
 * label and the list of per-day forecasts. The list is exposed through the
 * {@code dailyForecast} accessor pair although the field is named
 * {@code forecast}.
 */
public class WeatherForecastResponse {

    private String location;
    private String units;
    private List<DayForecast> forecast;

    /** Creates an empty response; all values are set through the setters. */
    public WeatherForecastResponse() {
    }

    /** @return the location, or {@code null} if never set */
    public String getLocation() {
        return this.location;
    }

    /** @param value the location to store */
    public void setLocation(String value) {
        this.location = value;
    }

    /** @return the units label, or {@code null} if never set */
    public String getUnits() {
        return this.units;
    }

    /** @param value the units label to store */
    public void setUnits(String value) {
        this.units = value;
    }

    /** @return the per-day forecasts, or {@code null} if never set */
    public List<DayForecast> getDailyForecast() {
        return this.forecast;
    }

    /** @param value the per-day forecasts to store */
    public void setDailyForecast(List<DayForecast> value) {
        this.forecast = value;
    }
}

View file

@ -0,0 +1,29 @@
package weather.model;
/**
 * Request payload of the weather endpoint. Fields left unset keep their
 * defaults: 7 days and Fahrenheit units.
 */
public class WeatherRequest {

    private String location;
    private int days = 7;
    // Spelling corrected from "Farenheit"; this value is echoed back to clients
    // in the response when the request does not specify units.
    private String units = "Fahrenheit";

    /** Creates a request carrying only the default values. */
    public WeatherRequest() {}

    /** @return the requested location, or {@code null} if never set */
    public String getLocation() {
        return location;
    }

    /** @param location the location to forecast */
    public void setLocation(String location) {
        this.location = location;
    }

    /** @return the number of forecast days (default 7) */
    public int getDays() {
        return days;
    }

    /** @param days the number of forecast days */
    public void setDays(int days) {
        this.days = days;
    }

    /** @return the units label (default "Fahrenheit") */
    public String getUnits() {
        return units;
    }

    /** @param units the units label */
    public void setUnits(String units) {
        this.units = units;
    }
}

View file

@ -12,6 +12,11 @@ llm_providers:
provider_interface: openai
model: gpt-4o
endpoints:
frankfurther_api:
endpoint: api.frankfurter.dev
protocol: https
system_prompt: |
You are a helpful assistant.
@ -26,7 +31,7 @@ prompt_targets:
description: Get currency exchange rate from USD to other currencies
parameters:
- name: currency_symbol
description: the currency that needs conversion
description: currency symbol to convert from USD
required: true
type: str
in_path: true
@ -42,11 +47,6 @@ prompt_targets:
name: frankfurther_api
path: /v1/currencies
endpoints:
frankfurther_api:
endpoint: api.frankfurter.dev:443
protocol: https
tracing:
random_sampling: 100
trace_arch_internal: true

View file

@ -1,7 +1,7 @@
services:
chatbot_ui:
build:
context: ../shared/chatbot_ui
context: ../../shared/chatbot_ui
ports:
- "18080:8080"
environment:
@ -14,7 +14,7 @@ services:
jaeger:
build:
context: ../shared/jaeger
context: ../../shared/jaeger
ports:
- "16686:16686"
- "4317:4317"

View file

@ -8,7 +8,6 @@ services:
- CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
volumes:
- ./arch_config.yaml:/app/arch_config.yaml
- ../shared/chatbot_ui/common.py:/app/common.py
ports:
- "18080:80"
healthcheck:
@ -18,7 +17,7 @@ services:
chatbot_ui:
build:
context: ../shared/chatbot_ui
context: ../../shared/chatbot_ui
dockerfile: Dockerfile
ports:
- "18080:8080"

View file

Before

Width:  |  Height:  |  Size: 549 KiB

After

Width:  |  Height:  |  Size: 549 KiB

Before After
Before After

View file

@ -12,7 +12,7 @@ services:
chatbot_ui:
build:
context: ../shared/chatbot_ui
context: ../../shared/chatbot_ui
dockerfile: Dockerfile
ports:
- "18080:8080"

View file

Before

Width:  |  Height:  |  Size: 852 KiB

After

Width:  |  Height:  |  Size: 852 KiB

Before After
Before After

View file

@ -7,13 +7,13 @@ WORKDIR /src
COPY requirements.txt /src/
RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt
COPY . /src
COPY ../. /src
FROM python:3.12-slim AS output
COPY --from=builder /runtime /usr/local
COPY . /app
COPY ../. /app
WORKDIR /app
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80", "--log-level", "info"]

View file

@ -8,7 +8,7 @@ services:
chatbot_ui:
build:
context: ../shared/chatbot_ui
context: ../../shared/chatbot_ui
dockerfile: Dockerfile
ports:
- "18080:8080"

View file

Before

Width:  |  Height:  |  Size: 636 KiB

After

Width:  |  Height:  |  Size: 636 KiB

Before After
Before After

View file

@ -24,6 +24,7 @@ start_demo() {
# Step 4: Start Network Agent
echo "Starting Network Agent using Docker Compose..."
cd build
docker compose up -d # Run in detached mode
}

View file

@ -0,0 +1,9 @@
This demo shows how you can use a publicly hosted REST API that is protected by an access key.
Before you start the demo make sure you set `OPENAI_API_KEY` and `TWELVEDATA_API_KEY`.
To get `TWELVEDATA_API_KEY` please head over to https://twelvedata.com/.
The following screenshot shows an interaction with the stock quote demo:
![alt text](stock_quote_demo.png)

View file

@ -0,0 +1,69 @@
version: v0.1
listener:
address: 0.0.0.0
port: 10000
message_format: huggingface
connect_timeout: 0.005s
llm_providers:
- name: gpt-4o
access_key: $OPENAI_API_KEY
provider_interface: openai
model: gpt-4o
endpoints:
twelvedata_api:
endpoint: api.twelvedata.com
protocol: https
system_prompt: |
You are a helpful assistant.
prompt_guards:
input_guards:
jailbreak:
on_exception:
message: Looks like you're curious about my abilities, but I can only provide assistance for stock quotes.
prompt_targets:
- name: stock_quote
description: get current stock exchange rate for a given symbol
parameters:
- name: symbol
description: Stock symbol
required: true
type: str
endpoint:
name: twelvedata_api
path: /quote
http_headers:
Authorization: "apikey $TWELVEDATA_API_KEY"
system_prompt: |
You are a helpful stock exchange assistant. You are given stock symbol along with its exchange rate in json format. Your task is to parse the data and present it in a human-readable format. Keep the details to highlevel and be concise.
- name: stock_quote_time_series
description: get historical stock exchange rate for a given symbol
parameters:
- name: symbol
description: Stock symbol
required: true
type: str
- name: interval
description: Time interval
default: 1day
enum:
- 1h
- 1day
type: str
endpoint:
name: twelvedata_api
path: /time_series
http_headers:
Authorization: "apikey $TWELVEDATA_API_KEY"
system_prompt: |
You are a helpful stock exchange assistant. You are given stock symbol along with its historical data in json format. Your task is to parse the data and present it in a human-readable format. Keep the details to highlevel only and be concise.
tracing:
random_sampling: 100
trace_arch_internal: true

View file

@ -0,0 +1,21 @@
services:
chatbot_ui:
build:
context: ../../shared/chatbot_ui
ports:
- "18080:8080"
environment:
# this is only because we are running the sample app in the same docker container environment as archgw
- CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
extra_hosts:
- "host.docker.internal:host-gateway"
volumes:
- ./arch_config.yaml:/app/arch_config.yaml
jaeger:
build:
context: ../../shared/jaeger
ports:
- "16686:16686"
- "4317:4317"
- "4318:4318"

View file

@ -22,7 +22,7 @@ start_demo() {
echo "Starting Arch with arch_config.yaml..."
archgw up arch_config.yaml
# Step 4: Start Network Agent
# Step 4: Start developer services
echo "Starting Network Agent using Docker Compose..."
docker compose up -d # Run in detached mode
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 673 KiB

View file

@ -11,7 +11,7 @@ services:
chatbot_ui:
build:
context: ../shared/chatbot_ui
context: ../../shared/chatbot_ui
ports:
- "18080:8080"
environment:
@ -24,12 +24,12 @@ services:
otel-collector:
build:
context: ../shared/honeycomb/
context: ../../shared/honeycomb/
ports:
- "4317:4317"
- "4318:4318"
volumes:
- ../shared/honeycomb/otel-collector-config.yaml:/etc/otel-collector-config.yaml
- ../../shared/honeycomb/otel-collector-config.yaml:/etc/otel-collector-config.yaml
env_file:
- .env
environment:
@ -37,10 +37,10 @@ services:
prometheus:
build:
context: ../shared/prometheus
context: ../../shared/prometheus
grafana:
build:
context: ../shared/grafana
context: ../../shared/grafana
ports:
- "3000:3000"

View file

@ -11,7 +11,7 @@ services:
chatbot_ui:
build:
context: ../shared/chatbot_ui
context: ../../shared/chatbot_ui
ports:
- "18080:8080"
environment:
@ -24,7 +24,7 @@ services:
jaeger:
build:
context: ../shared/jaeger
context: ../../shared/jaeger
ports:
- "16686:16686"
- "4317:4317"
@ -32,10 +32,10 @@ services:
prometheus:
build:
context: ../shared/prometheus
context: ../../shared/prometheus
grafana:
build:
context: ../shared/grafana
context: ../../shared/grafana
ports:
- "3000:3000"

View file

@ -11,7 +11,7 @@ services:
chatbot_ui:
build:
context: ../shared/chatbot_ui
context: ../../shared/chatbot_ui
ports:
- "18080:8080"
environment:
@ -24,12 +24,12 @@ services:
otel-collector:
build:
context: ../shared/logfire/
context: ../../shared/logfire/
ports:
- "4317:4317"
- "4318:4318"
volumes:
- ../shared/logfire/otel-collector-config.yaml:/etc/otel-collector-config.yaml
- ../../shared/logfire/otel-collector-config.yaml:/etc/otel-collector-config.yaml
env_file:
- .env
environment:
@ -37,10 +37,10 @@ services:
prometheus:
build:
context: ../shared/prometheus
context: ../../shared/prometheus
grafana:
build:
context: ../shared/grafana
context: ../../shared/grafana
ports:
- "3000:3000"

View file

@ -1,5 +1,5 @@
include:
- ../shared/signoz/docker-compose-minimal.yaml
- ../../shared/signoz/docker-compose-minimal.yaml
services:
weather_forecast_service:
@ -14,7 +14,7 @@ services:
chatbot_ui:
build:
context: ../shared/chatbot_ui
context: ../../shared/chatbot_ui
ports:
- "18080:8080"
environment:
@ -27,10 +27,10 @@ services:
prometheus:
build:
context: ../shared/prometheus
context: ../../shared/prometheus
grafana:
build:
context: ../shared/grafana
context: ../../shared/grafana
ports:
- "3000:3000"

View file

@ -11,7 +11,7 @@ services:
chatbot_ui:
build:
context: ../shared/chatbot_ui
context: ../../shared/chatbot_ui
ports:
- "18080:8080"
environment:
@ -19,23 +19,3 @@ services:
- CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1
extra_hosts:
- "host.docker.internal:host-gateway"
volumes:
- ./arch_config.yaml:/app/arch_config.yaml
jaeger:
build:
context: ../shared/jaeger
ports:
- "16686:16686"
- "4317:4317"
- "4318:4318"
prometheus:
build:
context: ../shared/prometheus
grafana:
build:
context: ../shared/grafana
ports:
- "3000:3000"

Some files were not shown because too many files have changed in this diff Show more