From 452084423cf4c52f0816794a5b76aa5ade24b4c5 Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Fri, 17 Jan 2025 18:47:26 -0800 Subject: [PATCH 01/11] add PR to release 0.1.9 (#371) --- README.md | 2 +- arch/tools/README.md | 2 +- arch/tools/poetry.lock | 24 ++++++++++++++++++------ arch/tools/pyproject.toml | 4 ++-- docs/source/conf.py | 2 +- docs/source/get_started/quickstart.rst | 2 +- model_server/pyproject.toml | 2 +- 7 files changed, 25 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 184aecaf..ab91e5c1 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ Arch's CLI allows you to manage and interact with the Arch gateway efficiently. ```console $ python -m venv venv $ source venv/bin/activate # On Windows, use: venv\Scripts\activate -$ pip install archgw==0.1.8 +$ pip install archgw==0.1.9 ``` ### Build AI Agent with Arch Gateway diff --git a/arch/tools/README.md b/arch/tools/README.md index f8cfc0b3..a84e423c 100644 --- a/arch/tools/README.md +++ b/arch/tools/README.md @@ -19,7 +19,7 @@ source venv/bin/activate ### Step 3: Run the build script ```bash -pip install archgw==0.1.8 +pip install archgw==0.1.9 ``` ## Uninstall Instructions: archgw CLI diff --git a/arch/tools/poetry.lock b/arch/tools/poetry.lock index 81ab0a39..f3cf27bd 100644 --- a/arch/tools/poetry.lock +++ b/arch/tools/poetry.lock @@ -2,7 +2,7 @@ [[package]] name = "archgw_modelserver" -version = "0.1.8" +version = "0.1.9" description = "A model server for serving models" optional = false python-versions = "*" @@ -370,18 +370,19 @@ files = [ [[package]] name = "referencing" -version = "0.35.1" +version = "0.36.1" description = "JSON Referencing + Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "referencing-0.35.1-py3-none-any.whl", hash = "sha256:eda6d3234d62814d1c64e305c1331c9a3a6132da475ab6382eaa997b21ee75de"}, - {file = "referencing-0.35.1.tar.gz", hash = 
"sha256:25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c"}, + {file = "referencing-0.36.1-py3-none-any.whl", hash = "sha256:363d9c65f080d0d70bc41c721dce3c7f3e77fc09f269cd5c8813da18069a6794"}, + {file = "referencing-0.36.1.tar.gz", hash = "sha256:ca2e6492769e3602957e9b831b94211599d2aade9477f5d44110d2530cf9aade"}, ] [package.dependencies] attrs = ">=22.2.0" rpds-py = ">=0.7.0" +typing-extensions = {version = ">=4.4.0", markers = "python_version < \"3.13\""} [[package]] name = "requests" @@ -536,6 +537,17 @@ enabler = ["pytest-enabler (>=2.2)"] test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (>=1.12,<1.14)", "pytest-mypy"] +[[package]] +name = "typing-extensions" +version = "4.12.2" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, + {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, +] + [[package]] name = "urllib3" version = "2.3.0" @@ -556,4 +568,4 @@ zstd = ["zstandard (>=0.18.0)"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "ab027b62af1cbf96ef77b4db808824d66b09b065f1b18d215bdc6110eb766d10" +content-hash = "6de280d17cc7131265568e89c10c22af1a5e4ac5a99d5ce0585135d95119ee64" diff --git a/arch/tools/pyproject.toml b/arch/tools/pyproject.toml index 008c967c..08be8b86 100644 --- 
a/arch/tools/pyproject.toml +++ b/arch/tools/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "archgw" -version = "0.1.8" +version = "0.1.9" description = "Python-based CLI tool to manage Arch Gateway." authors = ["Katanemo Labs, Inc."] packages = [ @@ -10,7 +10,7 @@ readme = "README.md" [tool.poetry.dependencies] python = "^3.10" -archgw_modelserver = "^0.1.8" +archgw_modelserver = "^0.1.9" click = "^8.1.7" jinja2 = "^3.1.4" jsonschema = "^4.23.0" diff --git a/docs/source/conf.py b/docs/source/conf.py index 8359b195..db84f3ec 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -15,7 +15,7 @@ from sphinxawesome_theme.postprocess import Icons project = "Arch Docs" copyright = "2025, Katanemo Labs, Inc" author = "Katanemo Labs, Inc" -release = " v0.1.8" +release = " v0.1.9" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst index fd9f592c..aa4deae2 100644 --- a/docs/source/get_started/quickstart.rst +++ b/docs/source/get_started/quickstart.rst @@ -25,7 +25,7 @@ Arch's CLI allows you to manage and interact with the Arch gateway efficiently. 
$ python -m venv venv $ source venv/bin/activate # On Windows, use: venv\Scripts\activate - $ pip install archgw==0.1.8 + $ pip install archgw==0.1.9 Build AI Agent with Arch Gateway diff --git a/model_server/pyproject.toml b/model_server/pyproject.toml index 63c5a045..a7ee92ee 100644 --- a/model_server/pyproject.toml +++ b/model_server/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "archgw_modelserver" -version = "0.1.8" +version = "0.1.9" description = "A model server for serving models" authors = ["Katanemo Labs, Inc "] license = "Apache 2.0" From c8b5137d373d09ad13bca090e259d62b2c284af4 Mon Sep 17 00:00:00 2001 From: Salman Paracha Date: Mon, 20 Jan 2025 13:56:09 -0800 Subject: [PATCH 02/11] updated README based on feedback on reddit (#372) * updated README based on feedback on reddit * fixed typo --------- Co-authored-by: Salman Paracha --- README.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index ab91e5c1..d0bc7ad9 100644 --- a/README.md +++ b/README.md @@ -7,19 +7,18 @@ [![e2e tests](https://github.com/katanemo/arch/actions/workflows/e2e_tests.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/e2e_tests.yml) [![Build and Deploy Documentation](https://github.com/katanemo/arch/actions/workflows/static.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/static.yml) -## Build fast, observable, and personalized AI agents. +## Fast, observable, and personalized agentic applciations. -Arch is an intelligent [Layer 7](https://www.cloudflare.com/learning/ddos/what-is-layer-7/) gateway designed to protect, observe, and personalize AI agents with your APIs. +Arch is an intelligent proxy server designed for prompts - to help you protect, observe, and build fully agentic experiences by simply writing APIs. 
Built on (and by the contributors of) [Envoy Proxy](https://www.envoyproxy.io/) with the belief that: -Engineered with purpose-built LLMs, Arch handles the critical but undifferentiated tasks related to the handling and processing of prompts, including detecting and rejecting [jailbreak](https://github.com/verazuo/jailbreak_llms) attempts, intelligently calling "backend" APIs to fulfill the user's request represented in a prompt, routing to and offering disaster recovery between upstream LLMs, and managing the observability of prompts and LLM API calls in a centralized way. +>Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests including secure handling, intelligent routing, robust observability, and integration with backend (API) systems for personalization – outside core business logic.* - Arch is built on (and by the core contributors of) [Envoy Proxy](https://www.envoyproxy.io/) with the belief that: +Arch is engineered with purpose-built LLMs to handle critical but undifferentiated tasks related to the handling and processing of prompts. This includes detecting and rejecting [jailbreak](https://github.com/verazuo/jailbreak_llms) attempts, intelligent task routing for improved accuracy, mapping user request into "backend" functions, and managing the observability of prompts and LLM API calls in a centralized way. ->Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests including secure handling, intelligent routing, robust observability, and integration with backend (API) systems for personalization – all outside business logic.* **Core Features**: - Built on [Envoy](https://envoyproxy.io): Arch runs alongside application servers as a separate containerized process, and builds on top of Envoy's proven HTTP management and scalability features to handle ingress and egress traffic related to prompts and LLMs. - - Intent Routing & Fast Function Calling. 
Engineered with purpose-built [LLMs](https://huggingface.co/collections/katanemo/arch-function-66f209a693ea8df14317ad68) to handle fast, cost-effective, and accurate prompt-based tasks like function/API calling, and parameter extraction from prompts to build smarter more accurate agentic applications. + - Task Routing & Fast Function Calling. Engineered with purpose-built [LLMs](https://huggingface.co/collections/katanemo/arch-function-66f209a693ea8df14317ad68) to handle fast, cost-effective, and accurate prompt-based tasks like function/API calling, and parameter extraction from prompts to build more task-accurate agentic applications. - Prompt [Guard](https://huggingface.co/collections/katanemo/arch-guard-6702bdc08b889e4bce8f446d): Arch centralizes guardrails to prevent jailbreak attempts and ensure safe user interactions without writing a single line of code. - Routing & Traffic Management: Arch centralizes calls to LLMs used by your applications, offering smart retries, automatic cutover, and resilient upstream connections for continuous availability. - Observability: Arch uses the W3C Trace Context standard to enable complete request tracing across applications, ensuring compatibility with observability tools, and provides metrics to monitor latency, token usage, and error rates, helping optimize AI application performance. 
From e2ec2f6bb88489e4b3b22e0a9322ba2713b0101d Mon Sep 17 00:00:00 2001 From: Salman Paracha Date: Mon, 20 Jan 2025 14:44:40 -0800 Subject: [PATCH 03/11] Salmanap/fix readme 019a (#373) * updated README based on feedback on reddit * fixed typo * updating README with minor fixes * more fixes to README * updated README * updated README * updated README --------- Co-authored-by: Salman Paracha --- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index d0bc7ad9..8e27af9f 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,7 @@ +Arch is an **intelligent proxy server designed for prompts** - to help you protect, observe, and build fully agentic apps, by integrating with (existing) backend APIs. Built by the contributors of [Envoy Proxy](https://www.envoyproxy.io/) with the belief that: + +>Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests including secure handling, intelligent routing, robust observability, and integration with backend (API) systems for personalization – outside core business logic.* + ![alt text](docs/source/_static/img/arch-logo.png) Arch - Build fast, hyper-personalized agents with intelligent infra | Product Hunt @@ -7,11 +11,6 @@ [![e2e tests](https://github.com/katanemo/arch/actions/workflows/e2e_tests.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/e2e_tests.yml) [![Build and Deploy Documentation](https://github.com/katanemo/arch/actions/workflows/static.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/static.yml) -## Fast, observable, and personalized agentic applciations. - -Arch is an intelligent proxy server designed for prompts - to help you protect, observe, and build fully agentic experiences by simply writing APIs. 
Built on (and by the contributors of) [Envoy Proxy](https://www.envoyproxy.io/) with the belief that: - ->Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests including secure handling, intelligent routing, robust observability, and integration with backend (API) systems for personalization – outside core business logic.* Arch is engineered with purpose-built LLMs to handle critical but undifferentiated tasks related to the handling and processing of prompts. This includes detecting and rejecting [jailbreak](https://github.com/verazuo/jailbreak_llms) attempts, intelligent task routing for improved accuracy, mapping user request into "backend" functions, and managing the observability of prompts and LLM API calls in a centralized way. From 0fe0e775eea5d6ef27ab21d153e630b7d7e5c714 Mon Sep 17 00:00:00 2001 From: Salman Paracha Date: Mon, 20 Jan 2025 14:45:51 -0800 Subject: [PATCH 04/11] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8e27af9f..c706fa33 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -Arch is an **intelligent proxy server designed for prompts** - to help you protect, observe, and build fully agentic apps, by integrating with (existing) backend APIs. Built by the contributors of [Envoy Proxy](https://www.envoyproxy.io/) with the belief that: +Arch is an **intelligent proxy server designed for prompts** - to help you protect, observe, and build agentic apps, by seamlessly integrating with (existing) backend APIs. 
Built by the contributors of [Envoy Proxy](https://www.envoyproxy.io/) with the belief that: >Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests including secure handling, intelligent routing, robust observability, and integration with backend (API) systems for personalization – outside core business logic.* From 966901d2a57b37d10dc22d01881f6fdb0d1def5b Mon Sep 17 00:00:00 2001 From: Salman Paracha Date: Mon, 20 Jan 2025 14:46:14 -0800 Subject: [PATCH 05/11] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c706fa33..650a2ad4 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -Arch is an **intelligent proxy server designed for prompts** - to help you protect, observe, and build agentic apps, by seamlessly integrating with (existing) backend APIs. Built by the contributors of [Envoy Proxy](https://www.envoyproxy.io/) with the belief that: +Arch is an **intelligent proxy server designed for prompts** - to help you protect, observe, and build agentic apps, by seamlessly integrating with (existing) APIs. 
Built by the contributors of [Envoy Proxy](https://www.envoyproxy.io/) with the belief that: >Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests including secure handling, intelligent routing, robust observability, and integration with backend (API) systems for personalization – outside core business logic.* From 8d1f132b75b3394a738d1f7e459fd8f15b4b1f12 Mon Sep 17 00:00:00 2001 From: Salman Paracha Date: Mon, 20 Jan 2025 15:02:43 -0800 Subject: [PATCH 06/11] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 650a2ad4..455f672e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -Arch is an **intelligent proxy server designed for prompts** - to help you protect, observe, and build agentic apps, by seamlessly integrating with (existing) APIs. Built by the contributors of [Envoy Proxy](https://www.envoyproxy.io/) with the belief that: +Arch is an **intelligent proxy server designed for prompts** - to help you protect, observe, and quickly build agentic apps by effortlessly integrating (existing) APIs. 
Built by the contributors of [Envoy Proxy](https://www.envoyproxy.io/) with the belief that: >Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests including secure handling, intelligent routing, robust observability, and integration with backend (API) systems for personalization – outside core business logic.* From 4bbf6c382e48137df47e21f0239d2e3429e093d1 Mon Sep 17 00:00:00 2001 From: Salman Paracha Date: Mon, 20 Jan 2025 15:03:19 -0800 Subject: [PATCH 07/11] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 455f672e..b7fa3de6 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -Arch is an **intelligent proxy server designed for prompts** - to help you protect, observe, and quickly build agentic apps by effortlessly integrating (existing) APIs. Built by the contributors of [Envoy Proxy](https://www.envoyproxy.io/) with the belief that: +Arch is an **intelligent proxy server designed for prompts** - to help you protect, observe, and build agentic apps by effortlessly integrating (existing) APIs. 
Built by the contributors of [Envoy Proxy](https://www.envoyproxy.io/) with the belief that: >Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests including secure handling, intelligent routing, robust observability, and integration with backend (API) systems for personalization – outside core business logic.* From bea0dd4a830d10794255a002770c1a356bbcc7f3 Mon Sep 17 00:00:00 2001 From: Salman Paracha Date: Tue, 21 Jan 2025 10:56:17 -0800 Subject: [PATCH 08/11] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b7fa3de6..9cc250ad 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -Arch is an **intelligent proxy server designed for prompts** - to help you protect, observe, and build agentic apps by effortlessly integrating (existing) APIs. Built by the contributors of [Envoy Proxy](https://www.envoyproxy.io/) with the belief that: +Arch is an **intelligent proxy server designed for prompts** - to help you protect, observe, and build agentic apps by simply integrating (existing) APIs. 
Built by the contributors of [Envoy Proxy](https://www.envoyproxy.io/) with the belief that: >Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests including secure handling, intelligent routing, robust observability, and integration with backend (API) systems for personalization – outside core business logic.* From fcd8cfb9fc2c8dcd89a0f8b373cc96bc53af8091 Mon Sep 17 00:00:00 2001 From: Aayush <82300923+aayushwhiz@users.noreply.github.com> Date: Tue, 21 Jan 2025 17:15:27 -0800 Subject: [PATCH 09/11] add in honeycomb support for weather-forecast demo (#345) --- demos/shared/honeycomb/Dockerfile | 5 ++ .../honeycomb/otel-collector-config.yaml | 24 ++++++++++ .../docker-compose-honeycomb.yaml | 46 +++++++++++++++++++ demos/weather_forecast/run_demo.sh | 41 +++++++++++------ 4 files changed, 101 insertions(+), 15 deletions(-) create mode 100644 demos/shared/honeycomb/Dockerfile create mode 100644 demos/shared/honeycomb/otel-collector-config.yaml create mode 100644 demos/weather_forecast/docker-compose-honeycomb.yaml diff --git a/demos/shared/honeycomb/Dockerfile b/demos/shared/honeycomb/Dockerfile new file mode 100644 index 00000000..eec82c08 --- /dev/null +++ b/demos/shared/honeycomb/Dockerfile @@ -0,0 +1,5 @@ +FROM otel/opentelemetry-collector:latest + +COPY otel-collector-config.yaml /etc/otel-collector-config.yaml + +ENTRYPOINT ["/otelcol", "--config=/etc/otel-collector-config.yaml"] diff --git a/demos/shared/honeycomb/otel-collector-config.yaml b/demos/shared/honeycomb/otel-collector-config.yaml new file mode 100644 index 00000000..e261be84 --- /dev/null +++ b/demos/shared/honeycomb/otel-collector-config.yaml @@ -0,0 +1,24 @@ +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +exporters: + otlp: + endpoint: "api.honeycomb.io:443" + headers: + "x-honeycomb-team": "${HONEYCOMB_API_KEY}" + +processors: + batch: + timeout: 5s + +service: + pipelines: + traces: + 
receivers: [otlp] + processors: [batch] + exporters: [otlp] diff --git a/demos/weather_forecast/docker-compose-honeycomb.yaml b/demos/weather_forecast/docker-compose-honeycomb.yaml new file mode 100644 index 00000000..9f81fa69 --- /dev/null +++ b/demos/weather_forecast/docker-compose-honeycomb.yaml @@ -0,0 +1,46 @@ +services: + weather_forecast_service: + build: + context: ./ + environment: + - OLTP_HOST=http://otel-collector:4317 + extra_hosts: + - "host.docker.internal:host-gateway" + ports: + - "18083:80" + + chatbot_ui: + build: + context: ../shared/chatbot_ui + ports: + - "18080:8080" + environment: + # this is only because we are running the sample app in the same docker container environment as archgw + - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1 + extra_hosts: + - "host.docker.internal:host-gateway" + volumes: + - ./arch_config.yaml:/app/arch_config.yaml + + otel-collector: + build: + context: ../shared/honeycomb/ + ports: + - "4317:4317" + - "4318:4318" + volumes: + - ../shared/honeycomb/otel-collector-config.yaml:/etc/otel-collector-config.yaml + env_file: + - .env + environment: + - HONEYCOMB_API_KEY + + prometheus: + build: + context: ../shared/prometheus + + grafana: + build: + context: ../shared/grafana + ports: + - "3000:3000" diff --git a/demos/weather_forecast/run_demo.sh b/demos/weather_forecast/run_demo.sh index e764c9fa..a6f846fe 100644 --- a/demos/weather_forecast/run_demo.sh +++ b/demos/weather_forecast/run_demo.sh @@ -11,18 +11,21 @@ load_env() { # Function to determine the docker-compose file based on the argument get_compose_file() { case "$1" in - jaeger) - echo "docker-compose-jaeger.yaml" - ;; - logfire) - echo "docker-compose-logfire.yaml" - ;; - signoz) - echo "docker-compose-signoz.yaml" - ;; - *) - echo "docker-compose.yaml" - ;; + jaeger) + echo "docker-compose-jaeger.yaml" + ;; + logfire) + echo "docker-compose-logfire.yaml" + ;; + signoz) + echo "docker-compose-signoz.yaml" + ;; + honeycomb) + echo 
"docker-compose-honeycomb.yaml" + ;; + *) + echo "docker-compose.yaml" + ;; esac } @@ -44,12 +47,16 @@ start_demo() { echo "Error: LOGFIRE_API_KEY environment variable is required for Logfire." exit 1 fi + if [ "$1" == "honeycomb" ] && [ -z "$HONEYCOMB_API_KEY" ]; then + echo "Error: HONEYCOMB_API_KEY environment variable is required for Honeycomb." + exit 1 + fi # Create .env file echo "Creating .env file..." - echo "OPENAI_API_KEY=$OPENAI_API_KEY" > .env + echo "OPENAI_API_KEY=$OPENAI_API_KEY" >.env if [ "$1" == "logfire" ]; then - echo "LOGFIRE_API_KEY=$LOGFIRE_API_KEY" >> .env + echo "LOGFIRE_API_KEY=$LOGFIRE_API_KEY" >>.env fi echo ".env file created with required API keys." fi @@ -60,6 +67,10 @@ start_demo() { echo "Error: LOGFIRE_API_KEY environment variable is required for Logfire." exit 1 fi + if [ "$1" == "honeycomb" ] && [ -z "$HONEYCOMB_API_KEY" ]; then + echo "Error: HONEYCOMB_API_KEY environment variable is required for Honeycomb." + exit 1 + fi # Step 4: Start Arch echo "Starting Arch with arch_config.yaml..." @@ -67,7 +78,7 @@ start_demo() { # Step 5: Start Network Agent with the chosen Docker Compose file echo "Starting Network Agent with $COMPOSE_FILE..." - docker compose -f "$COMPOSE_FILE" up -d # Run in detached mode + docker compose -f "$COMPOSE_FILE" up -d # Run in detached mode } # Function to stop the demo From 6887d527502d78daf5f7ccd44a65a5cde8ff3d63 Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Tue, 21 Jan 2025 18:01:56 -0800 Subject: [PATCH 10/11] When using ollama token count was not coming in (#375) When using ollama token count was not coming in resulting in token count and other metrics to show up as zero. This was not causing tracing to break. 
--- crates/llm_gateway/src/stream_context.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/crates/llm_gateway/src/stream_context.rs b/crates/llm_gateway/src/stream_context.rs index 39d4c58f..6939f1d8 100644 --- a/crates/llm_gateway/src/stream_context.rs +++ b/crates/llm_gateway/src/stream_context.rs @@ -483,11 +483,14 @@ impl HttpContext for StreamContext { let tokens_str = chat_completions_chunk_response_events.to_string(); //HACK: add support for tokenizing mistral and other models //filed issue https://github.com/katanemo/arch/issues/222 - if model.as_ref().unwrap().starts_with("mistral") - || model.as_ref().unwrap().starts_with("ministral") - { - model = Some("gpt-4".to_string()); + if !model.as_ref().unwrap().starts_with("gpt") { + warn!( + "tiktoken_rs: unsupported model: {}, using gpt-4 to compute token count", + model.as_ref().unwrap() + ); } + model = Some("gpt-4".to_string()); + let token_count = match tokenizer::token_count(model.as_ref().unwrap().as_str(), tokens_str.as_str()) { From 6740a0995295305eed3958f0bdd315a405a89280 Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Wed, 22 Jan 2025 14:02:59 -0800 Subject: [PATCH 11/11] add docker-compose file for honeycomb tracing (#377) --- .../docker-compose_honeycomb.yaml | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 demos/currency_exchange_ollama/docker-compose_honeycomb.yaml diff --git a/demos/currency_exchange_ollama/docker-compose_honeycomb.yaml b/demos/currency_exchange_ollama/docker-compose_honeycomb.yaml new file mode 100644 index 00000000..3c46c7cf --- /dev/null +++ b/demos/currency_exchange_ollama/docker-compose_honeycomb.yaml @@ -0,0 +1,26 @@ +services: + chatbot_ui: + build: + context: ../shared/chatbot_ui + ports: + - "18080:8080" + environment: + # this is only because we are running the sample app in the same docker container environment as archgw + - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1 + extra_hosts: + - 
"host.docker.internal:host-gateway" + volumes: + - ./arch_config.yaml:/app/arch_config.yaml + + otel-collector: + build: + context: ../shared/honeycomb/ + ports: + - "4317:4317" + - "4318:4318" + volumes: + - ../shared/honeycomb/otel-collector-config.yaml:/etc/otel-collector-config.yaml + env_file: + - .env + environment: + - HONEYCOMB_API_KEY=${HONEYCOMB_API_KEY:?error}