Merge commit '097513ee60' into adil/release_0.1.3

2026-06-17 15:25:17 +02:00 · 2024-11-17 17:03:00 -08:00 · 2024-11-17 17:03:00 -08:00 · 689b462d01
commit 689b462d01
parent c0a9595bbf 097513ee60
13 changed files with 48 additions and 15 deletions
--- a/README.md
+++ b/README.md
@ -7,9 +7,9 @@
 [![e2e tests](https://github.com/katanemo/arch/actions/workflows/e2e_tests.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/e2e_tests.yml)
 [![Build and Deploy Documentation](https://github.com/katanemo/arch/actions/workflows/static.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/static.yml)

-## Build fast, robust, and personalized AI agents.
+## Build fast, observable, and personalized AI agents.

-Arch is an intelligent [Layer 7](https://www.cloudflare.com/learning/ddos/what-is-layer-7/) gateway designed to protect, observe, and personalize LLM applications (agents, assistants, co-pilots) with your APIs.
+Arch is an intelligent [Layer 7](https://www.cloudflare.com/learning/ddos/what-is-layer-7/) gateway designed to protect, observe, and personalize AI agents (assistants, co-pilots) with your APIs.

 Engineered with purpose-built LLMs, Arch handles the critical but undifferentiated tasks related to the handling and processing of prompts, including detecting and rejecting [jailbreak](https://github.com/verazuo/jailbreak_llms) attempts, intelligently calling "backend" APIs to fulfill the user's request represented in a prompt, routing to and offering disaster recovery between upstream LLMs, and managing the observability of prompts and LLM interactions in a centralized way.

@ -156,6 +156,8 @@ print("OpenAI Response:", response.choices[0].message.content)
 ### [Observability](https://docs.archgw.com/guides/observability/observability.html)
 Arch is designed to support best-in class observability by supporting open standards. Please read our [docs](https://docs.archgw.com/guides/observability/observability.html) on observability for more details on tracing, metrics, and logs

+![Arch tracing](docs/source/_static/img/tracing.png)
+
 ### Contribution
 We would love feedback on our [Roadmap](https://github.com/orgs/katanemo/projects/1) and we welcome contributions to **Arch**!
 Whether you're fixing bugs, adding new features, improving documentation, or creating tutorials, your help is much appreciated.
--- a/crates/llm_gateway/src/stream_context.rs
+++ b/crates/llm_gateway/src/stream_context.rs
@ -40,6 +40,7 @@ pub struct StreamContext {
    ttft_duration: Option<Duration>,
    ttft_time: Option<SystemTime>,
    pub traceparent: Option<String>,
+    request_body_sent_time: Option<SystemTime>,
    user_message: Option<Message>,
 }

@ -60,6 +61,7 @@ impl StreamContext {
            traceparent: None,
            ttft_time: None,
            user_message: None,
+            request_body_sent_time: None,
        }
    }
    fn llm_provider(&self) -> &LlmProvider {
@ -196,6 +198,11 @@ impl HttpContext for StreamContext {
    fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
        // Let the client send the gateway all the data before sending to the LLM_provider.
        // TODO: consider a streaming API.
+
+        if self.request_body_sent_time.is_none() {
+            self.request_body_sent_time = Some(get_current_time().unwrap());
+        }
+
        if !end_of_stream {
            return Action::Pause;
        }
@ -351,7 +358,7 @@ impl HttpContext for StreamContext {
                    "upstream_llm_time".to_string(),
                    parent_trace_id.to_string(),
                    Some(parent_span_id.to_string()),
-                    self.start_time
+                    self.request_body_sent_time
                        .unwrap()
                        .duration_since(UNIX_EPOCH)
                        .unwrap()
--- a/crates/llm_gateway/tests/integration.rs
+++ b/crates/llm_gateway/tests/integration.rs
@ -217,6 +217,8 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
        )
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
+        .expect_get_current_time_nanos()
+        .returning(Some(0))
        .expect_log(Some(LogLevel::Trace), None)
        .expect_log(Some(LogLevel::Debug), None)
        .expect_metric_record("input_sequence_length", 21)
@ -279,6 +281,8 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
        )
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(incomplete_chat_completions_request_body))
+        .expect_get_current_time_nanos()
+        .returning(Some(0))
        .expect_log(Some(LogLevel::Debug), None)
        .expect_send_local_response(
            Some(StatusCode::BAD_REQUEST.as_u16().into()),
@ -337,6 +341,8 @@ fn llm_gateway_request_ratelimited() {
        )
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
+        .expect_get_current_time_nanos()
+        .returning(Some(0))
        // The actual call is not important in this test, we just need to grab the token_id
        .expect_log(Some(LogLevel::Trace), None)
        .expect_log(Some(LogLevel::Debug), None)
@ -403,6 +409,8 @@ fn llm_gateway_request_not_ratelimited() {
        )
        .expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
        .returning(Some(chat_completions_request_body))
+        .expect_get_current_time_nanos()
+        .returning(Some(0))
        // The actual call is not important in this test, we just need to grab the token_id
        .expect_log(Some(LogLevel::Trace), None)
        .expect_log(Some(LogLevel::Debug), None)
--- a/docs/source/_static/img/tracing.png
+++ b/docs/source/_static/img/tracing.png
--- a/docs/source/concepts/tech_overview/listener.rst
+++ b/docs/source/concepts/tech_overview/listener.rst
@ -21,8 +21,8 @@ Upstream (Egress)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 Arch automatically configures a listener to route requests from your application to upstream LLM API providers (or hosts).
 When you start Arch, it creates a listener for egress traffic based on the presence of the ``listener`` configuration
-section in the configuration file. Arch binds itself to a local address such as ``127.0.0.1:9000/v1`` or a DNS-based
-address like ``arch.local:9000/v1`` for outgoing traffic. For more details on LLM providers, read :ref:`here <llm_provider>`.
+section in the configuration file. Arch binds itself to a local address such as ``127.0.0.1:12000/v1`` or a DNS-based
+address like ``arch.local:12000/v1`` for outgoing traffic. For more details on LLM providers, read :ref:`here <llm_provider>`.

 Configure Listener
 ^^^^^^^^^^^^^^^^^^
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@ -15,7 +15,7 @@ from sphinxawesome_theme.postprocess import Icons
 project = "Arch Docs"
 copyright = "2024, Katanemo Labs, Inc"
 author = "Katanemo Labs, Inc"
-release = " v0.1"
+release = " v0.1.2"

 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
--- a/docs/source/guides/observability/tracing.rst
+++ b/docs/source/guides/observability/tracing.rst
@ -23,6 +23,10 @@ flow, enabling **end-to-end tracing** across the entire application. By using Op
 that developers can capture this trace data consistently and in a format compatible with various observability
 tools.

+.. image:: /_static/img/tracing.png
+   :width: 100%
+   :align: center
+

 Benefits of Using ``Traceparent`` Headers
 -----------------------------------------
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@ -9,10 +9,12 @@ Welcome to Arch!

   <div style="text-align: center; font-size: 1.25rem;">
   <br>
-   <p>Build <strong>fast</strong>, <strong>robust</strong>, and <strong>personalized</strong> GenAI apps</p>
+   <p>Build <strong>fast</strong>, <strong>observable</strong>, and <strong>personalized</strong> GenAI apps</p>
   </div>

-Arch (built by the contributors of `Envoy <https://www.envoyproxy.io/>`_ ) was born out of the belief that:
+   <a href="https://www.producthunt.com/posts/arch-3?embed=true&utm_source=badge-top-post-badge&utm_medium=badge&utm_souce=badge-arch&#0045;3" target="_blank"><img src="https://api.producthunt.com/widgets/embed-image/v1/top-post-badge.svg?post_id=565761&theme=light&period=daily" alt="Arch - Build&#0032;fast&#0044;&#0032;hyper&#0045;personalized&#0032;agents&#0032;with&#0032;intelligent&#0032;infra | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" /></a>
+
+`Arch <https://github.com/katanemo/arch>`_ is an intelligent infrastructure primitive for GenAI (built by the contributors of `Envoy <https://www.envoyproxy.io/>`_ ) that was born out of the belief that:

  *Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests including secure handling, intelligent routing, robust observability, and integration with backend (API) systems for personalization - all outside business logic.*

--- a/e2e_tests/api_llm_gateway.rest
+++ b/e2e_tests/api_llm_gateway.rest
--- a/e2e_tests/api_model_server.rest
+++ b/e2e_tests/api_model_server.rest
--- a/e2e_tests/api_prompt_gateway.rest
+++ b/e2e_tests/api_prompt_gateway.rest
--- a/e2e_tests/tracing.rest
+++ b/e2e_tests/tracing.rest
--- a/www/index.html
+++ b/www/index.html
@ -58,7 +58,7 @@
            display: block;
        }
        div.bold-text {
-            font-size: 1.5rem;
+            font-size: 1.4rem;
            margin-bottom: 5px;
            line-height: 3rem;
        }
@ -170,27 +170,37 @@
        }
    </style>
 </head>
+<!-- Google tag (gtag.js) -->
+<script async src="https://www.googletagmanager.com/gtag/js?id=G-F1XYQ9H653"></script>
+<script>
+  window.dataLayer = window.dataLayer || [];
+  function gtag(){dataLayer.push(arguments);}
+  gtag('js', new Date());
+
+  gtag('config', 'G-F1XYQ9H653');
+</script>
 <body>
    <header>
        <a href="https://github.com/katanemo/arch">GitHub</a>
        <a href="https://docs.archgw.com">Docs</a>
-        <a href="https://discord.gg/rSRQ9fv7">Discord</a>
+        <a href="https://discord.gg/pGZf2gcwEc">Discord</a>
        <a href="https://github.com/katanemo/arch?tab=readme-ov-file#contact">Contact</a>
    </header>
    <div class="container">
        <div class="image-placeholder">
            <img src="https://storage.googleapis.com/arch-website-assets/arch-logo.png" alt="Arch Gateway Logo" title="Arch Gateway Logo">
        </div>
-        <div class="bold-text">Build <strong>fast</strong>, <strong>robust</strong>, and <strong>personalized</strong> GenAI apps</div>
-        <div class="subheading">An <a href="https://github.com/katanemo/arch">open source</a> gateway engineered with (fast) LLMs to handle the <b>muck</b> of prompts.</div>
+        <a href="https://www.producthunt.com/posts/arch-3?embed=true&utm_source=badge-top-post-badge&utm_medium=badge&utm_souce=badge-arch&#0045;3" target="_blank"><img src="https://api.producthunt.com/widgets/embed-image/v1/top-post-badge.svg?post_id=565761&theme=light&period=daily" alt="Arch - Build&#0032;fast&#0044;&#0032;hyper&#0045;personalized&#0032;agents&#0032;with&#0032;intelligent&#0032;infra | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" /></a>
+        <div class="bold-text">Build <strong>fast</strong>, <strong>observable</strong>, and <strong>personalized</strong> agents</div>
+        <div class="subheading">Arch is an <a href="https://github.com/katanemo/arch">intelligent</a> gateway designed to protect, observe and personalize AI agents with your APIs</div>
        <div class="buttons">
-            <a href="https://github.com/katanemo/arch?tab=readme-ov-file#getstarted">Download</a>
+            <a href="https://github.com/katanemo/arch?tab=readme-ov-file#getstarted">Get Started</a>
            <a href="https://docs.archgw.com">Documentation</a>
        </div>
        <hr>
        <div class="why_arch">
            <h3>Why Arch?</h3>
-            <p>The open source project was born out of the belief that:</p>
+            <p>Arch is built on (and by the core contributors of) <a href="https://www.envoyproxy.io">Envoy proxy</a> with the belief that:</p>
            <blockquote>
                <p><em>Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests
                including secure handling, intelligent routing, robust observability, and seamless integration with backend (API)
@ -248,7 +258,7 @@
    </div>
    <h2 class="get-started">Let's get started </h2>
    <div class="buttons">
-        <a href="https://github.com/katanemo/arch?tab=readme-ov-file#getstarted">Download</a>
+        <a href="https://github.com/katanemo/arch?tab=readme-ov-file#getstarted">Get Started</a>
        <a href="https://docs.archgw.com">Documentation</a>
    </div>
 </body>