Merge commit '097513ee60' into adil/release_0.1.3

This commit is contained in:
Adil Hafeez 2024-11-17 17:03:00 -08:00
commit 689b462d01
13 changed files with 48 additions and 15 deletions

View file

@ -7,9 +7,9 @@
[![e2e tests](https://github.com/katanemo/arch/actions/workflows/e2e_tests.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/e2e_tests.yml)
[![Build and Deploy Documentation](https://github.com/katanemo/arch/actions/workflows/static.yml/badge.svg)](https://github.com/katanemo/arch/actions/workflows/static.yml)
## Build fast, robust, and personalized AI agents.
## Build fast, observable, and personalized AI agents.
Arch is an intelligent [Layer 7](https://www.cloudflare.com/learning/ddos/what-is-layer-7/) gateway designed to protect, observe, and personalize LLM applications (agents, assistants, co-pilots) with your APIs.
Arch is an intelligent [Layer 7](https://www.cloudflare.com/learning/ddos/what-is-layer-7/) gateway designed to protect, observe, and personalize AI agents (assistants, co-pilots) with your APIs.
Engineered with purpose-built LLMs, Arch handles the critical but undifferentiated tasks related to the handling and processing of prompts, including detecting and rejecting [jailbreak](https://github.com/verazuo/jailbreak_llms) attempts, intelligently calling "backend" APIs to fulfill the user's request represented in a prompt, routing to and offering disaster recovery between upstream LLMs, and managing the observability of prompts and LLM interactions in a centralized way.
@ -156,6 +156,8 @@ print("OpenAI Response:", response.choices[0].message.content)
### [Observability](https://docs.archgw.com/guides/observability/observability.html)
Arch is designed to support best-in-class observability by supporting open standards. Please read our [docs](https://docs.archgw.com/guides/observability/observability.html) on observability for more details on tracing, metrics, and logs.
![Tracing in Arch](docs/source/_static/img/tracing.png)
### Contribution
We would love feedback on our [Roadmap](https://github.com/orgs/katanemo/projects/1) and we welcome contributions to **Arch**!
Whether you're fixing bugs, adding new features, improving documentation, or creating tutorials, your help is much appreciated.

View file

@ -40,6 +40,7 @@ pub struct StreamContext {
ttft_duration: Option<Duration>,
ttft_time: Option<SystemTime>,
pub traceparent: Option<String>,
request_body_sent_time: Option<SystemTime>,
user_message: Option<Message>,
}
@ -60,6 +61,7 @@ impl StreamContext {
traceparent: None,
ttft_time: None,
user_message: None,
request_body_sent_time: None,
}
}
fn llm_provider(&self) -> &LlmProvider {
@ -196,6 +198,11 @@ impl HttpContext for StreamContext {
fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
// Let the client send the gateway all the data before sending to the LLM_provider.
// TODO: consider a streaming API.
if self.request_body_sent_time.is_none() {
self.request_body_sent_time = Some(get_current_time().unwrap());
}
if !end_of_stream {
return Action::Pause;
}
@ -351,7 +358,7 @@ impl HttpContext for StreamContext {
"upstream_llm_time".to_string(),
parent_trace_id.to_string(),
Some(parent_span_id.to_string()),
self.start_time
self.request_body_sent_time
.unwrap()
.duration_since(UNIX_EPOCH)
.unwrap()

View file

@ -217,6 +217,8 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
)
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
.expect_get_current_time_nanos()
.returning(Some(0))
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
.expect_metric_record("input_sequence_length", 21)
@ -279,6 +281,8 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
)
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(incomplete_chat_completions_request_body))
.expect_get_current_time_nanos()
.returning(Some(0))
.expect_log(Some(LogLevel::Debug), None)
.expect_send_local_response(
Some(StatusCode::BAD_REQUEST.as_u16().into()),
@ -337,6 +341,8 @@ fn llm_gateway_request_ratelimited() {
)
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
.expect_get_current_time_nanos()
.returning(Some(0))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)
@ -403,6 +409,8 @@ fn llm_gateway_request_not_ratelimited() {
)
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
.returning(Some(chat_completions_request_body))
.expect_get_current_time_nanos()
.returning(Some(0))
// The actual call is not important in this test, we just need to grab the token_id
.expect_log(Some(LogLevel::Trace), None)
.expect_log(Some(LogLevel::Debug), None)

Binary file not shown.

After

Width:  |  Height:  |  Size: 692 KiB

View file

@ -21,8 +21,8 @@ Upstream (Egress)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Arch automatically configures a listener to route requests from your application to upstream LLM API providers (or hosts).
When you start Arch, it creates a listener for egress traffic based on the presence of the ``listener`` configuration
section in the configuration file. Arch binds itself to a local address such as ``127.0.0.1:9000/v1`` or a DNS-based
address like ``arch.local:9000/v1`` for outgoing traffic. For more details on LLM providers, read :ref:`here <llm_provider>`.
section in the configuration file. Arch binds itself to a local address such as ``127.0.0.1:12000/v1`` or a DNS-based
address like ``arch.local:12000/v1`` for outgoing traffic. For more details on LLM providers, read :ref:`here <llm_provider>`.
Configure Listener
^^^^^^^^^^^^^^^^^^

View file

@ -15,7 +15,7 @@ from sphinxawesome_theme.postprocess import Icons
project = "Arch Docs"
copyright = "2024, Katanemo Labs, Inc"
author = "Katanemo Labs, Inc"
release = " v0.1"
release = " v0.1.2"
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

View file

@ -23,6 +23,10 @@ flow, enabling **end-to-end tracing** across the entire application. By using Op
that developers can capture this trace data consistently and in a format compatible with various observability
tools.
.. image:: /_static/img/tracing.png
:width: 100%
:align: center
Benefits of Using ``Traceparent`` Headers
-----------------------------------------

View file

@ -9,10 +9,12 @@ Welcome to Arch!
<div style="text-align: center; font-size: 1.25rem;">
<br>
<p>Build <strong>fast</strong>, <strong>robust</strong>, and <strong>personalized</strong> GenAI apps</p>
<p>Build <strong>fast</strong>, <strong>observable</strong>, and <strong>personalized</strong> GenAI apps</p>
</div>
Arch (built by the contributors of `Envoy <https://www.envoyproxy.io/>`_ ) was born out of the belief that:
<a href="https://www.producthunt.com/posts/arch-3?embed=true&utm_source=badge-top-post-badge&utm_medium=badge&utm_souce=badge-arch&#0045;3" target="_blank"><img src="https://api.producthunt.com/widgets/embed-image/v1/top-post-badge.svg?post_id=565761&theme=light&period=daily" alt="Arch - Build&#0032;fast&#0044;&#0032;hyper&#0045;personalized&#0032;agents&#0032;with&#0032;intelligent&#0032;infra | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" /></a>
`Arch <https://github.com/katanemo/arch>`_ is an intelligent infrastructure primitive for GenAI (built by the contributors of `Envoy <https://www.envoyproxy.io/>`_ ) that was born out of the belief that:
*Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests including secure handling, intelligent routing, robust observability, and integration with backend (API) systems for personalization - all outside business logic.*

View file

@ -58,7 +58,7 @@
display: block;
}
div.bold-text {
font-size: 1.5rem;
font-size: 1.4rem;
margin-bottom: 5px;
line-height: 3rem;
}
@ -170,27 +170,37 @@
}
</style>
</head>
<!-- Google tag (gtag.js) -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-F1XYQ9H653"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'G-F1XYQ9H653');
</script>
<body>
<header>
<a href="https://github.com/katanemo/arch">GitHub</a>
<a href="https://docs.archgw.com">Docs</a>
<a href="https://discord.gg/rSRQ9fv7">Discord</a>
<a href="https://discord.gg/pGZf2gcwEc">Discord</a>
<a href="https://github.com/katanemo/arch?tab=readme-ov-file#contact">Contact</a>
</header>
<div class="container">
<div class="image-placeholder">
<img src="https://storage.googleapis.com/arch-website-assets/arch-logo.png" alt="Arch Gateway Logo" title="Arch Gateway Logo">
</div>
<div class="bold-text">Build <strong>fast</strong>, <strong>robust</strong>, and <strong>personalized</strong> GenAI apps</div>
<div class="subheading">An <a href="https://github.com/katanemo/arch">open source</a> gateway engineered with (fast) LLMs to handle the <b>muck</b> of prompts.</div>
<a href="https://www.producthunt.com/posts/arch-3?embed=true&utm_source=badge-top-post-badge&utm_medium=badge&utm_souce=badge-arch&#0045;3" target="_blank"><img src="https://api.producthunt.com/widgets/embed-image/v1/top-post-badge.svg?post_id=565761&theme=light&period=daily" alt="Arch - Build&#0032;fast&#0044;&#0032;hyper&#0045;personalized&#0032;agents&#0032;with&#0032;intelligent&#0032;infra | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" /></a>
<div class="bold-text">Build <strong>fast</strong>, <strong>observable</strong>, and <strong>personalized</strong> agents</div>
<div class="subheading">Arch is an <a href="https://github.com/katanemo/arch">intelligent</a> gateway designed to protect, observe and personalize AI agents with your APIs</div>
<div class="buttons">
<a href="https://github.com/katanemo/arch?tab=readme-ov-file#getstarted">Download</a>
<a href="https://github.com/katanemo/arch?tab=readme-ov-file#getstarted">Get Started</a>
<a href="https://docs.archgw.com">Documentation</a>
</div>
<hr>
<div class="why_arch">
<h3>Why Arch?</h3>
<p>The open source project was born out of the belief that:</p>
<p>Arch is built on (and by the core contributors of) <a href="https://www.envoyproxy.io">Envoy proxy</a> with the belief that:</p>
<blockquote>
<p><em>Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests
including secure handling, intelligent routing, robust observability, and seamless integration with backend (API)
@ -248,7 +258,7 @@
</div>
<h2 class="get-started">Let's get started </h2>
<div class="buttons">
<a href="https://github.com/katanemo/arch?tab=readme-ov-file#getstarted">Download</a>
<a href="https://github.com/katanemo/arch?tab=readme-ov-file#getstarted">Get Started</a>
<a href="https://docs.archgw.com">Documentation</a>
</div>
</body>