mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
Merge commit '097513ee60' into adil/release_0.1.3
This commit is contained in:
commit
689b462d01
13 changed files with 48 additions and 15 deletions
|
|
@ -7,9 +7,9 @@
|
|||
[](https://github.com/katanemo/arch/actions/workflows/e2e_tests.yml)
|
||||
[](https://github.com/katanemo/arch/actions/workflows/static.yml)
|
||||
|
||||
## Build fast, robust, and personalized AI agents.
|
||||
## Build fast, observable, and personalized AI agents.
|
||||
|
||||
Arch is an intelligent [Layer 7](https://www.cloudflare.com/learning/ddos/what-is-layer-7/) gateway designed to protect, observe, and personalize LLM applications (agents, assistants, co-pilots) with your APIs.
|
||||
Arch is an intelligent [Layer 7](https://www.cloudflare.com/learning/ddos/what-is-layer-7/) gateway designed to protect, observe, and personalize AI agents (assistants, co-pilots) with your APIs.
|
||||
|
||||
Engineered with purpose-built LLMs, Arch handles the critical but undifferentiated tasks related to the handling and processing of prompts, including detecting and rejecting [jailbreak](https://github.com/verazuo/jailbreak_llms) attempts, intelligently calling "backend" APIs to fulfill the user's request represented in a prompt, routing to and offering disaster recovery between upstream LLMs, and managing the observability of prompts and LLM interactions in a centralized way.
|
||||
|
||||
|
|
@ -156,6 +156,8 @@ print("OpenAI Response:", response.choices[0].message.content)
|
|||
### [Observability](https://docs.archgw.com/guides/observability/observability.html)
|
||||
Arch is designed to support best-in-class observability by supporting open standards. Please read our [docs](https://docs.archgw.com/guides/observability/observability.html) on observability for more details on tracing, metrics, and logs.
|
||||
|
||||

|
||||
|
||||
### Contribution
|
||||
We would love feedback on our [Roadmap](https://github.com/orgs/katanemo/projects/1) and we welcome contributions to **Arch**!
|
||||
Whether you're fixing bugs, adding new features, improving documentation, or creating tutorials, your help is much appreciated.
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ pub struct StreamContext {
|
|||
ttft_duration: Option<Duration>,
|
||||
ttft_time: Option<SystemTime>,
|
||||
pub traceparent: Option<String>,
|
||||
request_body_sent_time: Option<SystemTime>,
|
||||
user_message: Option<Message>,
|
||||
}
|
||||
|
||||
|
|
@ -60,6 +61,7 @@ impl StreamContext {
|
|||
traceparent: None,
|
||||
ttft_time: None,
|
||||
user_message: None,
|
||||
request_body_sent_time: None,
|
||||
}
|
||||
}
|
||||
fn llm_provider(&self) -> &LlmProvider {
|
||||
|
|
@ -196,6 +198,11 @@ impl HttpContext for StreamContext {
|
|||
fn on_http_request_body(&mut self, body_size: usize, end_of_stream: bool) -> Action {
|
||||
// Let the client send the gateway all the data before sending to the LLM_provider.
|
||||
// TODO: consider a streaming API.
|
||||
|
||||
if self.request_body_sent_time.is_none() {
|
||||
self.request_body_sent_time = Some(get_current_time().unwrap());
|
||||
}
|
||||
|
||||
if !end_of_stream {
|
||||
return Action::Pause;
|
||||
}
|
||||
|
|
@ -351,7 +358,7 @@ impl HttpContext for StreamContext {
|
|||
"upstream_llm_time".to_string(),
|
||||
parent_trace_id.to_string(),
|
||||
Some(parent_span_id.to_string()),
|
||||
self.start_time
|
||||
self.request_body_sent_time
|
||||
.unwrap()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.unwrap()
|
||||
|
|
|
|||
|
|
@ -217,6 +217,8 @@ fn llm_gateway_successful_request_to_open_ai_chat_completions() {
|
|||
)
|
||||
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
|
||||
.returning(Some(chat_completions_request_body))
|
||||
.expect_get_current_time_nanos()
|
||||
.returning(Some(0))
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_metric_record("input_sequence_length", 21)
|
||||
|
|
@ -279,6 +281,8 @@ fn llm_gateway_bad_request_to_open_ai_chat_completions() {
|
|||
)
|
||||
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
|
||||
.returning(Some(incomplete_chat_completions_request_body))
|
||||
.expect_get_current_time_nanos()
|
||||
.returning(Some(0))
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
.expect_send_local_response(
|
||||
Some(StatusCode::BAD_REQUEST.as_u16().into()),
|
||||
|
|
@ -337,6 +341,8 @@ fn llm_gateway_request_ratelimited() {
|
|||
)
|
||||
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
|
||||
.returning(Some(chat_completions_request_body))
|
||||
.expect_get_current_time_nanos()
|
||||
.returning(Some(0))
|
||||
// The actual call is not important in this test, we just need to grab the token_id
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
|
|
@ -403,6 +409,8 @@ fn llm_gateway_request_not_ratelimited() {
|
|||
)
|
||||
.expect_get_buffer_bytes(Some(BufferType::HttpRequestBody))
|
||||
.returning(Some(chat_completions_request_body))
|
||||
.expect_get_current_time_nanos()
|
||||
.returning(Some(0))
|
||||
// The actual call is not important in this test, we just need to grab the token_id
|
||||
.expect_log(Some(LogLevel::Trace), None)
|
||||
.expect_log(Some(LogLevel::Debug), None)
|
||||
|
|
|
|||
BIN
docs/source/_static/img/tracing.png
Normal file
BIN
docs/source/_static/img/tracing.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 692 KiB |
|
|
@ -21,8 +21,8 @@ Upstream (Egress)
|
|||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
Arch automatically configures a listener to route requests from your application to upstream LLM API providers (or hosts).
|
||||
When you start Arch, it creates a listener for egress traffic based on the presence of the ``listener`` configuration
|
||||
section in the configuration file. Arch binds itself to a local address such as ``127.0.0.1:9000/v1`` or a DNS-based
|
||||
address like ``arch.local:9000/v1`` for outgoing traffic. For more details on LLM providers, read :ref:`here <llm_provider>`.
|
||||
section in the configuration file. Arch binds itself to a local address such as ``127.0.0.1:12000/v1`` or a DNS-based
|
||||
address like ``arch.local:12000/v1`` for outgoing traffic. For more details on LLM providers, read :ref:`here <llm_provider>`.
|
||||
|
||||
Configure Listener
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ from sphinxawesome_theme.postprocess import Icons
|
|||
project = "Arch Docs"
|
||||
copyright = "2024, Katanemo Labs, Inc"
|
||||
author = "Katanemo Labs, Inc"
|
||||
release = " v0.1"
|
||||
release = " v0.1.2"
|
||||
|
||||
# -- General configuration ---------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
||||
|
|
|
|||
|
|
@ -23,6 +23,10 @@ flow, enabling **end-to-end tracing** across the entire application. By using Op
|
|||
that developers can capture this trace data consistently and in a format compatible with various observability
|
||||
tools.
|
||||
|
||||
.. image:: /_static/img/tracing.png
|
||||
:width: 100%
|
||||
:align: center
|
||||
|
||||
|
||||
Benefits of Using ``Traceparent`` Headers
|
||||
-----------------------------------------
|
||||
|
|
|
|||
|
|
@ -9,10 +9,12 @@ Welcome to Arch!
|
|||
|
||||
<div style="text-align: center; font-size: 1.25rem;">
|
||||
<br>
|
||||
<p>Build <strong>fast</strong>, <strong>robust</strong>, and <strong>personalized</strong> GenAI apps</p>
|
||||
<p>Build <strong>fast</strong>, <strong>observable</strong>, and <strong>personalized</strong> GenAI apps</p>
|
||||
</div>
|
||||
|
||||
Arch (built by the contributors of `Envoy <https://www.envoyproxy.io/>`_ ) was born out of the belief that:
|
||||
<a href="https://www.producthunt.com/posts/arch-3?embed=true&utm_source=badge-top-post-badge&utm_medium=badge&utm_souce=badge-arch-3" target="_blank"><img src="https://api.producthunt.com/widgets/embed-image/v1/top-post-badge.svg?post_id=565761&theme=light&period=daily" alt="Arch - Build fast, hyper-personalized agents with intelligent infra | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" /></a>
|
||||
|
||||
`Arch <https://github.com/katanemo/arch>`_ is an intelligent infrastructure primitive for GenAI (built by the contributors of `Envoy <https://www.envoyproxy.io/>`_ ) that was born out of the belief that:
|
||||
|
||||
*Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests including secure handling, intelligent routing, robust observability, and integration with backend (API) systems for personalization - all outside business logic.*
|
||||
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@
|
|||
display: block;
|
||||
}
|
||||
div.bold-text {
|
||||
font-size: 1.5rem;
|
||||
font-size: 1.4rem;
|
||||
margin-bottom: 5px;
|
||||
line-height: 3rem;
|
||||
}
|
||||
|
|
@ -170,27 +170,37 @@
|
|||
}
|
||||
</style>
|
||||
</head>
|
||||
<!-- Google tag (gtag.js) -->
|
||||
<script async src="https://www.googletagmanager.com/gtag/js?id=G-F1XYQ9H653"></script>
|
||||
<script>
|
||||
window.dataLayer = window.dataLayer || [];
|
||||
function gtag(){dataLayer.push(arguments);}
|
||||
gtag('js', new Date());
|
||||
|
||||
gtag('config', 'G-F1XYQ9H653');
|
||||
</script>
|
||||
<body>
|
||||
<header>
|
||||
<a href="https://github.com/katanemo/arch">GitHub</a>
|
||||
<a href="https://docs.archgw.com">Docs</a>
|
||||
<a href="https://discord.gg/rSRQ9fv7">Discord</a>
|
||||
<a href="https://discord.gg/pGZf2gcwEc">Discord</a>
|
||||
<a href="https://github.com/katanemo/arch?tab=readme-ov-file#contact">Contact</a>
|
||||
</header>
|
||||
<div class="container">
|
||||
<div class="image-placeholder">
|
||||
<img src="https://storage.googleapis.com/arch-website-assets/arch-logo.png" alt="Arch Gateway Logo" title="Arch Gateway Logo">
|
||||
</div>
|
||||
<div class="bold-text">Build <strong>fast</strong>, <strong>robust</strong>, and <strong>personalized</strong> GenAI apps</div>
|
||||
<div class="subheading">An <a href="https://github.com/katanemo/arch">open source</a> gateway engineered with (fast) LLMs to handle the <b>muck</b> of prompts.</div>
|
||||
<a href="https://www.producthunt.com/posts/arch-3?embed=true&utm_source=badge-top-post-badge&utm_medium=badge&utm_souce=badge-arch-3" target="_blank"><img src="https://api.producthunt.com/widgets/embed-image/v1/top-post-badge.svg?post_id=565761&theme=light&period=daily" alt="Arch - Build fast, hyper-personalized agents with intelligent infra | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" /></a>
|
||||
<div class="bold-text">Build <strong>fast</strong>, <strong>observable</strong>, and <strong>personalized</strong> agents</div>
|
||||
<div class="subheading">Arch is an <a href="https://github.com/katanemo/arch">intelligent</a> gateway designed to protect, observe and personalize AI agents with your APIs</div>
|
||||
<div class="buttons">
|
||||
<a href="https://github.com/katanemo/arch?tab=readme-ov-file#getstarted">Download</a>
|
||||
<a href="https://github.com/katanemo/arch?tab=readme-ov-file#getstarted">Get Started</a>
|
||||
<a href="https://docs.archgw.com">Documentation</a>
|
||||
</div>
|
||||
<hr>
|
||||
<div class="why_arch">
|
||||
<h3>Why Arch?</h3>
|
||||
<p>The open source project was born out of the belief that:</p>
|
||||
<p>Arch is built on (and by the core contributors of) <a href="https://www.envoyproxy.io">Envoy proxy</a> with the belief that:
|
||||
<blockquote>
|
||||
<p><em>Prompts are nuanced and opaque user requests, which require the same capabilities as traditional HTTP requests
|
||||
including secure handling, intelligent routing, robust observability, and seamless integration with backend (API)
|
||||
|
|
@ -248,7 +258,7 @@
|
|||
</div>
|
||||
<h2 class="get-started">Let's get started </h2>
|
||||
<div class="buttons">
|
||||
<a href="https://github.com/katanemo/arch?tab=readme-ov-file#getstarted">Download</a>
|
||||
<a href="https://github.com/katanemo/arch?tab=readme-ov-file#getstarted">Get Started</a>
|
||||
<a href="https://docs.archgw.com">Documentation</a>
|
||||
</div>
|
||||
</body>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue