move custom tracer to llm filter (#267)

This commit is contained in:
Adil Hafeez 2024-11-15 10:44:01 -08:00 committed by GitHub
parent 1d229cba8f
commit d3c17c7abd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 335 additions and 133 deletions

View file

@ -13,16 +13,21 @@ FROM envoyproxy/envoy:v1.32-latest as envoy
#Build config generator, so that we have a single build image for both Rust and Python
FROM python:3.12-slim as arch
RUN apt-get update && apt-get install -y gettext-base curl && apt-get clean && rm -rf /var/lib/apt/lists/*
RUN apt-get update && apt-get install -y gettext-base curl supervisor && apt-get clean && rm -rf /var/lib/apt/lists/*
COPY --from=builder /arch/target/wasm32-wasip1/release/prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm
COPY --from=builder /arch/target/wasm32-wasip1/release/llm_gateway.wasm /etc/envoy/proxy-wasm-plugins/llm_gateway.wasm
COPY --from=envoy /usr/local/bin/envoy /usr/local/bin/envoy
WORKDIR /config
WORKDIR /app
COPY arch/requirements.txt .
RUN pip install -r requirements.txt
COPY arch/tools/cli/config_generator.py .
COPY arch/envoy.template.yaml .
COPY arch/arch_config_schema.yaml .
COPY arch/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
COPY arch/stream_traces.py .
ENTRYPOINT ["sh", "-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug 2>&1 | tee /var/log/envoy.log"]
RUN pip install requests
RUN touch /var/log/envoy.log
ENTRYPOINT ["supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]

View file

@ -8,11 +8,11 @@ services:
- "12000:12000"
- "19901:9901"
volumes:
- ${ARCH_CONFIG_FILE:-../demos/weather_forecast/arch_config.yaml}:/config/arch_config.yaml
- ${ARCH_CONFIG_FILE:-../demos/weather_forecast/arch_config.yaml}:/app/arch_config.yaml
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
- ./envoy.template.yaml:/config/envoy.template.yaml
- ./arch_config_schema.yaml:/config/arch_config_schema.yaml
- ./tools/cli/config_generator.py:/config/config_generator.py
- ./envoy.template.yaml:/app/envoy.template.yaml
- ./arch_config_schema.yaml:/app/arch_config_schema.yaml
- ./tools/cli/config_generator.py:/app/config_generator.py
- ../crates/target/wasm32-wasip1/release/llm_gateway.wasm:/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm
- ../crates/target/wasm32-wasip1/release/prompt_gateway.wasm:/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm
- ~/archgw_logs:/var/log/
@ -21,3 +21,4 @@ services:
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
- OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces

View file

@ -16,3 +16,4 @@ services:
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
- OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces

View file

@ -8,11 +8,13 @@ services:
- "12000:12000"
- "19901:9901"
volumes:
- ${ARCH_CONFIG_FILE:-../demos/weather_forecast/arch_config.yaml}:/config/arch_config.yaml
- ${ARCH_CONFIG_FILE:-../demos/weather_forecast/arch_config.yaml}:/app/arch_config.yaml
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
- ~/archgw_logs:/var/log/
env_file:
- env.list
environment:
- OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
extra_hosts:
- "host.docker.internal:host-gateway"
healthcheck:

View file

@ -242,11 +242,66 @@ static_resources:
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
- name: arch_listener_llm
- name: arch_listener_http_llm
address:
socket_address:
address: 0.0.0.0
port_value: 12000
traffic_direction: INBOUND
filter_chains:
- filters:
- name: envoy.filters.network.http_connection_manager
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
generate_request_id: true
tracing:
provider:
name: envoy.tracers.opentelemetry
typed_config:
"@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
grpc_service:
envoy_grpc:
cluster_name: opentelemetry_collector
timeout: 0.250s
service_name: arch_gateway
random_sampling:
value: {{ arch_tracing.random_sampling }}
{% endif %}
stat_prefix: arch_listener_http
codec_type: AUTO
scheme_header_transformation:
scheme_to_overwrite: https
access_log:
- name: envoy.access_loggers.file
typed_config:
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
path: "/var/log/access_llm.log"
route_config:
name: local_routes
virtual_hosts:
- name: local_service
domains:
- "*"
routes:
- match:
prefix: "/"
route:
auto_host_rewrite: true
cluster: arch_listener_llm
timeout: 60s
http_filters:
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
- name: arch_listener_llm
address:
socket_address:
address: 0.0.0.0
port_value: 12001
filter_chains:
- filters:
- name: envoy.filters.network.http_connection_manager
@ -479,6 +534,23 @@ static_resources:
port_value: 10001
hostname: arch_prompt_gateway_listener
- name: arch_listener_llm
connect_timeout: 5s
type: LOGICAL_DNS
dns_lookup_family: V4_ONLY
lb_policy: ROUND_ROBIN
load_assignment:
cluster_name: arch_listener_llm
endpoints:
- lb_endpoints:
- endpoint:
address:
socket_address:
address: 0.0.0.0
port_value: 12001
hostname: arch_listener_llm
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
- name: opentelemetry_collector
type: STRICT_DNS

View file

@ -1,4 +1,5 @@
import os
import sys
import time
import requests
import logging
@ -29,14 +30,8 @@ def process_log_line(line):
logging.error(f"Failed to send trace to otel-tracing: {e}")
with open(envoy_log_path, "r") as f:
# Seek to the end of the file so we only read new lines
f.seek(0, os.SEEK_END)
while True:
line = f.readline()
if not line:
time.sleep(1)
continue
tokens = line.split("prompt_gateway: upstream_llm trace details: ")
for line in sys.stdin:
if line:
tokens = line.split("gateway: upstream_llm trace details: ")
if len(tokens) > 1:
process_log_line(tokens[1])

25
arch/supervisord.conf Normal file
View file

@ -0,0 +1,25 @@
; Supervisor configuration: runs Envoy and a trace-streaming sidecar as two
; managed processes in one container (replaces the previous single-process
; Docker ENTRYPOINT that only launched Envoy).
[supervisord]
; Run in the foreground so supervisord stays PID 1 of the container.
nodaemon=true
; Tails the Envoy log and pipes it into stream_traces.py, which extracts
; "upstream_llm trace details" lines and forwards them to the OTEL endpoint.
[program:trace_streamer]
command=sh -c "tail -F /var/log/envoy.log | python stream_traces.py"
autostart=true
; NOTE(review): envoy below uses autorestart=true but this streamer does not —
; if it exits, trace forwarding stops until the container restarts; confirm
; this asymmetry is intentional.
autorestart=false
startretries=3
; Lower priority value starts first, so the tail is attached before Envoy logs.
priority=1
; Send child output to the container's stdout/stderr ...
stdout_logfile=/dev/stdout
stderr_logfile=/dev/stderr
; ... and disable log rotation, which /dev streams do not support.
stdout_logfile_maxbytes = 0
stderr_logfile_maxbytes = 0
; Renders the Envoy config from arch_config.yaml, substitutes environment
; variables, then runs Envoy; output is tee'd to /var/log/envoy.log so the
; trace_streamer program above can consume it.
[program:envoy]
command=sh -c "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug 2>&1 | tee /var/log/envoy.log"
autostart=true
autorestart=true
startretries=3
priority=2
stdout_logfile=/dev/stdout
stderr_logfile=/dev/stderr
stdout_logfile_maxbytes = 0
stderr_logfile_maxbytes = 0

View file

@ -6,7 +6,7 @@ from jsonschema import validate
ENVOY_CONFIG_TEMPLATE_FILE = os.getenv(
"ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml"
)
ARCH_CONFIG_FILE = os.getenv("ARCH_CONFIG_FILE", "/config/arch_config.yaml")
ARCH_CONFIG_FILE = os.getenv("ARCH_CONFIG_FILE", "/app/arch_config.yaml")
ENVOY_CONFIG_FILE_RENDERED = os.getenv(
"ENVOY_CONFIG_FILE_RENDERED", "/etc/envoy/envoy.yaml"
)

View file

@ -21,22 +21,25 @@
"path": "e2e_tests"
},
{
"name": "demos/weather_forecast",
"path": "./demos/weather_forecast",
},
{
"name": "demos/insurance_agent",
"path": "./demos/insurance_agent",
},
"name": "chatbot_ui",
"path": "demos/shared/chatbot_ui"
}
],
"settings": {
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true
},
},
"extensions": {
"recommendations": [
"ms-python.python",
"ms-python.debugpy",
"rust-lang.rust-analyzer",
"humao.rest-client"
"humao.rest-client",
"github.copilot",
"eamodio.gitlens",
"ms-python.black-formatter",
]
}
}

View file

@ -1,17 +1,18 @@
use crate::filter_context::WasmMetrics;
use common::common_types::open_ai::{
ChatCompletionStreamResponseServerEvents, ChatCompletionsRequest, ChatCompletionsResponse,
StreamOptions,
Message, StreamOptions,
};
use common::configuration::LlmProvider;
use common::consts::{
ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, CHAT_COMPLETIONS_PATH,
RATELIMIT_SELECTOR_HEADER_KEY, REQUEST_ID_HEADER,
RATELIMIT_SELECTOR_HEADER_KEY, REQUEST_ID_HEADER, TRACE_PARENT_HEADER,
};
use common::errors::ServerError;
use common::llm_providers::LlmProviders;
use common::pii::obfuscate_auth_header;
use common::ratelimit::Header;
use common::tracing::{Event, Span};
use common::{ratelimit, routing, tokenizer};
use http::StatusCode;
use log::{debug, trace, warn};
@ -23,7 +24,7 @@ use std::rc::Rc;
use common::stats::{IncrementingMetric, RecordingMetric};
use proxy_wasm::hostcalls::get_current_time;
use std::time::{Duration, SystemTime};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
pub struct StreamContext {
context_id: u32,
@ -36,7 +37,10 @@ pub struct StreamContext {
llm_provider: Option<Rc<LlmProvider>>,
request_id: Option<String>,
start_time: Option<SystemTime>,
ttft_duration: Option<Duration>, // Store the duration directly
ttft_duration: Option<Duration>,
ttft_time: Option<SystemTime>,
pub traceparent: Option<String>,
user_message: Option<Message>,
}
impl StreamContext {
@ -53,6 +57,9 @@ impl StreamContext {
request_id: None,
start_time: None,
ttft_duration: None,
traceparent: None,
ttft_time: None,
user_message: None,
}
}
fn llm_provider(&self) -> &LlmProvider {
@ -176,9 +183,10 @@ impl HttpContext for StreamContext {
);
self.request_id = self.get_http_request_header(REQUEST_ID_HEADER);
self.traceparent = self.get_http_request_header(TRACE_PARENT_HEADER);
//start the timing for the request using get_current_time()
let current_time = get_current_time().unwrap();
let current_time: SystemTime = get_current_time().unwrap();
self.start_time = Some(current_time);
self.ttft_duration = None;
@ -229,6 +237,13 @@ impl HttpContext for StreamContext {
message.model = None;
}
self.user_message = deserialized_body
.messages
.iter()
.filter(|m| m.role == "user")
.last()
.cloned();
// override model name from the llm provider
deserialized_body
.model
@ -318,6 +333,52 @@ impl HttpContext for StreamContext {
.output_sequence_length
.record(self.response_tokens as u64);
if let Some(traceparent) = self.traceparent.as_ref() {
let since_the_epoch_ns = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_nanos();
let traceparent_tokens = traceparent.split("-").collect::<Vec<&str>>();
if traceparent_tokens.len() != 4 {
warn!("traceparent header is invalid: {}", traceparent);
return Action::Continue;
}
let parent_trace_id = traceparent_tokens[1];
let parent_span_id = traceparent_tokens[2];
let mut trace_data = common::tracing::TraceData::new();
let mut llm_span = Span::new(
"upstream_llm_time".to_string(),
parent_trace_id.to_string(),
Some(parent_span_id.to_string()),
self.start_time
.unwrap()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_nanos(),
since_the_epoch_ns,
);
if let Some(user_message) = self.user_message.as_ref() {
if let Some(prompt) = user_message.content.as_ref() {
llm_span.add_attribute("user_prompt".to_string(), prompt.to_string());
}
}
llm_span.add_attribute("model".to_string(), self.llm_provider().name.to_string());
llm_span.add_event(Event::new(
"time_to_first_token".to_string(),
self.ttft_time
.unwrap()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_nanos(),
));
trace_data.add_span(llm_span);
let trace_data_str = serde_json::to_string(&trace_data).unwrap();
debug!("upstream_llm trace details: {}", trace_data_str);
// send trace_data to http tracing endpoint
}
return Action::Continue;
}
@ -413,6 +474,7 @@ impl HttpContext for StreamContext {
if self.ttft_duration.is_none() {
if let Some(start_time) = self.start_time {
let current_time = get_current_time().unwrap();
self.ttft_time = Some(current_time);
match current_time.duration_since(start_time) {
Ok(duration) => {
let duration_ms = duration.as_millis();

View file

@ -51,6 +51,8 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
.expect_log(Some(LogLevel::Debug), None)
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("x-request-id"))
.returning(None)
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("traceparent"))
.returning(None)
.expect_get_current_time_nanos()
.returning(Some(0))
.execute_and_expect(ReturnType::Action(Action::Continue))

View file

@ -18,7 +18,6 @@ use common::{
errors::ServerError,
http::{CallArgs, Client},
pii::obfuscate_auth_header,
tracing::{Event, Span},
};
use http::StatusCode;
use log::{debug, trace, warn};
@ -265,42 +264,6 @@ impl HttpContext for StreamContext {
}
if end_of_stream && body_size == 0 {
if let Some(traceparent) = self.traceparent.as_ref() {
let since_the_epoch_ns = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_nanos();
let traceparent_tokens = traceparent.split("-").collect::<Vec<&str>>();
if traceparent_tokens.len() != 4 {
warn!("traceparent header is invalid: {}", traceparent);
return Action::Continue;
}
let parent_trace_id = traceparent_tokens[1];
let parent_span_id = traceparent_tokens[2];
let mut trace_data = common::tracing::TraceData::new();
let mut llm_span = Span::new(
"upstream_llm_time".to_string(),
parent_trace_id.to_string(),
Some(parent_span_id.to_string()),
self.start_upstream_llm_request_time,
since_the_epoch_ns,
);
if let Some(prompt) = self.user_prompt.as_ref() {
if let Some(content) = prompt.content.as_ref() {
llm_span.add_attribute("user_prompt".to_string(), content.to_string());
}
}
llm_span.add_event(Event::new(
"time_to_first_token".to_string(),
self.time_to_first_token.unwrap(),
));
trace_data.add_span(llm_span);
let trace_data_str = serde_json::to_string(&trace_data).unwrap();
debug!("upstream_llm trace details: {}", trace_data_str);
// send trace_data to http tracing endpoint
}
return Action::Continue;
}

View file

@ -0,0 +1,19 @@
# LLM Routing
This demo shows how you can use arch gateway to manage keys and route to the appropriate LLM.
# Starting the demo
1. Please make sure the [pre-requisites](https://github.com/katanemo/arch/?tab=readme-ov-file#prerequisites) are installed correctly
1. Start Arch
```sh
sh run_demo.sh
```
1. Navigate to http://localhost:18080/
# Observability
Arch gateway publishes a stats endpoint at http://localhost:19901/stats. In this demo we use prometheus to pull stats from arch and grafana to visualize the stats in a dashboard. To see the grafana dashboard, follow the instructions below:
1. Navigate to http://localhost:3000/ to open grafana UI (use admin/grafana as credentials)
1. From grafana left nav click on dashboards and select "Intelligent Gateway Overview" to view arch gateway stats
# Selecting different LLM
You can pick a different LLM by setting the header `x-arch-llm-provider-hint`, which overrides the default LLM.

View file

@ -2,7 +2,7 @@ services:
chatbot_ui:
build:
context: ../../chatbot_ui
context: ../shared/chatbot_ui
dockerfile: Dockerfile
ports:
- "18080:8080"
@ -12,3 +12,21 @@ services:
- "host.docker.internal:host-gateway"
volumes:
- ./arch_config.yaml:/app/arch_config.yaml
jaeger:
build:
context: ../shared/jaeger
ports:
- "16686:16686"
- "4317:4317"
- "4318:4318"
prometheus:
build:
context: ../shared/prometheus
grafana:
build:
context: ../shared/grafana
ports:
- "3000:3000"

View file

@ -15,19 +15,21 @@
"LLM": "1",
"CHAT_COMPLETION_ENDPOINT": "http://localhost:10000/v1",
"STREAMING": "True",
"ARCH_CONFIG": "../demos/weather_forecast/arch_config.yaml"
"ARCH_CONFIG": "../../weather_forecast/arch_config.yaml"
}
},
{
"python": "${workspaceFolder}/venv/bin/python",
"name": "chatbot-ui llm",
"cwd": "${workspaceFolder}/app",
"type": "debugpy",
"request": "launch",
"program": "run.py",
"program": "run_stream.py",
"console": "integratedTerminal",
"env": {
"LLM": "1",
"CHAT_COMPLETION_ENDPOINT": "http://localhost:12000/v1"
"CHAT_COMPLETION_ENDPOINT": "http://localhost:12000/v1",
"STREAMING": "True",
"ARCH_CONFIG": "../../llm_routing/arch_config.yaml"
}
},
]

View file

@ -1,3 +1,4 @@
from datetime import datetime
import json
import logging
import os
@ -159,13 +160,44 @@ def get_prompt_targets():
config = yaml.safe_load(file)
available_tools = []
for target in config["prompt_targets"]:
if not target.get("default", False):
available_tools.append(
convert_prompt_target_to_openai_format(target)
)
if "prompt_targets" in config:
for target in config["prompt_targets"]:
if not target.get("default", False):
available_tools.append(
convert_prompt_target_to_openai_format(target)
)
return {tool["name"]: tool["info"] for tool in available_tools}
elif "llm_providers" in config:
return config["llm_providers"]
return {tool["name"]: tool["info"] for tool in available_tools}
except Exception as e:
log.info(e)
return None
def get_llm_models():
try:
with open(os.getenv("ARCH_CONFIG", "arch_config.yaml"), "r") as file:
config = yaml.safe_load(file)
available_models = [""]
default_llm = None
for llm_providers in config["llm_providers"]:
if llm_providers.get("default", False):
default_llm = llm_providers["name"]
else:
available_models.append(llm_providers["name"])
# place default model at the beginning of the list
if default_llm:
available_models.insert(0, default_llm)
return available_models
except Exception as e:
log.info(e)
return []
def format_log(message):
time_now = datetime.now().strftime("%Y-%m-%d %H:%M:%S,%f")[:-3]
return f"{time_now} - {message}"

View file

@ -8,7 +8,7 @@ from typing import List, Optional, Tuple
from openai import OpenAI
from dotenv import load_dotenv
from common import get_prompt_targets, process_stream_chunk
from common import format_log, get_llm_models, get_prompt_targets, process_stream_chunk
load_dotenv()
@ -36,20 +36,28 @@ CSS_STYLE = """
footer {visibility: hidden}
"""
client = OpenAI(
api_key="--",
base_url=CHAT_COMPLETION_ENDPOINT,
)
def chat(
query: Optional[str],
conversation: Optional[List[Tuple[str, str]]],
history: List[dict],
debug_output: str,
model_selector: str,
):
history.append({"role": "user", "content": query})
if debug_output is None:
debug_output = ""
try:
headers = {}
if model_selector and model_selector != "":
headers["x-arch-llm-provider-hint"] = model_selector
client = OpenAI(
api_key="--",
base_url=CHAT_COMPLETION_ENDPOINT,
default_headers=headers,
)
response = client.chat.completions.create(
# we select model from arch_config file
model="--",
@ -65,15 +73,20 @@ def chat(
conversation.append((query, ""))
model_is_set = False
for chunk in response:
tokens = process_stream_chunk(chunk, history)
if tokens and not model_is_set:
model_is_set = True
model = history[-1]["model"]
debug_output = debug_output + "\n" + format_log(f"model: {model}")
if tokens:
conversation[-1] = (
conversation[-1][0],
conversation[-1][1] + tokens,
)
yield "", conversation, history
yield "", conversation, history, debug_output, model_selector
def main():
@ -94,8 +107,17 @@ def main():
value=get_prompt_targets(),
show_indices=False,
elem_classes="json-container",
min_height="95vh",
min_height="50vh",
)
model_selector_textbox = gr.Dropdown(
get_llm_models(),
label="override model",
elem_classes="dropdown",
)
debug_output = gr.TextArea(
label="debug output",
elem_classes="debug_output",
)
with gr.Column(scale=2):
chatbot = gr.Chatbot(
@ -110,7 +132,9 @@ def main():
)
textbox.submit(
chat, [textbox, chatbot, history], [textbox, chatbot, history]
chat,
[textbox, chatbot, history, debug_output, model_selector_textbox],
[textbox, chatbot, history, debug_output, model_selector_textbox],
)
demo.launch(server_name="0.0.0.0", server_port=8080, show_error=True, debug=True)

View file

@ -190,8 +190,8 @@
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))",
"editorMode": "code",
"expr": "histogram_quantile(0.9, sum by(le) (rate(input_sequence_length_bucket[5m])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "__auto",
@ -200,7 +200,7 @@
"useBackend": false
}
],
"title": "input sequence length (p50)",
"title": "input sequence length (p90)",
"type": "timeseries"
},
{
@ -305,7 +305,7 @@
},
"disableTextWrap": false,
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[1h])) by(le))",
"expr": "histogram_quantile(0.9, sum(rate(output_sequence_length_bucket[5m])) by(le))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"instant": false,
@ -315,7 +315,7 @@
"useBackend": false
}
],
"title": "output sequence length (p50)",
"title": "output sequence length (p90)",
"type": "timeseries"
},
{
@ -415,7 +415,11 @@
{
"disableTextWrap": false,
"editorMode": "code",
"expr": "histogram_quantile(0.9, sum by(le) (rate(time_to_first_token_bucket[5m])))",
"fullMetaSearch": false,
"includeNullMetadata": false,
"legendFormat": "__auto",
@ -424,7 +428,7 @@
"useBackend": false
}
],
"title": "time to first token (p50)",
"title": "time to first token (p90)",
"type": "timeseries"
},
{
@ -539,20 +543,29 @@
"type": "prometheus",
"uid": "PBFA97CFB590B2093"
},
"editorMode": "code",
"expr": "histogram_quantile(0.9, sum(rate(request_latency_bucket[60m])) by (le))",
"hide": false,
"includeNullMetadata": false,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "B"
}
],
"title": "request latency (p50)",
"title": "request latency (p90)",
"type": "timeseries"
},
{

View file

@ -1,11 +0,0 @@
FROM python:3.12-slim as arch
WORKDIR /app
RUN pip install requests
COPY stream_traces.py .
RUN mkdir -p /var/log
RUN touch /var/log/envoy.log
CMD ["python", "stream_traces.py"]

View file

@ -30,14 +30,6 @@ services:
- "4317:4317"
- "4318:4318"
trace_streamer:
build:
context: ../shared/trace_streamer
environment:
- OTEL_TRACING_HTTP_ENDPOINT=http://jaeger:4318/v1/traces
volumes:
- ~/archgw_logs:/var/log/
prometheus:
build:
context: ../shared/prometheus

View file

@ -25,14 +25,6 @@ services:
volumes:
- ./arch_config.yaml:/app/arch_config.yaml
trace_streamer:
build:
context: ../shared/trace_streamer
environment:
- OTEL_TRACING_HTTP_ENDPOINT=http://otel-collector:4318/v1/traces
volumes:
- ~/archgw_logs:/var/log/
prometheus:
build:
context: ../shared/prometheus

View file

@ -29,32 +29,24 @@ cd ../demos/weather_forecast
docker compose up weather_forecast_service --build -d
cd -
print_disk_usage
log building and install model server
log =================================
cd ../model_server
poetry install
cd -
print_disk_usage
log building and installing archgw cli
log ==================================
cd ../arch/tools
sh build_cli.sh
cd -
print_disk_usage
log building docker image for arch gateway
log ======================================
cd ../
archgw build
cd -
print_disk_usage
log startup arch gateway with function calling demo
cd ..
tail -F ~/archgw_logs/modelserver.log &
@ -64,8 +56,6 @@ archgw up demos/weather_forecast/arch_config.yaml
kill $model_server_tail_pid
cd -
print_disk_usage
log running e2e tests
log =================
poetry install