mirror of
https://github.com/katanemo/plano.git
synced 2026-04-28 02:23:56 +02:00
move custom tracer to llm filter (#267)
This commit is contained in:
parent
1d229cba8f
commit
d3c17c7abd
22 changed files with 335 additions and 133 deletions
|
|
@ -13,16 +13,21 @@ FROM envoyproxy/envoy:v1.32-latest as envoy
|
||||||
#Build config generator, so that we have a single build image for both Rust and Python
|
#Build config generator, so that we have a single build image for both Rust and Python
|
||||||
FROM python:3.12-slim as arch
|
FROM python:3.12-slim as arch
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y gettext-base curl && apt-get clean && rm -rf /var/lib/apt/lists/*
|
RUN apt-get update && apt-get install -y gettext-base curl supervisor && apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
COPY --from=builder /arch/target/wasm32-wasip1/release/prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm
|
COPY --from=builder /arch/target/wasm32-wasip1/release/prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm
|
||||||
COPY --from=builder /arch/target/wasm32-wasip1/release/llm_gateway.wasm /etc/envoy/proxy-wasm-plugins/llm_gateway.wasm
|
COPY --from=builder /arch/target/wasm32-wasip1/release/llm_gateway.wasm /etc/envoy/proxy-wasm-plugins/llm_gateway.wasm
|
||||||
COPY --from=envoy /usr/local/bin/envoy /usr/local/bin/envoy
|
COPY --from=envoy /usr/local/bin/envoy /usr/local/bin/envoy
|
||||||
WORKDIR /config
|
WORKDIR /app
|
||||||
COPY arch/requirements.txt .
|
COPY arch/requirements.txt .
|
||||||
RUN pip install -r requirements.txt
|
RUN pip install -r requirements.txt
|
||||||
COPY arch/tools/cli/config_generator.py .
|
COPY arch/tools/cli/config_generator.py .
|
||||||
COPY arch/envoy.template.yaml .
|
COPY arch/envoy.template.yaml .
|
||||||
COPY arch/arch_config_schema.yaml .
|
COPY arch/arch_config_schema.yaml .
|
||||||
|
COPY arch/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
|
||||||
|
COPY arch/stream_traces.py .
|
||||||
|
|
||||||
ENTRYPOINT ["sh", "-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug 2>&1 | tee /var/log/envoy.log"]
|
RUN pip install requests
|
||||||
|
RUN touch /var/log/envoy.log
|
||||||
|
|
||||||
|
ENTRYPOINT ["supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
|
||||||
|
|
|
||||||
|
|
@ -8,11 +8,11 @@ services:
|
||||||
- "12000:12000"
|
- "12000:12000"
|
||||||
- "19901:9901"
|
- "19901:9901"
|
||||||
volumes:
|
volumes:
|
||||||
- ${ARCH_CONFIG_FILE:-../demos/weather_forecast/arch_config.yaml}:/config/arch_config.yaml
|
- ${ARCH_CONFIG_FILE:-../demos/weather_forecast/arch_config.yaml}:/app/arch_config.yaml
|
||||||
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
|
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
|
||||||
- ./envoy.template.yaml:/config/envoy.template.yaml
|
- ./envoy.template.yaml:/app/envoy.template.yaml
|
||||||
- ./arch_config_schema.yaml:/config/arch_config_schema.yaml
|
- ./arch_config_schema.yaml:/app/arch_config_schema.yaml
|
||||||
- ./tools/cli/config_generator.py:/config/config_generator.py
|
- ./tools/cli/config_generator.py:/app/config_generator.py
|
||||||
- ../crates/target/wasm32-wasip1/release/llm_gateway.wasm:/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm
|
- ../crates/target/wasm32-wasip1/release/llm_gateway.wasm:/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm
|
||||||
- ../crates/target/wasm32-wasip1/release/prompt_gateway.wasm:/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm
|
- ../crates/target/wasm32-wasip1/release/prompt_gateway.wasm:/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm
|
||||||
- ~/archgw_logs:/var/log/
|
- ~/archgw_logs:/var/log/
|
||||||
|
|
@ -21,3 +21,4 @@ services:
|
||||||
environment:
|
environment:
|
||||||
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
|
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
|
||||||
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
|
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
|
||||||
|
- OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
|
||||||
|
|
|
||||||
|
|
@ -16,3 +16,4 @@ services:
|
||||||
environment:
|
environment:
|
||||||
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
|
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
|
||||||
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
|
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
|
||||||
|
- OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
|
||||||
|
|
|
||||||
|
|
@ -8,11 +8,13 @@ services:
|
||||||
- "12000:12000"
|
- "12000:12000"
|
||||||
- "19901:9901"
|
- "19901:9901"
|
||||||
volumes:
|
volumes:
|
||||||
- ${ARCH_CONFIG_FILE:-../demos/weather_forecast/arch_config.yaml}:/config/arch_config.yaml
|
- ${ARCH_CONFIG_FILE:-../demos/weather_forecast/arch_config.yaml}:/app/arch_config.yaml
|
||||||
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
|
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
|
||||||
- ~/archgw_logs:/var/log/
|
- ~/archgw_logs:/var/log/
|
||||||
env_file:
|
env_file:
|
||||||
- env.list
|
- env.list
|
||||||
|
environment:
|
||||||
|
- OTEL_TRACING_HTTP_ENDPOINT=http://host.docker.internal:4318/v1/traces
|
||||||
extra_hosts:
|
extra_hosts:
|
||||||
- "host.docker.internal:host-gateway"
|
- "host.docker.internal:host-gateway"
|
||||||
healthcheck:
|
healthcheck:
|
||||||
|
|
|
||||||
|
|
@ -242,11 +242,66 @@ static_resources:
|
||||||
typed_config:
|
typed_config:
|
||||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||||
|
|
||||||
- name: arch_listener_llm
|
|
||||||
|
- name: arch_listener_http_llm
|
||||||
address:
|
address:
|
||||||
socket_address:
|
socket_address:
|
||||||
address: 0.0.0.0
|
address: 0.0.0.0
|
||||||
port_value: 12000
|
port_value: 12000
|
||||||
|
traffic_direction: INBOUND
|
||||||
|
filter_chains:
|
||||||
|
- filters:
|
||||||
|
- name: envoy.filters.network.http_connection_manager
|
||||||
|
typed_config:
|
||||||
|
"@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
|
||||||
|
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
|
||||||
|
generate_request_id: true
|
||||||
|
tracing:
|
||||||
|
provider:
|
||||||
|
name: envoy.tracers.opentelemetry
|
||||||
|
typed_config:
|
||||||
|
"@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig
|
||||||
|
grpc_service:
|
||||||
|
envoy_grpc:
|
||||||
|
cluster_name: opentelemetry_collector
|
||||||
|
timeout: 0.250s
|
||||||
|
service_name: arch_gateway
|
||||||
|
random_sampling:
|
||||||
|
value: {{ arch_tracing.random_sampling }}
|
||||||
|
{% endif %}
|
||||||
|
stat_prefix: arch_listener_http
|
||||||
|
codec_type: AUTO
|
||||||
|
scheme_header_transformation:
|
||||||
|
scheme_to_overwrite: https
|
||||||
|
access_log:
|
||||||
|
- name: envoy.access_loggers.file
|
||||||
|
typed_config:
|
||||||
|
"@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog
|
||||||
|
path: "/var/log/access_llm.log"
|
||||||
|
route_config:
|
||||||
|
name: local_routes
|
||||||
|
virtual_hosts:
|
||||||
|
- name: local_service
|
||||||
|
domains:
|
||||||
|
- "*"
|
||||||
|
routes:
|
||||||
|
- match:
|
||||||
|
prefix: "/"
|
||||||
|
route:
|
||||||
|
auto_host_rewrite: true
|
||||||
|
cluster: arch_listener_llm
|
||||||
|
timeout: 60s
|
||||||
|
http_filters:
|
||||||
|
- name: envoy.filters.http.router
|
||||||
|
typed_config:
|
||||||
|
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||||
|
|
||||||
|
|
||||||
|
- name: arch_listener_llm
|
||||||
|
address:
|
||||||
|
socket_address:
|
||||||
|
address: 0.0.0.0
|
||||||
|
port_value: 12001
|
||||||
filter_chains:
|
filter_chains:
|
||||||
- filters:
|
- filters:
|
||||||
- name: envoy.filters.network.http_connection_manager
|
- name: envoy.filters.network.http_connection_manager
|
||||||
|
|
@ -479,6 +534,23 @@ static_resources:
|
||||||
port_value: 10001
|
port_value: 10001
|
||||||
hostname: arch_prompt_gateway_listener
|
hostname: arch_prompt_gateway_listener
|
||||||
|
|
||||||
|
- name: arch_listener_llm
|
||||||
|
connect_timeout: 5s
|
||||||
|
type: LOGICAL_DNS
|
||||||
|
dns_lookup_family: V4_ONLY
|
||||||
|
lb_policy: ROUND_ROBIN
|
||||||
|
load_assignment:
|
||||||
|
cluster_name: arch_listener_llm
|
||||||
|
endpoints:
|
||||||
|
- lb_endpoints:
|
||||||
|
- endpoint:
|
||||||
|
address:
|
||||||
|
socket_address:
|
||||||
|
address: 0.0.0.0
|
||||||
|
port_value: 12001
|
||||||
|
hostname: arch_listener_llm
|
||||||
|
|
||||||
|
|
||||||
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
|
{% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %}
|
||||||
- name: opentelemetry_collector
|
- name: opentelemetry_collector
|
||||||
type: STRICT_DNS
|
type: STRICT_DNS
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
import time
|
import time
|
||||||
import requests
|
import requests
|
||||||
import logging
|
import logging
|
||||||
|
|
@ -29,14 +30,8 @@ def process_log_line(line):
|
||||||
logging.error(f"Failed to send trace to otel-tracing: {e}")
|
logging.error(f"Failed to send trace to otel-tracing: {e}")
|
||||||
|
|
||||||
|
|
||||||
with open(envoy_log_path, "r") as f:
|
for line in sys.stdin:
|
||||||
# Seek to the end of the file so we only read new lines
|
if line:
|
||||||
f.seek(0, os.SEEK_END)
|
tokens = line.split("gateway: upstream_llm trace details: ")
|
||||||
while True:
|
|
||||||
line = f.readline()
|
|
||||||
if not line:
|
|
||||||
time.sleep(1)
|
|
||||||
continue
|
|
||||||
tokens = line.split("prompt_gateway: upstream_llm trace details: ")
|
|
||||||
if len(tokens) > 1:
|
if len(tokens) > 1:
|
||||||
process_log_line(tokens[1])
|
process_log_line(tokens[1])
|
||||||
25
arch/supervisord.conf
Normal file
25
arch/supervisord.conf
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
[supervisord]
|
||||||
|
nodaemon=true
|
||||||
|
|
||||||
|
[program:trace_streamer]
|
||||||
|
command=sh -c "tail -F /var/log/envoy.log | python stream_traces.py"
|
||||||
|
autostart=true
|
||||||
|
autorestart=false
|
||||||
|
startretries=3
|
||||||
|
priority=1
|
||||||
|
stdout_logfile=/dev/stdout
|
||||||
|
stderr_logfile=/dev/stderr
|
||||||
|
stdout_logfile_maxbytes = 0
|
||||||
|
stderr_logfile_maxbytes = 0
|
||||||
|
|
||||||
|
|
||||||
|
[program:envoy]
|
||||||
|
command=sh -c "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug 2>&1 | tee /var/log/envoy.log"
|
||||||
|
autostart=true
|
||||||
|
autorestart=true
|
||||||
|
startretries=3
|
||||||
|
priority=2
|
||||||
|
stdout_logfile=/dev/stdout
|
||||||
|
stderr_logfile=/dev/stderr
|
||||||
|
stdout_logfile_maxbytes = 0
|
||||||
|
stderr_logfile_maxbytes = 0
|
||||||
|
|
@ -6,7 +6,7 @@ from jsonschema import validate
|
||||||
ENVOY_CONFIG_TEMPLATE_FILE = os.getenv(
|
ENVOY_CONFIG_TEMPLATE_FILE = os.getenv(
|
||||||
"ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml"
|
"ENVOY_CONFIG_TEMPLATE_FILE", "envoy.template.yaml"
|
||||||
)
|
)
|
||||||
ARCH_CONFIG_FILE = os.getenv("ARCH_CONFIG_FILE", "/config/arch_config.yaml")
|
ARCH_CONFIG_FILE = os.getenv("ARCH_CONFIG_FILE", "/app/arch_config.yaml")
|
||||||
ENVOY_CONFIG_FILE_RENDERED = os.getenv(
|
ENVOY_CONFIG_FILE_RENDERED = os.getenv(
|
||||||
"ENVOY_CONFIG_FILE_RENDERED", "/etc/envoy/envoy.yaml"
|
"ENVOY_CONFIG_FILE_RENDERED", "/etc/envoy/envoy.yaml"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -21,22 +21,25 @@
|
||||||
"path": "e2e_tests"
|
"path": "e2e_tests"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "demos/weather_forecast",
|
"name": "chatbot_ui",
|
||||||
"path": "./demos/weather_forecast",
|
"path": "demos/shared/chatbot_ui"
|
||||||
},
|
}
|
||||||
{
|
|
||||||
"name": "demos/insurance_agent",
|
|
||||||
"path": "./demos/insurance_agent",
|
|
||||||
},
|
|
||||||
],
|
],
|
||||||
"settings": {
|
"settings": {
|
||||||
|
"[python]": {
|
||||||
|
"editor.defaultFormatter": "ms-python.black-formatter",
|
||||||
|
"editor.formatOnSave": true
|
||||||
|
},
|
||||||
},
|
},
|
||||||
"extensions": {
|
"extensions": {
|
||||||
"recommendations": [
|
"recommendations": [
|
||||||
"ms-python.python",
|
"ms-python.python",
|
||||||
"ms-python.debugpy",
|
"ms-python.debugpy",
|
||||||
"rust-lang.rust-analyzer",
|
"rust-lang.rust-analyzer",
|
||||||
"humao.rest-client"
|
"humao.rest-client",
|
||||||
|
"github.copilot",
|
||||||
|
"eamodio.gitlens",
|
||||||
|
"ms-python.black-formatter",
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,17 +1,18 @@
|
||||||
use crate::filter_context::WasmMetrics;
|
use crate::filter_context::WasmMetrics;
|
||||||
use common::common_types::open_ai::{
|
use common::common_types::open_ai::{
|
||||||
ChatCompletionStreamResponseServerEvents, ChatCompletionsRequest, ChatCompletionsResponse,
|
ChatCompletionStreamResponseServerEvents, ChatCompletionsRequest, ChatCompletionsResponse,
|
||||||
StreamOptions,
|
Message, StreamOptions,
|
||||||
};
|
};
|
||||||
use common::configuration::LlmProvider;
|
use common::configuration::LlmProvider;
|
||||||
use common::consts::{
|
use common::consts::{
|
||||||
ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, CHAT_COMPLETIONS_PATH,
|
ARCH_PROVIDER_HINT_HEADER, ARCH_ROUTING_HEADER, CHAT_COMPLETIONS_PATH,
|
||||||
RATELIMIT_SELECTOR_HEADER_KEY, REQUEST_ID_HEADER,
|
RATELIMIT_SELECTOR_HEADER_KEY, REQUEST_ID_HEADER, TRACE_PARENT_HEADER,
|
||||||
};
|
};
|
||||||
use common::errors::ServerError;
|
use common::errors::ServerError;
|
||||||
use common::llm_providers::LlmProviders;
|
use common::llm_providers::LlmProviders;
|
||||||
use common::pii::obfuscate_auth_header;
|
use common::pii::obfuscate_auth_header;
|
||||||
use common::ratelimit::Header;
|
use common::ratelimit::Header;
|
||||||
|
use common::tracing::{Event, Span};
|
||||||
use common::{ratelimit, routing, tokenizer};
|
use common::{ratelimit, routing, tokenizer};
|
||||||
use http::StatusCode;
|
use http::StatusCode;
|
||||||
use log::{debug, trace, warn};
|
use log::{debug, trace, warn};
|
||||||
|
|
@ -23,7 +24,7 @@ use std::rc::Rc;
|
||||||
use common::stats::{IncrementingMetric, RecordingMetric};
|
use common::stats::{IncrementingMetric, RecordingMetric};
|
||||||
|
|
||||||
use proxy_wasm::hostcalls::get_current_time;
|
use proxy_wasm::hostcalls::get_current_time;
|
||||||
use std::time::{Duration, SystemTime};
|
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||||
|
|
||||||
pub struct StreamContext {
|
pub struct StreamContext {
|
||||||
context_id: u32,
|
context_id: u32,
|
||||||
|
|
@ -36,7 +37,10 @@ pub struct StreamContext {
|
||||||
llm_provider: Option<Rc<LlmProvider>>,
|
llm_provider: Option<Rc<LlmProvider>>,
|
||||||
request_id: Option<String>,
|
request_id: Option<String>,
|
||||||
start_time: Option<SystemTime>,
|
start_time: Option<SystemTime>,
|
||||||
ttft_duration: Option<Duration>, // Store the duration directly
|
ttft_duration: Option<Duration>,
|
||||||
|
ttft_time: Option<SystemTime>,
|
||||||
|
pub traceparent: Option<String>,
|
||||||
|
user_message: Option<Message>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl StreamContext {
|
impl StreamContext {
|
||||||
|
|
@ -53,6 +57,9 @@ impl StreamContext {
|
||||||
request_id: None,
|
request_id: None,
|
||||||
start_time: None,
|
start_time: None,
|
||||||
ttft_duration: None,
|
ttft_duration: None,
|
||||||
|
traceparent: None,
|
||||||
|
ttft_time: None,
|
||||||
|
user_message: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn llm_provider(&self) -> &LlmProvider {
|
fn llm_provider(&self) -> &LlmProvider {
|
||||||
|
|
@ -176,9 +183,10 @@ impl HttpContext for StreamContext {
|
||||||
);
|
);
|
||||||
|
|
||||||
self.request_id = self.get_http_request_header(REQUEST_ID_HEADER);
|
self.request_id = self.get_http_request_header(REQUEST_ID_HEADER);
|
||||||
|
self.traceparent = self.get_http_request_header(TRACE_PARENT_HEADER);
|
||||||
|
|
||||||
//start the timing for the request using get_current_time()
|
//start the timing for the request using get_current_time()
|
||||||
let current_time = get_current_time().unwrap();
|
let current_time: SystemTime = get_current_time().unwrap();
|
||||||
self.start_time = Some(current_time);
|
self.start_time = Some(current_time);
|
||||||
self.ttft_duration = None;
|
self.ttft_duration = None;
|
||||||
|
|
||||||
|
|
@ -229,6 +237,13 @@ impl HttpContext for StreamContext {
|
||||||
message.model = None;
|
message.model = None;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
self.user_message = deserialized_body
|
||||||
|
.messages
|
||||||
|
.iter()
|
||||||
|
.filter(|m| m.role == "user")
|
||||||
|
.last()
|
||||||
|
.cloned();
|
||||||
|
|
||||||
// override model name from the llm provider
|
// override model name from the llm provider
|
||||||
deserialized_body
|
deserialized_body
|
||||||
.model
|
.model
|
||||||
|
|
@ -318,6 +333,52 @@ impl HttpContext for StreamContext {
|
||||||
.output_sequence_length
|
.output_sequence_length
|
||||||
.record(self.response_tokens as u64);
|
.record(self.response_tokens as u64);
|
||||||
|
|
||||||
|
if let Some(traceparent) = self.traceparent.as_ref() {
|
||||||
|
let since_the_epoch_ns = SystemTime::now()
|
||||||
|
.duration_since(UNIX_EPOCH)
|
||||||
|
.unwrap()
|
||||||
|
.as_nanos();
|
||||||
|
|
||||||
|
let traceparent_tokens = traceparent.split("-").collect::<Vec<&str>>();
|
||||||
|
if traceparent_tokens.len() != 4 {
|
||||||
|
warn!("traceparent header is invalid: {}", traceparent);
|
||||||
|
return Action::Continue;
|
||||||
|
}
|
||||||
|
let parent_trace_id = traceparent_tokens[1];
|
||||||
|
let parent_span_id = traceparent_tokens[2];
|
||||||
|
let mut trace_data = common::tracing::TraceData::new();
|
||||||
|
let mut llm_span = Span::new(
|
||||||
|
"upstream_llm_time".to_string(),
|
||||||
|
parent_trace_id.to_string(),
|
||||||
|
Some(parent_span_id.to_string()),
|
||||||
|
self.start_time
|
||||||
|
.unwrap()
|
||||||
|
.duration_since(UNIX_EPOCH)
|
||||||
|
.unwrap()
|
||||||
|
.as_nanos(),
|
||||||
|
since_the_epoch_ns,
|
||||||
|
);
|
||||||
|
if let Some(user_message) = self.user_message.as_ref() {
|
||||||
|
if let Some(prompt) = user_message.content.as_ref() {
|
||||||
|
llm_span.add_attribute("user_prompt".to_string(), prompt.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
llm_span.add_attribute("model".to_string(), self.llm_provider().name.to_string());
|
||||||
|
llm_span.add_event(Event::new(
|
||||||
|
"time_to_first_token".to_string(),
|
||||||
|
self.ttft_time
|
||||||
|
.unwrap()
|
||||||
|
.duration_since(UNIX_EPOCH)
|
||||||
|
.unwrap()
|
||||||
|
.as_nanos(),
|
||||||
|
));
|
||||||
|
trace_data.add_span(llm_span);
|
||||||
|
|
||||||
|
let trace_data_str = serde_json::to_string(&trace_data).unwrap();
|
||||||
|
debug!("upstream_llm trace details: {}", trace_data_str);
|
||||||
|
// send trace_data to http tracing endpoint
|
||||||
|
}
|
||||||
|
|
||||||
return Action::Continue;
|
return Action::Continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -413,6 +474,7 @@ impl HttpContext for StreamContext {
|
||||||
if self.ttft_duration.is_none() {
|
if self.ttft_duration.is_none() {
|
||||||
if let Some(start_time) = self.start_time {
|
if let Some(start_time) = self.start_time {
|
||||||
let current_time = get_current_time().unwrap();
|
let current_time = get_current_time().unwrap();
|
||||||
|
self.ttft_time = Some(current_time);
|
||||||
match current_time.duration_since(start_time) {
|
match current_time.duration_since(start_time) {
|
||||||
Ok(duration) => {
|
Ok(duration) => {
|
||||||
let duration_ms = duration.as_millis();
|
let duration_ms = duration.as_millis();
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,8 @@ fn request_headers_expectations(module: &mut Tester, http_context: i32) {
|
||||||
.expect_log(Some(LogLevel::Debug), None)
|
.expect_log(Some(LogLevel::Debug), None)
|
||||||
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("x-request-id"))
|
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("x-request-id"))
|
||||||
.returning(None)
|
.returning(None)
|
||||||
|
.expect_get_header_map_value(Some(MapType::HttpRequestHeaders), Some("traceparent"))
|
||||||
|
.returning(None)
|
||||||
.expect_get_current_time_nanos()
|
.expect_get_current_time_nanos()
|
||||||
.returning(Some(0))
|
.returning(Some(0))
|
||||||
.execute_and_expect(ReturnType::Action(Action::Continue))
|
.execute_and_expect(ReturnType::Action(Action::Continue))
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,6 @@ use common::{
|
||||||
errors::ServerError,
|
errors::ServerError,
|
||||||
http::{CallArgs, Client},
|
http::{CallArgs, Client},
|
||||||
pii::obfuscate_auth_header,
|
pii::obfuscate_auth_header,
|
||||||
tracing::{Event, Span},
|
|
||||||
};
|
};
|
||||||
use http::StatusCode;
|
use http::StatusCode;
|
||||||
use log::{debug, trace, warn};
|
use log::{debug, trace, warn};
|
||||||
|
|
@ -265,42 +264,6 @@ impl HttpContext for StreamContext {
|
||||||
}
|
}
|
||||||
|
|
||||||
if end_of_stream && body_size == 0 {
|
if end_of_stream && body_size == 0 {
|
||||||
if let Some(traceparent) = self.traceparent.as_ref() {
|
|
||||||
let since_the_epoch_ns = SystemTime::now()
|
|
||||||
.duration_since(UNIX_EPOCH)
|
|
||||||
.unwrap()
|
|
||||||
.as_nanos();
|
|
||||||
|
|
||||||
let traceparent_tokens = traceparent.split("-").collect::<Vec<&str>>();
|
|
||||||
if traceparent_tokens.len() != 4 {
|
|
||||||
warn!("traceparent header is invalid: {}", traceparent);
|
|
||||||
return Action::Continue;
|
|
||||||
}
|
|
||||||
let parent_trace_id = traceparent_tokens[1];
|
|
||||||
let parent_span_id = traceparent_tokens[2];
|
|
||||||
let mut trace_data = common::tracing::TraceData::new();
|
|
||||||
let mut llm_span = Span::new(
|
|
||||||
"upstream_llm_time".to_string(),
|
|
||||||
parent_trace_id.to_string(),
|
|
||||||
Some(parent_span_id.to_string()),
|
|
||||||
self.start_upstream_llm_request_time,
|
|
||||||
since_the_epoch_ns,
|
|
||||||
);
|
|
||||||
if let Some(prompt) = self.user_prompt.as_ref() {
|
|
||||||
if let Some(content) = prompt.content.as_ref() {
|
|
||||||
llm_span.add_attribute("user_prompt".to_string(), content.to_string());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
llm_span.add_event(Event::new(
|
|
||||||
"time_to_first_token".to_string(),
|
|
||||||
self.time_to_first_token.unwrap(),
|
|
||||||
));
|
|
||||||
trace_data.add_span(llm_span);
|
|
||||||
|
|
||||||
let trace_data_str = serde_json::to_string(&trace_data).unwrap();
|
|
||||||
debug!("upstream_llm trace details: {}", trace_data_str);
|
|
||||||
// send trace_data to http tracing endpoint
|
|
||||||
}
|
|
||||||
return Action::Continue;
|
return Action::Continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
19
demos/llm_routing/README.md
Normal file
19
demos/llm_routing/README.md
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
# LLM Routing
|
||||||
|
This demo shows how you can use Arch gateway to manage keys and route to the appropriate LLM.
|
||||||
|
|
||||||
|
# Starting the demo
|
||||||
|
1. Please make sure the [pre-requisites](https://github.com/katanemo/arch/?tab=readme-ov-file#prerequisites) are installed correctly
|
||||||
|
1. Start Arch
|
||||||
|
```sh
|
||||||
|
sh run_demo.sh
|
||||||
|
```
|
||||||
|
1. Navigate to http://localhost:18080/
|
||||||
|
|
||||||
|
# Observability
|
||||||
|
Arch gateway publishes a stats endpoint at http://localhost:19901/stats. In this demo we use Prometheus to pull stats from Arch and Grafana to visualize the stats in a dashboard. To see the Grafana dashboard, follow the instructions below:
|
||||||
|
|
||||||
|
1. Navigate to http://localhost:3000/ to open grafana UI (use admin/grafana as credentials)
|
||||||
|
1. From grafana left nav click on dashboards and select "Intelligent Gateway Overview" to view arch gateway stats
|
||||||
|
|
||||||
|
# Selecting different LLM
|
||||||
|
You can pick a different LLM by setting the `x-arch-llm-provider-hint` header, which overrides the default LLM.
|
||||||
|
|
@ -2,7 +2,7 @@ services:
|
||||||
|
|
||||||
chatbot_ui:
|
chatbot_ui:
|
||||||
build:
|
build:
|
||||||
context: ../../chatbot_ui
|
context: ../shared/chatbot_ui
|
||||||
dockerfile: Dockerfile
|
dockerfile: Dockerfile
|
||||||
ports:
|
ports:
|
||||||
- "18080:8080"
|
- "18080:8080"
|
||||||
|
|
@ -12,3 +12,21 @@ services:
|
||||||
- "host.docker.internal:host-gateway"
|
- "host.docker.internal:host-gateway"
|
||||||
volumes:
|
volumes:
|
||||||
- ./arch_config.yaml:/app/arch_config.yaml
|
- ./arch_config.yaml:/app/arch_config.yaml
|
||||||
|
|
||||||
|
jaeger:
|
||||||
|
build:
|
||||||
|
context: ../shared/jaeger
|
||||||
|
ports:
|
||||||
|
- "16686:16686"
|
||||||
|
- "4317:4317"
|
||||||
|
- "4318:4318"
|
||||||
|
|
||||||
|
prometheus:
|
||||||
|
build:
|
||||||
|
context: ../shared/prometheus
|
||||||
|
|
||||||
|
grafana:
|
||||||
|
build:
|
||||||
|
context: ../shared/grafana
|
||||||
|
ports:
|
||||||
|
- "3000:3000"
|
||||||
|
|
|
||||||
10
demos/shared/chatbot_ui/.vscode/launch.json
vendored
10
demos/shared/chatbot_ui/.vscode/launch.json
vendored
|
|
@ -15,19 +15,21 @@
|
||||||
"LLM": "1",
|
"LLM": "1",
|
||||||
"CHAT_COMPLETION_ENDPOINT": "http://localhost:10000/v1",
|
"CHAT_COMPLETION_ENDPOINT": "http://localhost:10000/v1",
|
||||||
"STREAMING": "True",
|
"STREAMING": "True",
|
||||||
"ARCH_CONFIG": "../demos/weather_forecast/arch_config.yaml"
|
"ARCH_CONFIG": "../../weather_forecast/arch_config.yaml"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"python": "${workspaceFolder}/venv/bin/python",
|
||||||
"name": "chatbot-ui llm",
|
"name": "chatbot-ui llm",
|
||||||
"cwd": "${workspaceFolder}/app",
|
|
||||||
"type": "debugpy",
|
"type": "debugpy",
|
||||||
"request": "launch",
|
"request": "launch",
|
||||||
"program": "run.py",
|
"program": "run_stream.py",
|
||||||
"console": "integratedTerminal",
|
"console": "integratedTerminal",
|
||||||
"env": {
|
"env": {
|
||||||
"LLM": "1",
|
"LLM": "1",
|
||||||
"CHAT_COMPLETION_ENDPOINT": "http://localhost:12000/v1"
|
"CHAT_COMPLETION_ENDPOINT": "http://localhost:12000/v1",
|
||||||
|
"STREAMING": "True",
|
||||||
|
"ARCH_CONFIG": "../../llm_routing/arch_config.yaml"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
from datetime import datetime
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
|
@ -159,13 +160,44 @@ def get_prompt_targets():
|
||||||
config = yaml.safe_load(file)
|
config = yaml.safe_load(file)
|
||||||
|
|
||||||
available_tools = []
|
available_tools = []
|
||||||
for target in config["prompt_targets"]:
|
if "prompt_targets" in config:
|
||||||
if not target.get("default", False):
|
for target in config["prompt_targets"]:
|
||||||
available_tools.append(
|
if not target.get("default", False):
|
||||||
convert_prompt_target_to_openai_format(target)
|
available_tools.append(
|
||||||
)
|
convert_prompt_target_to_openai_format(target)
|
||||||
|
)
|
||||||
|
|
||||||
|
return {tool["name"]: tool["info"] for tool in available_tools}
|
||||||
|
elif "llm_providers" in config:
|
||||||
|
return config["llm_providers"]
|
||||||
|
|
||||||
return {tool["name"]: tool["info"] for tool in available_tools}
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.info(e)
|
log.info(e)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_llm_models():
|
||||||
|
try:
|
||||||
|
with open(os.getenv("ARCH_CONFIG", "arch_config.yaml"), "r") as file:
|
||||||
|
config = yaml.safe_load(file)
|
||||||
|
|
||||||
|
available_models = [""]
|
||||||
|
default_llm = None
|
||||||
|
for llm_providers in config["llm_providers"]:
|
||||||
|
if llm_providers.get("default", False):
|
||||||
|
default_llm = llm_providers["name"]
|
||||||
|
else:
|
||||||
|
available_models.append(llm_providers["name"])
|
||||||
|
|
||||||
|
# place default model at the beginning of the list
|
||||||
|
if default_llm:
|
||||||
|
available_models.insert(0, default_llm)
|
||||||
|
return available_models
|
||||||
|
except Exception as e:
|
||||||
|
log.info(e)
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
|
def format_log(message):
|
||||||
|
time_now = datetime.now().strftime("%Y-%m-%d %H:%M:%S,%f")[:-3]
|
||||||
|
return f"{time_now} - {message}"
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ from typing import List, Optional, Tuple
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
from common import get_prompt_targets, process_stream_chunk
|
from common import format_log, get_llm_models, get_prompt_targets, process_stream_chunk
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
|
|
@ -36,20 +36,28 @@ CSS_STYLE = """
|
||||||
footer {visibility: hidden}
|
footer {visibility: hidden}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
client = OpenAI(
|
|
||||||
api_key="--",
|
|
||||||
base_url=CHAT_COMPLETION_ENDPOINT,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def chat(
|
def chat(
|
||||||
query: Optional[str],
|
query: Optional[str],
|
||||||
conversation: Optional[List[Tuple[str, str]]],
|
conversation: Optional[List[Tuple[str, str]]],
|
||||||
history: List[dict],
|
history: List[dict],
|
||||||
|
debug_output: str,
|
||||||
|
model_selector: str,
|
||||||
):
|
):
|
||||||
history.append({"role": "user", "content": query})
|
history.append({"role": "user", "content": query})
|
||||||
|
|
||||||
|
if debug_output is None:
|
||||||
|
debug_output = ""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
headers = {}
|
||||||
|
if model_selector and model_selector != "":
|
||||||
|
headers["x-arch-llm-provider-hint"] = model_selector
|
||||||
|
client = OpenAI(
|
||||||
|
api_key="--",
|
||||||
|
base_url=CHAT_COMPLETION_ENDPOINT,
|
||||||
|
default_headers=headers,
|
||||||
|
)
|
||||||
response = client.chat.completions.create(
|
response = client.chat.completions.create(
|
||||||
# we select model from arch_config file
|
# we select model from arch_config file
|
||||||
model="--",
|
model="--",
|
||||||
|
|
@ -65,15 +73,20 @@ def chat(
|
||||||
|
|
||||||
conversation.append((query, ""))
|
conversation.append((query, ""))
|
||||||
|
|
||||||
|
model_is_set = False
|
||||||
for chunk in response:
|
for chunk in response:
|
||||||
tokens = process_stream_chunk(chunk, history)
|
tokens = process_stream_chunk(chunk, history)
|
||||||
|
if tokens and not model_is_set:
|
||||||
|
model_is_set = True
|
||||||
|
model = history[-1]["model"]
|
||||||
|
debug_output = debug_output + "\n" + format_log(f"model: {model}")
|
||||||
if tokens:
|
if tokens:
|
||||||
conversation[-1] = (
|
conversation[-1] = (
|
||||||
conversation[-1][0],
|
conversation[-1][0],
|
||||||
conversation[-1][1] + tokens,
|
conversation[-1][1] + tokens,
|
||||||
)
|
)
|
||||||
|
|
||||||
yield "", conversation, history
|
yield "", conversation, history, debug_output, model_selector
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
@ -94,8 +107,17 @@ def main():
|
||||||
value=get_prompt_targets(),
|
value=get_prompt_targets(),
|
||||||
show_indices=False,
|
show_indices=False,
|
||||||
elem_classes="json-container",
|
elem_classes="json-container",
|
||||||
min_height="95vh",
|
min_height="50vh",
|
||||||
)
|
)
|
||||||
|
model_selector_textbox = gr.Dropdown(
|
||||||
|
get_llm_models(),
|
||||||
|
label="override model",
|
||||||
|
elem_classes="dropdown",
|
||||||
|
)
|
||||||
|
debug_output = gr.TextArea(
|
||||||
|
label="debug output",
|
||||||
|
elem_classes="debug_output",
|
||||||
|
)
|
||||||
|
|
||||||
with gr.Column(scale=2):
|
with gr.Column(scale=2):
|
||||||
chatbot = gr.Chatbot(
|
chatbot = gr.Chatbot(
|
||||||
|
|
@ -110,7 +132,9 @@ def main():
|
||||||
)
|
)
|
||||||
|
|
||||||
textbox.submit(
|
textbox.submit(
|
||||||
chat, [textbox, chatbot, history], [textbox, chatbot, history]
|
chat,
|
||||||
|
[textbox, chatbot, history, debug_output, model_selector_textbox],
|
||||||
|
[textbox, chatbot, history, debug_output, model_selector_textbox],
|
||||||
)
|
)
|
||||||
|
|
||||||
demo.launch(server_name="0.0.0.0", server_port=8080, show_error=True, debug=True)
|
demo.launch(server_name="0.0.0.0", server_port=8080, show_error=True, debug=True)
|
||||||
|
|
|
||||||
|
|
@ -190,8 +190,8 @@
|
||||||
"targets": [
|
"targets": [
|
||||||
{
|
{
|
||||||
"disableTextWrap": false,
|
"disableTextWrap": false,
|
||||||
"editorMode": "builder",
|
"editorMode": "code",
|
||||||
"expr": "histogram_quantile(0.5, sum by(le) (rate(input_sequence_length_bucket[1h])))",
|
"expr": "histogram_quantile(0.9, sum by(le) (rate(input_sequence_length_bucket[5m])))",
|
||||||
"fullMetaSearch": false,
|
"fullMetaSearch": false,
|
||||||
"includeNullMetadata": false,
|
"includeNullMetadata": false,
|
||||||
"legendFormat": "__auto",
|
"legendFormat": "__auto",
|
||||||
|
|
@ -200,7 +200,7 @@
|
||||||
"useBackend": false
|
"useBackend": false
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"title": "input sequence length (p50)",
|
"title": "input sequence length (p90)",
|
||||||
"type": "timeseries"
|
"type": "timeseries"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -305,7 +305,7 @@
|
||||||
},
|
},
|
||||||
"disableTextWrap": false,
|
"disableTextWrap": false,
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"expr": "histogram_quantile(0.5, sum(rate(output_sequence_length_bucket[1h])) by(le))",
|
"expr": "histogram_quantile(0.9, sum(rate(output_sequence_length_bucket[5m])) by(le))",
|
||||||
"fullMetaSearch": false,
|
"fullMetaSearch": false,
|
||||||
"includeNullMetadata": false,
|
"includeNullMetadata": false,
|
||||||
"instant": false,
|
"instant": false,
|
||||||
|
|
@ -315,7 +315,7 @@
|
||||||
"useBackend": false
|
"useBackend": false
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"title": "output sequence length (p50)",
|
"title": "output sequence length (p90)",
|
||||||
"type": "timeseries"
|
"type": "timeseries"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -415,7 +415,11 @@
|
||||||
{
|
{
|
||||||
"disableTextWrap": false,
|
"disableTextWrap": false,
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
|
<<<<<<< HEAD
|
||||||
|
"expr": "histogram_quantile(0.9, sum by(le) (rate(time_to_first_token_bucket[5m])))",
|
||||||
|
=======
|
||||||
"expr": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))",
|
"expr": "histogram_quantile(0.5, sum by(le) (rate(time_to_first_token_bucket[1h])))",
|
||||||
|
>>>>>>> main
|
||||||
"fullMetaSearch": false,
|
"fullMetaSearch": false,
|
||||||
"includeNullMetadata": false,
|
"includeNullMetadata": false,
|
||||||
"legendFormat": "__auto",
|
"legendFormat": "__auto",
|
||||||
|
|
@ -424,7 +428,7 @@
|
||||||
"useBackend": false
|
"useBackend": false
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"title": "time to first token (p50)",
|
"title": "time to first token (p90)",
|
||||||
"type": "timeseries"
|
"type": "timeseries"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -539,20 +543,29 @@
|
||||||
"type": "prometheus",
|
"type": "prometheus",
|
||||||
"uid": "PBFA97CFB590B2093"
|
"uid": "PBFA97CFB590B2093"
|
||||||
},
|
},
|
||||||
|
<<<<<<< HEAD
|
||||||
|
"editorMode": "code",
|
||||||
|
"expr": "histogram_quantile(0.9, sum(rate(request_latency_bucket[60m])) by (le))",
|
||||||
|
=======
|
||||||
"disableTextWrap": false,
|
"disableTextWrap": false,
|
||||||
"editorMode": "builder",
|
"editorMode": "builder",
|
||||||
"expr": "histogram_quantile(0.5, sum by(le) (rate(request_latency_bucket[1h])))",
|
"expr": "histogram_quantile(0.5, sum by(le) (rate(request_latency_bucket[1h])))",
|
||||||
"fullMetaSearch": false,
|
"fullMetaSearch": false,
|
||||||
|
>>>>>>> main
|
||||||
"hide": false,
|
"hide": false,
|
||||||
"includeNullMetadata": false,
|
"includeNullMetadata": false,
|
||||||
"instant": false,
|
"instant": false,
|
||||||
"legendFormat": "__auto",
|
"legendFormat": "__auto",
|
||||||
"range": true,
|
"range": true,
|
||||||
|
<<<<<<< HEAD
|
||||||
|
"refId": "B"
|
||||||
|
=======
|
||||||
"refId": "A",
|
"refId": "A",
|
||||||
"useBackend": false
|
"useBackend": false
|
||||||
|
>>>>>>> main
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"title": "request latency (p50)",
|
"title": "request latency (p90)",
|
||||||
"type": "timeseries"
|
"type": "timeseries"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -1,11 +0,0 @@
|
||||||
FROM python:3.12-slim as arch
|
|
||||||
|
|
||||||
WORKDIR /app
|
|
||||||
|
|
||||||
RUN pip install requests
|
|
||||||
COPY stream_traces.py .
|
|
||||||
|
|
||||||
RUN mkdir -p /var/log
|
|
||||||
RUN touch /var/log/envoy.log
|
|
||||||
|
|
||||||
CMD ["python", "stream_traces.py"]
|
|
||||||
|
|
@ -30,14 +30,6 @@ services:
|
||||||
- "4317:4317"
|
- "4317:4317"
|
||||||
- "4318:4318"
|
- "4318:4318"
|
||||||
|
|
||||||
trace_streamer:
|
|
||||||
build:
|
|
||||||
context: ../shared/trace_streamer
|
|
||||||
environment:
|
|
||||||
- OTEL_TRACING_HTTP_ENDPOINT=http://jaeger:4318/v1/traces
|
|
||||||
volumes:
|
|
||||||
- ~/archgw_logs:/var/log/
|
|
||||||
|
|
||||||
prometheus:
|
prometheus:
|
||||||
build:
|
build:
|
||||||
context: ../shared/prometheus
|
context: ../shared/prometheus
|
||||||
|
|
|
||||||
|
|
@ -25,14 +25,6 @@ services:
|
||||||
volumes:
|
volumes:
|
||||||
- ./arch_config.yaml:/app/arch_config.yaml
|
- ./arch_config.yaml:/app/arch_config.yaml
|
||||||
|
|
||||||
trace_streamer:
|
|
||||||
build:
|
|
||||||
context: ../shared/trace_streamer
|
|
||||||
environment:
|
|
||||||
- OTEL_TRACING_HTTP_ENDPOINT=http://otel-collector:4318/v1/traces
|
|
||||||
volumes:
|
|
||||||
- ~/archgw_logs:/var/log/
|
|
||||||
|
|
||||||
prometheus:
|
prometheus:
|
||||||
build:
|
build:
|
||||||
context: ../shared/prometheus
|
context: ../shared/prometheus
|
||||||
|
|
|
||||||
|
|
@ -29,32 +29,24 @@ cd ../demos/weather_forecast
|
||||||
docker compose up weather_forecast_service --build -d
|
docker compose up weather_forecast_service --build -d
|
||||||
cd -
|
cd -
|
||||||
|
|
||||||
print_disk_usage
|
|
||||||
|
|
||||||
log building and install model server
|
log building and install model server
|
||||||
log =================================
|
log =================================
|
||||||
cd ../model_server
|
cd ../model_server
|
||||||
poetry install
|
poetry install
|
||||||
cd -
|
cd -
|
||||||
|
|
||||||
print_disk_usage
|
|
||||||
|
|
||||||
log building and installing archgw cli
|
log building and installing archgw cli
|
||||||
log ==================================
|
log ==================================
|
||||||
cd ../arch/tools
|
cd ../arch/tools
|
||||||
sh build_cli.sh
|
sh build_cli.sh
|
||||||
cd -
|
cd -
|
||||||
|
|
||||||
print_disk_usage
|
|
||||||
|
|
||||||
log building docker image for arch gateway
|
log building docker image for arch gateway
|
||||||
log ======================================
|
log ======================================
|
||||||
cd ../
|
cd ../
|
||||||
archgw build
|
archgw build
|
||||||
cd -
|
cd -
|
||||||
|
|
||||||
print_disk_usage
|
|
||||||
|
|
||||||
log startup arch gateway with function calling demo
|
log startup arch gateway with function calling demo
|
||||||
cd ..
|
cd ..
|
||||||
tail -F ~/archgw_logs/modelserver.log &
|
tail -F ~/archgw_logs/modelserver.log &
|
||||||
|
|
@ -64,8 +56,6 @@ archgw up demos/weather_forecast/arch_config.yaml
|
||||||
kill $model_server_tail_pid
|
kill $model_server_tail_pid
|
||||||
cd -
|
cd -
|
||||||
|
|
||||||
print_disk_usage
|
|
||||||
|
|
||||||
log running e2e tests
|
log running e2e tests
|
||||||
log =================
|
log =================
|
||||||
poetry install
|
poetry install
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue