mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
fix tracing flags
This commit is contained in:
parent
509976dec6
commit
31f3d38fad
11 changed files with 61 additions and 48 deletions
|
|
@ -6,6 +6,7 @@ import yaml
|
|||
from jsonschema import validate
|
||||
from urllib.parse import urlparse
|
||||
from copy import deepcopy
|
||||
from planoai.consts import DEFAULT_OTEL_TRACING_GRPC_ENDPOINT
|
||||
|
||||
|
||||
SUPPORTED_PROVIDERS_WITH_BASE_URL = [
|
||||
|
|
@ -158,6 +159,29 @@ def validate_and_render_schema():
|
|||
|
||||
arch_tracing = config_yaml.get("tracing", {})
|
||||
|
||||
opentracing_grpc_endpoint = arch_tracing.get(
|
||||
"opentracing_grpc_endpoint", DEFAULT_OTEL_TRACING_GRPC_ENDPOINT
|
||||
)
|
||||
arch_tracing["opentracing_grpc_endpoint"] = opentracing_grpc_endpoint
|
||||
# resolve env vars in opentracing_grpc_endpoint if present
|
||||
if opentracing_grpc_endpoint and "$" in opentracing_grpc_endpoint:
|
||||
opentracing_grpc_endpoint = os.path.expandvars(opentracing_grpc_endpoint)
|
||||
print(
|
||||
f"Resolved opentracing_grpc_endpoint to {opentracing_grpc_endpoint} after expanding environment variables"
|
||||
)
|
||||
arch_tracing["opentracing_grpc_endpoint"] = opentracing_grpc_endpoint
|
||||
# ensure that opentracing_grpc_endpoint, if present, is a valid URL whose scheme is http and whose path is empty (or just "/")
|
||||
if opentracing_grpc_endpoint:
|
||||
urlparse_result = urlparse(opentracing_grpc_endpoint)
|
||||
if urlparse_result.scheme != "http":
|
||||
raise Exception(
|
||||
f"Invalid opentracing_grpc_endpoint {opentracing_grpc_endpoint}, scheme must be http"
|
||||
)
|
||||
if urlparse_result.path and urlparse_result.path != "/":
|
||||
raise Exception(
|
||||
f"Invalid opentracing_grpc_endpoint {opentracing_grpc_endpoint}, path must be empty"
|
||||
)
|
||||
|
||||
llms_with_endpoint = []
|
||||
llms_with_endpoint_cluster_names = set()
|
||||
updated_model_providers = []
|
||||
|
|
|
|||
|
|
@ -3,3 +3,4 @@ import os
|
|||
SERVICE_NAME_ARCHGW = "plano"
|
||||
PLANO_DOCKER_NAME = "plano"
|
||||
PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.4")
|
||||
DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://host.docker.internal:4317"
|
||||
|
|
|
|||
|
|
@ -116,11 +116,18 @@ def stream_gateway_logs(follow, service="plano"):
|
|||
|
||||
|
||||
def docker_validate_plano_schema(arch_config_file):
|
||||
import os
|
||||
|
||||
env = os.environ.copy()
|
||||
env.pop("PATH", None)
|
||||
env_args = [item for key, value in env.items() for item in ["-e", f"{key}={value}"]]
|
||||
|
||||
result = subprocess.run(
|
||||
[
|
||||
"docker",
|
||||
"run",
|
||||
"--rm",
|
||||
*env_args,
|
||||
"-v",
|
||||
f"{arch_config_file}:/app/arch_config.yaml:ro",
|
||||
"--entrypoint",
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ from planoai.core import (
|
|||
start_cli_agent,
|
||||
)
|
||||
from planoai.consts import (
|
||||
DEFAULT_OTEL_TRACING_GRPC_ENDPOINT,
|
||||
PLANO_DOCKER_IMAGE,
|
||||
PLANO_DOCKER_NAME,
|
||||
SERVICE_NAME_ARCHGW,
|
||||
|
|
@ -153,7 +154,7 @@ def up(file, path, foreground):
|
|||
|
||||
# Set the ARCH_CONFIG_FILE environment variable
|
||||
env_stage = {
|
||||
"OTEL_TRACING_GRPC_ENDPOINT": "http://host.docker.internal:4317",
|
||||
"OTEL_TRACING_GRPC_ENDPOINT": DEFAULT_OTEL_TRACING_GRPC_ENDPOINT,
|
||||
}
|
||||
env = os.environ.copy()
|
||||
# Remove PATH variable if present
|
||||
|
|
|
|||
|
|
@ -382,6 +382,8 @@ properties:
|
|||
type: integer
|
||||
trace_arch_internal:
|
||||
type: boolean
|
||||
opentracing_grpc_endpoint:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
mode:
|
||||
type: string
|
||||
|
|
|
|||
|
|
@ -1030,35 +1030,7 @@ static_resources:
|
|||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: host.docker.internal
|
||||
port_value: 4317
|
||||
- name: opentelemetry_collector_http
|
||||
type: STRICT_DNS
|
||||
dns_lookup_family: V4_ONLY
|
||||
lb_policy: ROUND_ROBIN
|
||||
typed_extension_protocol_options:
|
||||
load_assignment:
|
||||
cluster_name: opentelemetry_collector_http
|
||||
endpoints:
|
||||
- lb_endpoints:
|
||||
- endpoint:
|
||||
address:
|
||||
socket_address:
|
||||
address: host.docker.internal
|
||||
port_value: 4318
|
||||
# Circuit breaker configuration to prevent overwhelming OTEL collector
|
||||
circuit_breakers:
|
||||
thresholds:
|
||||
- priority: DEFAULT
|
||||
max_connections: 100
|
||||
max_pending_requests: 100
|
||||
max_requests: 100
|
||||
max_retries: 3
|
||||
# Health checking and outlier detection
|
||||
outlier_detection:
|
||||
consecutive_5xx: 5
|
||||
interval: 10s
|
||||
base_ejection_time: 30s
|
||||
max_ejection_percent: 50
|
||||
enforcing_consecutive_5xx: 100
|
||||
{% set _otel_endpoint = arch_tracing.opentracing_grpc_endpoint | default('host.docker.internal:4317') | replace("http://", "") | replace("https://", "") %}
|
||||
address: {{ _otel_endpoint.split(":") | first }}
|
||||
port_value: {{ _otel_endpoint.split(":") | last }}
|
||||
{% endif %}
|
||||
|
|
|
|||
|
|
@ -50,13 +50,12 @@ fn empty() -> BoxBody<Bytes, hyper::Error> {
|
|||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||
let _tracer_provider = init_tracer();
|
||||
let bind_address = env::var("BIND_ADDRESS").unwrap_or_else(|_| BIND_ADDRESS.to_string());
|
||||
|
||||
// loading arch_config.yaml file
|
||||
// loading arch_config.yaml file (before tracing init so we can read tracing config)
|
||||
let arch_config_path = env::var("ARCH_CONFIG_PATH_RENDERED")
|
||||
.unwrap_or_else(|_| "./arch_config_rendered.yaml".to_string());
|
||||
info!(path = %arch_config_path, "loading arch_config.yaml");
|
||||
eprintln!("loading arch_config.yaml from {}", arch_config_path);
|
||||
|
||||
let config_contents =
|
||||
fs::read_to_string(&arch_config_path).expect("Failed to read arch_config.yaml");
|
||||
|
|
@ -64,6 +63,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
|||
let config: Configuration =
|
||||
serde_yaml::from_str(&config_contents).expect("Failed to parse arch_config.yaml");
|
||||
|
||||
// Initialize tracing using config.yaml tracing section
|
||||
let _tracer_provider = init_tracer(config.tracing.as_ref());
|
||||
info!(path = %arch_config_path, "loaded arch_config.yaml");
|
||||
|
||||
let arch_config = Arc::new(config);
|
||||
|
||||
// combine agents and filters into a single list of agents
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ use tracing_subscriber::util::SubscriberInitExt;
|
|||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
use crate::tracing::ServiceNameOverrideExporter;
|
||||
use common::configuration::Tracing;
|
||||
|
||||
struct BracketedTime;
|
||||
|
||||
|
|
@ -80,18 +81,20 @@ use tracing_subscriber::fmt::FormattedFields;
|
|||
|
||||
static INIT_LOGGER: OnceLock<SdkTracerProvider> = OnceLock::new();
|
||||
|
||||
pub fn init_tracer() -> &'static SdkTracerProvider {
|
||||
pub fn init_tracer(tracing_config: Option<&Tracing>) -> &'static SdkTracerProvider {
|
||||
INIT_LOGGER.get_or_init(|| {
|
||||
global::set_text_map_propagator(TraceContextPropagator::new());
|
||||
|
||||
// Get OTEL collector URL from environment
|
||||
let otel_endpoint = std::env::var("OTEL_TRACING_GRPC_ENDPOINT")
|
||||
.unwrap_or_else(|_| "http://localhost:4317".to_string());
|
||||
// Get OTEL endpoint and sampling from config.yaml tracing section
|
||||
let otel_endpoint = tracing_config.and_then(|t| t.opentracing_grpc_endpoint.clone());
|
||||
|
||||
let tracing_enabled = std::env::var("OTEL_TRACING_ENABLED")
|
||||
.ok()
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(false);
|
||||
let random_sampling = tracing_config.and_then(|t| t.random_sampling).unwrap_or(0);
|
||||
|
||||
let tracing_enabled = random_sampling > 0 && otel_endpoint.is_some();
|
||||
eprintln!(
|
||||
"initializing tracing: tracing_enabled={}, otel_endpoint={:?}, random_sampling={}",
|
||||
tracing_enabled, otel_endpoint, random_sampling
|
||||
);
|
||||
|
||||
// Create OTLP exporter to send spans to collector
|
||||
if tracing_enabled {
|
||||
|
|
@ -103,7 +106,7 @@ pub fn init_tracer() -> &'static SdkTracerProvider {
|
|||
// Create ServiceNameOverrideExporter to support per-span service names
|
||||
// This allows spans to have different service names (e.g., plano(orchestrator),
|
||||
// plano(filter), plano(llm)) by setting the "service.name.override" attribute
|
||||
let exporter = ServiceNameOverrideExporter::new(&otel_endpoint);
|
||||
let exporter = ServiceNameOverrideExporter::new(otel_endpoint.as_ref().unwrap());
|
||||
|
||||
let provider = SdkTracerProvider::builder()
|
||||
.with_batch_exporter(exporter)
|
||||
|
|
|
|||
|
|
@ -90,6 +90,8 @@ pub struct Overrides {
|
|||
pub struct Tracing {
|
||||
pub sampling_rate: Option<f64>,
|
||||
pub trace_arch_internal: Option<bool>,
|
||||
pub random_sampling: Option<u32>,
|
||||
pub opentracing_grpc_endpoint: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash, Default)]
|
||||
|
|
|
|||
|
|
@ -55,3 +55,4 @@ listeners:
|
|||
|
||||
tracing:
|
||||
random_sampling: 100
|
||||
opentracing_grpc_endpoint: $OTEL_TRACING_GRPC_ENDPOINT
|
||||
|
|
|
|||
|
|
@ -6,12 +6,10 @@ services:
|
|||
dockerfile: Dockerfile
|
||||
ports:
|
||||
- "8001:8001"
|
||||
- "11000:11000"
|
||||
- "12001:12001"
|
||||
environment:
|
||||
- ARCH_CONFIG_PATH=/app/arch_config.yaml
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY:?OPENAI_API_KEY environment variable is required but not set}
|
||||
- OTEL_TRACING_GRPC_ENDPOINT=http://host.docker.internal:4317
|
||||
- OTEL_TRACING_GRPC_ENDPOINT=http://jaeger:4317
|
||||
volumes:
|
||||
- ./config.yaml:/app/arch_config.yaml:ro
|
||||
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
|
||||
|
|
@ -60,4 +58,3 @@ services:
|
|||
ports:
|
||||
- "16686:16686" # Jaeger UI
|
||||
- "4317:4317" # OTLP gRPC receiver
|
||||
- "4318:4318" # OTLP HTTP receiver
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue