diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml index a1ed4472..40ddd9a8 100644 --- a/arch/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -25,7 +25,7 @@ static_resources: envoy_grpc: cluster_name: opentelemetry_collector timeout: 0.250s - service_name: arch + service_name: front_end random_sampling: value: {{ arch_tracing.random_sampling }} {% endif %} @@ -38,6 +38,59 @@ static_resources: typed_config: "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog path: "/var/log/access_ingress.log" + route_config: + name: local_routes + virtual_hosts: + - name: local_service + domains: + - "*" + routes: + - match: + prefix: "/" + route: + auto_host_rewrite: true + cluster: arch_prompt_gateway_listener + timeout: 60s + http_filters: + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + + - name: arch_prompt_gateway_listener + address: + socket_address: + address: 0.0.0.0 + port_value: 10001 + traffic_direction: INBOUND + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + {% if "random_sampling" in arch_tracing and arch_tracing["random_sampling"] > 0 %} + generate_request_id: true + tracing: + provider: + name: envoy.tracers.opentelemetry + typed_config: + "@type": type.googleapis.com/envoy.config.trace.v3.OpenTelemetryConfig + grpc_service: + envoy_grpc: + cluster_name: opentelemetry_collector + timeout: 0.250s + service_name: prompt_gateway + random_sampling: + value: {{ arch_tracing.random_sampling }} + {% endif %} + stat_prefix: arch_prompt_gateway_listener + codec_type: AUTO + scheme_header_transformation: + scheme_to_overwrite: https + access_log: + - name: envoy.access_loggers.file + typed_config: + "@type": type.googleapis.com/envoy.extensions.access_loggers.file.v3.FileAccessLog + path: "/var/log/access_ingress_prompt.log" route_config: name: local_routes virtual_hosts: @@ -93,6 +146,7 @@ static_resources: typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + - name: arch_internal address: socket_address: @@ -115,7 +169,7 @@ static_resources: envoy_grpc: cluster_name: opentelemetry_collector timeout: 0.250s - service_name: arch + service_name: arch_internal random_sampling: value: {{ arch_tracing.random_sampling }} {% endif %} @@ -187,7 +241,7 @@ static_resources: envoy_grpc: cluster_name: opentelemetry_collector timeout: 0.250s - service_name: arch + service_name: llm_gateway random_sampling: value: {{ arch_tracing.random_sampling }} {% endif %} @@ -387,6 +441,22 @@ static_resources: port_value: 11000 hostname: arch_internal + - name: arch_prompt_gateway_listener + connect_timeout: 5s + type: LOGICAL_DNS + dns_lookup_family: V4_ONLY + lb_policy: ROUND_ROBIN + load_assignment: + cluster_name: arch_prompt_gateway_listener + endpoints: + - lb_endpoints: + - endpoint: + address: + socket_address: + address: 0.0.0.0 + port_value: 10001 + hostname: arch_prompt_gateway_listener + - name: arch_llm_listener connect_timeout: 5s type: LOGICAL_DNS diff --git a/crates/common/src/consts.rs b/crates/common/src/consts.rs index f782cf99..7ac5ea1c 100644 --- a/crates/common/src/consts.rs +++ b/crates/common/src/consts.rs @@ -22,6 +22,7 @@ pub const HEALTHZ_PATH: &str = "/healthz"; pub const ARCH_STATE_HEADER: &str = "x-arch-state"; pub const ARCH_FC_MODEL_NAME: &str = "Arch-Function-1.5B"; pub const REQUEST_ID_HEADER: &str = "x-request-id"; +pub const TRACE_PARENT_HEADER: &str = "traceparent"; pub const ARCH_INTERNAL_CLUSTER_NAME: &str = "arch_internal"; pub const ARCH_UPSTREAM_HOST_HEADER: &str = "x-arch-upstream"; pub const ARCH_LLM_UPSTREAM_LISTENER: &str = "arch_llm_listener"; diff --git a/crates/prompt_gateway/src/http_context.rs b/crates/prompt_gateway/src/http_context.rs index 3b014009..e2bb67f5 100644 --- a/crates/prompt_gateway/src/http_context.rs +++ b/crates/prompt_gateway/src/http_context.rs @@ -8,9 +8,7 @@ use common::{ PromptGuardRequest, PromptGuardTask, }, consts::{ - ARCH_FC_MODEL_NAME, ARCH_INTERNAL_CLUSTER_NAME, ARCH_STATE_HEADER, - ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, CHAT_COMPLETIONS_PATH, GUARD_INTERNAL_HOST, - HEALTHZ_PATH, REQUEST_ID_HEADER, TOOL_ROLE, USER_ROLE, + ARCH_FC_MODEL_NAME, ARCH_INTERNAL_CLUSTER_NAME, ARCH_STATE_HEADER, ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, CHAT_COMPLETIONS_PATH, GUARD_INTERNAL_HOST, HEALTHZ_PATH, REQUEST_ID_HEADER, TOOL_ROLE, TRACE_PARENT_HEADER, USER_ROLE }, errors::ServerError, http::{CallArgs, Client}, @@ -52,6 +50,7 @@ impl HttpContext for StreamContext { ); self.request_id = self.get_http_request_header(REQUEST_ID_HEADER); + self.traceparent = self.get_http_request_header(TRACE_PARENT_HEADER); Action::Continue } @@ -195,6 +194,10 @@ impl HttpContext for StreamContext { headers.push((REQUEST_ID_HEADER, self.request_id.as_ref().unwrap())); } + if self.traceparent.is_some() { + headers.push((TRACE_PARENT_HEADER, self.traceparent.as_ref().unwrap())); + } + let call_args = CallArgs::new( ARCH_INTERNAL_CLUSTER_NAME, "/guard", diff --git a/crates/prompt_gateway/src/stream_context.rs b/crates/prompt_gateway/src/stream_context.rs index 4bbd3fa6..b6fd0070 100644 --- a/crates/prompt_gateway/src/stream_context.rs +++ b/crates/prompt_gateway/src/stream_context.rs @@ -16,8 +16,8 @@ use common::consts::{ ARCH_INTERNAL_CLUSTER_NAME, ARCH_MODEL_PREFIX, ARCH_STATE_HEADER, ARCH_UPSTREAM_HOST_HEADER, ASSISTANT_ROLE, DEFAULT_EMBEDDING_MODEL, DEFAULT_HALLUCINATED_THRESHOLD, DEFAULT_INTENT_MODEL, DEFAULT_PROMPT_TARGET_THRESHOLD, EMBEDDINGS_INTERNAL_HOST, HALLUCINATION_INTERNAL_HOST, - HALLUCINATION_TEMPLATE, MESSAGES_KEY, REQUEST_ID_HEADER, SYSTEM_ROLE, TOOL_ROLE, USER_ROLE, - ZEROSHOT_INTERNAL_HOST, + HALLUCINATION_TEMPLATE, MESSAGES_KEY, REQUEST_ID_HEADER, SYSTEM_ROLE, TOOL_ROLE, + TRACE_PARENT_HEADER, USER_ROLE, ZEROSHOT_INTERNAL_HOST, }; use common::embeddings::{ CreateEmbeddingRequest, CreateEmbeddingRequestInput, CreateEmbeddingResponse, @@ -77,6 +77,7 @@ pub struct StreamContext { pub chat_completions_request: Option, pub prompt_guards: Rc, pub request_id: Option, + pub traceparent: Option, } impl StreamContext { @@ -107,6 +108,7 @@ impl StreamContext { prompt_guards, overrides, request_id: None, + traceparent: None, } } fn embeddings_store(&self) -> &EmbeddingsStore { @@ -154,9 +156,15 @@ impl StreamContext { ("x-envoy-max-retries", "3"), ("x-envoy-upstream-rq-timeout-ms", "60000"), ]; + if self.request_id.is_some() { headers.push((REQUEST_ID_HEADER, self.request_id.as_ref().unwrap())); } + + if self.traceparent.is_some() { + headers.push((TRACE_PARENT_HEADER, self.traceparent.as_ref().unwrap())); + } + let call_args = CallArgs::new( ARCH_INTERNAL_CLUSTER_NAME, "/embeddings", @@ -282,6 +290,10 @@ impl StreamContext { headers.push((REQUEST_ID_HEADER, self.request_id.as_ref().unwrap())); } + if self.traceparent.is_some() { + headers.push((TRACE_PARENT_HEADER, self.traceparent.as_ref().unwrap())); + } + let call_args = CallArgs::new( ARCH_INTERNAL_CLUSTER_NAME, "/zeroshot", @@ -481,6 +493,10 @@ impl StreamContext { headers.push((REQUEST_ID_HEADER, self.request_id.as_ref().unwrap())); } + if self.traceparent.is_some() { + headers.push((TRACE_PARENT_HEADER, self.traceparent.as_ref().unwrap())); + } + let call_args = CallArgs::new( ARCH_INTERNAL_CLUSTER_NAME, &upstream_path, @@ -597,6 +613,10 @@ impl StreamContext { headers.push((REQUEST_ID_HEADER, self.request_id.as_ref().unwrap())); } + if self.traceparent.is_some() { + headers.push((TRACE_PARENT_HEADER, self.traceparent.as_ref().unwrap())); + } + let call_args = CallArgs::new( ARCH_INTERNAL_CLUSTER_NAME, "/v1/chat/completions", @@ -773,6 +793,10 @@ impl StreamContext { headers.push((REQUEST_ID_HEADER, self.request_id.as_ref().unwrap())); } + if self.traceparent.is_some() { + headers.push((TRACE_PARENT_HEADER, self.traceparent.as_ref().unwrap())); + } + let call_args = CallArgs::new( ARCH_INTERNAL_CLUSTER_NAME, "/hallucination", @@ -824,6 +848,10 @@ impl StreamContext { headers.push((REQUEST_ID_HEADER, self.request_id.as_ref().unwrap())); } + if self.traceparent.is_some() { + headers.push((TRACE_PARENT_HEADER, self.traceparent.as_ref().unwrap())); + } + let call_args = CallArgs::new( ARCH_INTERNAL_CLUSTER_NAME, &path,