From a5cbd2a9780a436aacdc3793850e8cb3ca943986 Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Thu, 24 Oct 2024 17:47:54 -0700 Subject: [PATCH] add compression/decompression --- arch/envoy.template.yaml | 28 ++++++++++++++++++++-------- chatbot_ui/app/run.py | 4 ++-- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml index e64ac422..076c461e 100644 --- a/arch/envoy.template.yaml +++ b/arch/envoy.template.yaml @@ -52,6 +52,15 @@ static_resources: cluster: arch_llm_listener timeout: 60s http_filters: + - name: envoy.filters.http.compressor + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor + compressor_library: + name: compress + typed_config: + "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip + memory_level: 3 + window_bits: 10 - name: envoy.filters.http.wasm typed_config: "@type": type.googleapis.com/udpa.type.v1.TypedStruct @@ -69,6 +78,17 @@ static_resources: code: local: filename: "/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm" + - name: envoy.filters.http.decompressor + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor + decompressor_library: + name: decompress + typed_config: + "@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip" + window_bits: 9 + chunk_size: 8192 + # If this ratio is set too low, then body data will not be decompressed completely. + max_inflate_ratio: 1000 - name: envoy.filters.http.router typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router @@ -206,8 +226,6 @@ static_resources: body: inline_string: "x-arch-llm-provider header not set, llm gateway cannot perform routing\n" http_filters: - - - name: envoy.filters.http.compressor typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor @@ -217,7 +235,6 @@ static_resources: "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip memory_level: 3 window_bits: 10 - - name: envoy.filters.http.wasm typed_config: "@type": type.googleapis.com/udpa.type.v1.TypedStruct @@ -235,9 +252,6 @@ static_resources: code: local: filename: "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm" - - - - name: envoy.filters.http.decompressor typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor @@ -249,8 +263,6 @@ static_resources: chunk_size: 8192 # If this ratio is set too low, then body data will not be decompressed completely. max_inflate_ratio: 1000 - - - name: envoy.filters.http.router typed_config: "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router diff --git a/chatbot_ui/app/run.py b/chatbot_ui/app/run.py index d3c9dbd3..0d504337 100644 --- a/chatbot_ui/app/run.py +++ b/chatbot_ui/app/run.py @@ -6,7 +6,7 @@ from arch_util import get_arch_messages import gradio as gr from typing import List, Optional, Tuple -from openai import OpenAI, DefaultHttpxClient +from openai import OpenAI from dotenv import load_dotenv load_dotenv() @@ -39,7 +39,7 @@ footer {visibility: hidden} client = OpenAI( api_key="--", base_url=CHAT_COMPLETION_ENDPOINT, - http_client=DefaultHttpxClient(headers={"accept-encoding": "*"}), + # http_client=DefaultHttpxClient(headers={"accept-encoding": "*"}), )