From a5cbd2a9780a436aacdc3793850e8cb3ca943986 Mon Sep 17 00:00:00 2001
From: Adil Hafeez <adil@katanemo.com>
Date: Thu, 24 Oct 2024 17:47:54 -0700
Subject: [PATCH] add gzip compression/decompression to prompt gateway

---
 arch/envoy.template.yaml | 28 ++++++++++++++++++++--------
 chatbot_ui/app/run.py    |  4 ++--
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/arch/envoy.template.yaml b/arch/envoy.template.yaml
index e64ac422..076c461e 100644
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@@ -52,6 +52,15 @@ static_resources:
                             cluster: arch_llm_listener
                             timeout: 60s
                 http_filters:
+                  - name: envoy.filters.http.compressor
+                    typed_config:
+                      "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
+                      compressor_library:
+                        name: compress
+                        typed_config:
+                          "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
+                          memory_level: 3
+                          window_bits: 10
                   - name: envoy.filters.http.wasm
                     typed_config:
                       "@type": type.googleapis.com/udpa.type.v1.TypedStruct
@@ -69,6 +78,17 @@ static_resources:
                             code:
                               local:
                                 filename: "/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm"
+                  - name: envoy.filters.http.decompressor
+                    typed_config:
+                      "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
+                      decompressor_library:
+                        name: decompress
+                        typed_config:
+                          "@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
+                          window_bits: 9
+                          chunk_size: 8192
+                          # If this ratio is set too low, then body data will not be decompressed completely.
+                          max_inflate_ratio: 1000
                   - name: envoy.filters.http.router
                     typed_config:
                       "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
@@ -206,8 +226,6 @@ static_resources:
                             body:
                               inline_string: "x-arch-llm-provider header not set, llm gateway cannot perform routing\n"
                 http_filters:
-
-
                   - name: envoy.filters.http.compressor
                     typed_config:
                       "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
@@ -217,7 +235,6 @@ static_resources:
                           "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
                           memory_level: 3
                           window_bits: 10
-
                   - name: envoy.filters.http.wasm
                     typed_config:
                       "@type": type.googleapis.com/udpa.type.v1.TypedStruct
@@ -235,9 +252,6 @@ static_resources:
                             code:
                               local:
                                 filename: "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm"
-
-
-
                   - name: envoy.filters.http.decompressor
                     typed_config:
                       "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
@@ -249,8 +263,6 @@ static_resources:
                           chunk_size: 8192
                           # If this ratio is set too low, then body data will not be decompressed completely.
                           max_inflate_ratio: 1000
-
-
                   - name: envoy.filters.http.router
                     typed_config:
                       "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
diff --git a/chatbot_ui/app/run.py b/chatbot_ui/app/run.py
index d3c9dbd3..0d504337 100644
--- a/chatbot_ui/app/run.py
+++ b/chatbot_ui/app/run.py
@@ -6,7 +6,7 @@ from arch_util import get_arch_messages
 import gradio as gr
 
 from typing import List, Optional, Tuple
-from openai import OpenAI, DefaultHttpxClient
+from openai import OpenAI
 from dotenv import load_dotenv
 
 load_dotenv()
@@ -39,7 +39,7 @@ footer {visibility: hidden}
 client = OpenAI(
     api_key="--",
     base_url=CHAT_COMPLETION_ENDPOINT,
-    http_client=DefaultHttpxClient(headers={"accept-encoding": "*"}),
+    # DefaultHttpxClient "accept-encoding: *" override removed; the client's defaults apply.
 )