Add gzip compression/decompression filters to the prompt gateway HTTP filter chain

2026-06-17 15:25:17 +02:00 · 2024-10-24 17:47:54 -07:00 · 2024-10-24 17:47:54 -07:00 · a5cbd2a978
commit a5cbd2a978
parent 6eceabf43e
2 changed files with 22 additions and 10 deletions
--- a/arch/envoy.template.yaml
+++ b/arch/envoy.template.yaml
@@ -52,6 +52,15 @@ static_resources:
                            cluster: arch_llm_listener
                            timeout: 60s
                http_filters:
+                  - name: envoy.filters.http.compressor
+                    typed_config:
+                      "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
+                      compressor_library:
+                        name: compress
+                        typed_config:
+                          "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
+                          memory_level: 3
+                          window_bits: 10
                  - name: envoy.filters.http.wasm
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
@@ -69,6 +78,17 @@ static_resources:
                            code:
                              local:
                                filename: "/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm"
+                  - name: envoy.filters.http.decompressor
+                    typed_config:
+                      "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
+                      decompressor_library:
+                        name: decompress
+                        typed_config:
+                          "@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
+                          window_bits: 9
+                          chunk_size: 8192
+                          # If this ratio is set too low, then body data will not be decompressed completely.
+                          max_inflate_ratio: 1000
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
@@ -206,8 +226,6 @@ static_resources:
                            body:
                              inline_string: "x-arch-llm-provider header not set, llm gateway cannot perform routing\n"
                http_filters:
-
-
                  - name: envoy.filters.http.compressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
@@ -217,7 +235,6 @@ static_resources:
                          "@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
                          memory_level: 3
                          window_bits: 10
-
                  - name: envoy.filters.http.wasm
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
@@ -235,9 +252,6 @@ static_resources:
                            code:
                              local:
                                filename: "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm"
-
-
-
                  - name: envoy.filters.http.decompressor
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
@@ -249,8 +263,6 @@ static_resources:
                          chunk_size: 8192
                          # If this ratio is set too low, then body data will not be decompressed completely.
                          max_inflate_ratio: 1000
-
-
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
--- a/chatbot_ui/app/run.py
+++ b/chatbot_ui/app/run.py
@@ -6,7 +6,7 @@ from arch_util import get_arch_messages
 import gradio as gr

 from typing import List, Optional, Tuple
-from openai import OpenAI, DefaultHttpxClient
+from openai import OpenAI
 from dotenv import load_dotenv

 load_dotenv()
@@ -39,7 +39,7 @@ footer {visibility: hidden}
 client = OpenAI(
    api_key="--",
    base_url=CHAT_COMPLETION_ENDPOINT,
-    http_client=DefaultHttpxClient(headers={"accept-encoding": "*"}),
+    # http_client=DefaultHttpxClient(headers={"accept-encoding": "*"}),
 )