add compression/decompression

This commit is contained in:
Adil Hafeez 2024-10-24 17:47:54 -07:00
parent 6eceabf43e
commit a5cbd2a978
2 changed files with 22 additions and 10 deletions

View file

@ -52,6 +52,15 @@ static_resources:
cluster: arch_llm_listener
timeout: 60s
http_filters:
- name: envoy.filters.http.compressor
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
compressor_library:
name: compress
typed_config:
"@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
memory_level: 3
window_bits: 10
- name: envoy.filters.http.wasm
typed_config:
"@type": type.googleapis.com/udpa.type.v1.TypedStruct
@ -69,6 +78,17 @@ static_resources:
code:
local:
filename: "/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm"
- name: envoy.filters.http.decompressor
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
decompressor_library:
name: decompress
typed_config:
"@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
window_bits: 9
chunk_size: 8192
# If this ratio is set too low, then body data will not be decompressed completely.
max_inflate_ratio: 1000
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
@ -206,8 +226,6 @@ static_resources:
body:
inline_string: "x-arch-llm-provider header not set, llm gateway cannot perform routing\n"
http_filters:
- name: envoy.filters.http.compressor
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
@ -217,7 +235,6 @@ static_resources:
"@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
memory_level: 3
window_bits: 10
- name: envoy.filters.http.wasm
typed_config:
"@type": type.googleapis.com/udpa.type.v1.TypedStruct
@ -235,9 +252,6 @@ static_resources:
code:
local:
filename: "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm"
- name: envoy.filters.http.decompressor
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
@ -249,8 +263,6 @@ static_resources:
chunk_size: 8192
# If this ratio is set too low, then body data will not be decompressed completely.
max_inflate_ratio: 1000
- name: envoy.filters.http.router
typed_config:
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router

View file

@ -6,7 +6,7 @@ from arch_util import get_arch_messages
import gradio as gr
from typing import List, Optional, Tuple
from openai import OpenAI, DefaultHttpxClient
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv()
@ -39,7 +39,7 @@ footer {visibility: hidden}
client = OpenAI(
api_key="--",
base_url=CHAT_COMPLETION_ENDPOINT,
http_client=DefaultHttpxClient(headers={"accept-encoding": "*"}),
# http_client=DefaultHttpxClient(headers={"accept-encoding": "*"}),
)