mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
add compression/decompression
This commit is contained in:
parent
6eceabf43e
commit
a5cbd2a978
2 changed files with 22 additions and 10 deletions
|
|
@ -52,6 +52,15 @@ static_resources:
|
|||
cluster: arch_llm_listener
|
||||
timeout: 60s
|
||||
http_filters:
|
||||
- name: envoy.filters.http.compressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
|
||||
compressor_library:
|
||||
name: compress
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
|
||||
memory_level: 3
|
||||
window_bits: 10
|
||||
- name: envoy.filters.http.wasm
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/udpa.type.v1.TypedStruct
|
||||
|
|
@ -69,6 +78,17 @@ static_resources:
|
|||
code:
|
||||
local:
|
||||
filename: "/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm"
|
||||
- name: envoy.filters.http.decompressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
|
||||
decompressor_library:
|
||||
name: decompress
|
||||
typed_config:
|
||||
"@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
|
||||
window_bits: 9
|
||||
chunk_size: 8192
|
||||
# If this ratio is set too low, then body data will not be decompressed completely.
|
||||
max_inflate_ratio: 1000
|
||||
- name: envoy.filters.http.router
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
|
@ -206,8 +226,6 @@ static_resources:
|
|||
body:
|
||||
inline_string: "x-arch-llm-provider header not set, llm gateway cannot perform routing\n"
|
||||
http_filters:
|
||||
|
||||
|
||||
- name: envoy.filters.http.compressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
|
||||
|
|
@ -217,7 +235,6 @@ static_resources:
|
|||
"@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
|
||||
memory_level: 3
|
||||
window_bits: 10
|
||||
|
||||
- name: envoy.filters.http.wasm
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/udpa.type.v1.TypedStruct
|
||||
|
|
@ -235,9 +252,6 @@ static_resources:
|
|||
code:
|
||||
local:
|
||||
filename: "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm"
|
||||
|
||||
|
||||
|
||||
- name: envoy.filters.http.decompressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
|
||||
|
|
@ -249,8 +263,6 @@ static_resources:
|
|||
chunk_size: 8192
|
||||
# If this ratio is set too low, then body data will not be decompressed completely.
|
||||
max_inflate_ratio: 1000
|
||||
|
||||
|
||||
- name: envoy.filters.http.router
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ from arch_util import get_arch_messages
|
|||
import gradio as gr
|
||||
|
||||
from typing import List, Optional, Tuple
|
||||
from openai import OpenAI, DefaultHttpxClient
|
||||
from openai import OpenAI
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
|
@ -39,7 +39,7 @@ footer {visibility: hidden}
|
|||
client = OpenAI(
|
||||
api_key="--",
|
||||
base_url=CHAT_COMPLETION_ENDPOINT,
|
||||
http_client=DefaultHttpxClient(headers={"accept-encoding": "*"}),
|
||||
# http_client=DefaultHttpxClient(headers={"accept-encoding": "*"}),
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue