mirror of
https://github.com/katanemo/plano.git
synced 2026-07-02 15:51:02 +02:00
Add support for streaming and fixes few issues (see description) (#202)
This commit is contained in:
parent
29ff8da60f
commit
662a840ac5
45 changed files with 2266 additions and 477 deletions
|
|
@ -12,6 +12,9 @@ FROM envoyproxy/envoy:v1.31-latest as envoy
|
|||
|
||||
#Build config generator, so that we have a single build image for both Rust and Python
|
||||
FROM python:3-slim as arch
|
||||
|
||||
RUN apt-get update && apt-get install -y gettext-base && apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY --from=builder /arch/target/wasm32-wasi/release/prompt_gateway.wasm /etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm
|
||||
COPY --from=builder /arch/target/wasm32-wasi/release/llm_gateway.wasm /etc/envoy/proxy-wasm-plugins/llm_gateway.wasm
|
||||
COPY --from=envoy /usr/local/bin/envoy /usr/local/bin/envoy
|
||||
|
|
@ -22,4 +25,5 @@ COPY arch/tools/cli/config_generator.py .
|
|||
COPY arch/envoy.template.yaml .
|
||||
COPY arch/arch_config_schema.yaml .
|
||||
|
||||
CMD ["sh", "-c", "python config_generator.py && envoy -c /etc/envoy/envoy.yaml --component-log-level wasm:debug"]
|
||||
|
||||
ENTRYPOINT ["sh", "-c", "python config_generator.py && envsubst < /etc/envoy/envoy.yaml > /etc/envoy.env_sub.yaml && envoy -c /etc/envoy.env_sub.yaml --component-log-level wasm:debug"]
|
||||
|
|
|
|||
|
|
@ -160,4 +160,3 @@ required:
|
|||
- version
|
||||
- listener
|
||||
- llm_providers
|
||||
- prompt_targets
|
||||
|
|
|
|||
|
|
@ -1 +1 @@
|
|||
docker build -t archgw .. -f Dockerfile
|
||||
docker build -f Dockerfile .. -t katanemo/archgw
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
services:
|
||||
archgw:
|
||||
image: archgw:latest
|
||||
image: katanemo/archgw:latest
|
||||
ports:
|
||||
- "10000:10000"
|
||||
- "11000:11000"
|
||||
|
|
@ -10,9 +10,13 @@ services:
|
|||
- ${ARCH_CONFIG_FILE:-../demos/function_calling/arch_config.yaml}:/config/arch_config.yaml
|
||||
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
|
||||
- ./envoy.template.yaml:/config/envoy.template.yaml
|
||||
- ./target/wasm32-wasi/release/intelligent_prompt_gateway.wasm:/etc/envoy/proxy-wasm-plugins/intelligent_prompt_gateway.wasm
|
||||
- ./arch_config_schema.yaml:/config/arch_config_schema.yaml
|
||||
- ./tools/config_generator.py:/config/config_generator.py
|
||||
- ./arch_logs:/var/log/
|
||||
env_file:
|
||||
- stage.env
|
||||
- ./tools/cli/config_generator.py:/config/config_generator.py
|
||||
- ../crates/target/wasm32-wasi/release/llm_gateway.wasm:/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm
|
||||
- ../crates/target/wasm32-wasi/release/prompt_gateway.wasm:/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm
|
||||
- ~/archgw_logs:/var/log/
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
environment:
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
|
||||
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
|
||||
|
|
|
|||
17
arch/docker-compose.e2e.yaml
Normal file
17
arch/docker-compose.e2e.yaml
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
services:
|
||||
archgw:
|
||||
image: katanemo/archgw:latest
|
||||
ports:
|
||||
- "10000:10000"
|
||||
- "11000:11000"
|
||||
- "12000:12000"
|
||||
- "19901:9901"
|
||||
volumes:
|
||||
- ${ARCH_CONFIG_FILE:-../demos/function_calling/arch_config.yaml}:/config/arch_config.yaml
|
||||
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
|
||||
- ~/archgw_logs:/var/log/
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
environment:
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY:?error}
|
||||
- MISTRAL_API_KEY=${MISTRAL_API_KEY:?error}
|
||||
|
|
@ -7,7 +7,7 @@ services:
|
|||
- "12000:12000"
|
||||
- "19901:9901"
|
||||
volumes:
|
||||
- ${ARCH_CONFIG_FILE:-./demos/function_calling/arch_confg.yaml}:/config/arch_config.yaml
|
||||
- ${ARCH_CONFIG_FILE:-../demos/function_calling/arch_config.yaml}:/config/arch_config.yaml
|
||||
- /etc/ssl/cert.pem:/etc/ssl/cert.pem
|
||||
- ~/archgw_logs:/var/log/
|
||||
env_file:
|
||||
|
|
|
|||
|
|
@ -52,6 +52,15 @@ static_resources:
|
|||
cluster: arch_llm_listener
|
||||
timeout: 60s
|
||||
http_filters:
|
||||
- name: envoy.filters.http.compressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
|
||||
compressor_library:
|
||||
name: compress
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
|
||||
memory_level: 3
|
||||
window_bits: 10
|
||||
- name: envoy.filters.http.wasm
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/udpa.type.v1.TypedStruct
|
||||
|
|
@ -69,6 +78,17 @@ static_resources:
|
|||
code:
|
||||
local:
|
||||
filename: "/etc/envoy/proxy-wasm-plugins/prompt_gateway.wasm"
|
||||
- name: envoy.filters.http.decompressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
|
||||
decompressor_library:
|
||||
name: decompress
|
||||
typed_config:
|
||||
"@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
|
||||
window_bits: 9
|
||||
chunk_size: 8192
|
||||
# If this ratio is set too low, then body data will not be decompressed completely.
|
||||
max_inflate_ratio: 1000
|
||||
- name: envoy.filters.http.router
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
|
@ -187,6 +207,12 @@ static_resources:
|
|||
domains:
|
||||
- "*"
|
||||
routes:
|
||||
- match:
|
||||
prefix: "/healthz"
|
||||
route:
|
||||
auto_host_rewrite: true
|
||||
cluster: openai
|
||||
timeout: 60s
|
||||
{% for provider in arch_llm_providers %}
|
||||
- match:
|
||||
prefix: "/"
|
||||
|
|
@ -206,6 +232,15 @@ static_resources:
|
|||
body:
|
||||
inline_string: "x-arch-llm-provider header not set, llm gateway cannot perform routing\n"
|
||||
http_filters:
|
||||
- name: envoy.filters.http.compressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.compressor.v3.Compressor
|
||||
compressor_library:
|
||||
name: compress
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.compression.gzip.compressor.v3.Gzip
|
||||
memory_level: 3
|
||||
window_bits: 10
|
||||
- name: envoy.filters.http.wasm
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/udpa.type.v1.TypedStruct
|
||||
|
|
@ -223,6 +258,17 @@ static_resources:
|
|||
code:
|
||||
local:
|
||||
filename: "/etc/envoy/proxy-wasm-plugins/llm_gateway.wasm"
|
||||
- name: envoy.filters.http.decompressor
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.decompressor.v3.Decompressor
|
||||
decompressor_library:
|
||||
name: decompress
|
||||
typed_config:
|
||||
"@type": "type.googleapis.com/envoy.extensions.compression.gzip.decompressor.v3.Gzip"
|
||||
window_bits: 9
|
||||
chunk_size: 8192
|
||||
# If this ratio is set too low, then body data will not be decompressed completely.
|
||||
max_inflate_ratio: 1000
|
||||
- name: envoy.filters.http.router
|
||||
typed_config:
|
||||
"@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
|
||||
|
|
|
|||
|
|
@ -47,13 +47,14 @@ def validate_and_render_schema():
|
|||
config_schema_yaml = yaml.safe_load(arch_config_schema)
|
||||
inferred_clusters = {}
|
||||
|
||||
for prompt_target in config_yaml["prompt_targets"]:
|
||||
name = prompt_target.get("endpoint", {}).get("name", "")
|
||||
if name not in inferred_clusters:
|
||||
inferred_clusters[name] = {
|
||||
"name": name,
|
||||
"port": 80, # default port
|
||||
}
|
||||
if "prompt_targets" in config_yaml:
|
||||
for prompt_target in config_yaml["prompt_targets"]:
|
||||
name = prompt_target.get("endpoint", {}).get("name", "")
|
||||
if name not in inferred_clusters:
|
||||
inferred_clusters[name] = {
|
||||
"name": name,
|
||||
"port": 80, # default port
|
||||
}
|
||||
|
||||
print(inferred_clusters)
|
||||
endpoints = config_yaml.get("endpoints", {})
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue