mirror of
https://github.com/katanemo/plano.git
synced 2026-05-24 14:05:14 +02:00
Add support for streaming and fixes few issues (see description) (#202)
This commit is contained in:
parent
29ff8da60f
commit
662a840ac5
45 changed files with 2266 additions and 477 deletions
42
e2e_tests/common.py
Normal file
42
e2e_tests/common.py
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
import json
|
||||
import os
|
||||
|
||||
|
||||
PROMPT_GATEWAY_ENDPOINT = os.getenv(
|
||||
"PROMPT_GATEWAY_ENDPOINT", "http://localhost:10000/v1/chat/completions"
|
||||
)
|
||||
LLM_GATEWAY_ENDPOINT = os.getenv(
|
||||
"LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1/chat/completions"
|
||||
)
|
||||
ARCH_STATE_HEADER = "x-arch-state"
|
||||
|
||||
|
||||
def get_data_chunks(stream, n=1):
|
||||
chunks = []
|
||||
for chunk in stream.iter_lines():
|
||||
if chunk:
|
||||
chunk = chunk.decode("utf-8")
|
||||
chunk_data_id = chunk[0:6]
|
||||
assert chunk_data_id == "data: "
|
||||
chunk_data = chunk[6:]
|
||||
chunk_data = chunk_data.strip()
|
||||
chunks.append(chunk_data)
|
||||
if len(chunks) >= n:
|
||||
break
|
||||
return chunks
|
||||
|
||||
|
||||
def get_arch_messages(response_json):
|
||||
arch_messages = []
|
||||
if response_json and "metadata" in response_json:
|
||||
# load arch_state from metadata
|
||||
arch_state_str = response_json.get("metadata", {}).get(ARCH_STATE_HEADER, "{}")
|
||||
# parse arch_state into json object
|
||||
arch_state = json.loads(arch_state_str)
|
||||
# load messages from arch_state
|
||||
arch_messages_str = arch_state.get("messages", "[]")
|
||||
# parse messages into json object
|
||||
arch_messages = json.loads(arch_messages_str)
|
||||
# append messages from arch gateway to history
|
||||
return arch_messages
|
||||
return []
|
||||
Loading…
Add table
Add a link
Reference in a new issue