mirror of
https://github.com/katanemo/plano.git
synced 2026-04-25 00:36:34 +02:00
42 lines
1.3 KiB
Python
42 lines
1.3 KiB
Python
import json
|
|
import os
|
|
|
|
|
|
# Chat-completions URL of the prompt gateway; the PROMPT_GATEWAY_ENDPOINT
# environment variable overrides the localhost default.
PROMPT_GATEWAY_ENDPOINT = os.environ.get(
    "PROMPT_GATEWAY_ENDPOINT",
    "http://localhost:10000/v1/chat/completions",
)

# Chat-completions URL of the LLM gateway; the LLM_GATEWAY_ENDPOINT
# environment variable overrides the localhost default.
LLM_GATEWAY_ENDPOINT = os.environ.get(
    "LLM_GATEWAY_ENDPOINT",
    "http://localhost:12000/v1/chat/completions",
)

# Key under which the serialized arch state is looked up in response metadata.
ARCH_STATE_HEADER = "x-arch-state"
|
|
|
|
|
|
def get_data_chunks(stream, n=1):
    """Collect the payloads of the first ``n`` non-empty lines of a stream.

    Args:
        stream: Object exposing ``iter_lines()`` that yields ``bytes`` lines
            (presumably a streaming HTTP response -- confirm against callers).
        n: Maximum number of payloads to collect. Values ``<= 0`` yield an
            empty list without reading from the stream.

    Returns:
        A list of at most ``n`` strings: each non-empty line decoded as
        UTF-8, with the leading ``"data: "`` prefix removed and surrounding
        whitespace stripped.

    Raises:
        AssertionError: If a non-empty line does not start with ``"data: "``.
    """
    prefix = "data: "
    chunks = []
    if n <= 0:
        return chunks

    for raw_line in stream.iter_lines():
        # Blank lines are skipped; they carry no payload.
        if not raw_line:
            continue

        line = raw_line.decode("utf-8")
        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        if not line.startswith(prefix):
            raise AssertionError(
                f"expected line to start with {prefix!r}, got {line!r}"
            )

        chunks.append(line[len(prefix):].strip())
        if len(chunks) >= n:
            break

    return chunks
|
|
|
|
|
|
def get_arch_messages(response_json):
    """Extract the message list stored in a response's arch state.

    The state is read from ``response_json["metadata"][ARCH_STATE_HEADER]``
    as a JSON string; its ``"messages"`` field is itself a JSON-encoded
    string, so two decoding passes are needed.

    Args:
        response_json: Parsed response body (a mapping), or ``None``.

    Returns:
        The decoded ``"messages"`` value, or ``[]`` when the response is
        empty, has no ``"metadata"`` entry, or its metadata is null/empty.

    Raises:
        json.JSONDecodeError: If the stored state or its messages field is
            not valid JSON.
    """
    if not response_json:
        return []

    # A present-but-null "metadata" entry is treated like a missing one
    # instead of failing on ``None.get``.
    metadata = response_json.get("metadata")
    if not metadata:
        return []

    # First pass: the arch state is stored as a JSON string.
    arch_state = json.loads(metadata.get(ARCH_STATE_HEADER, "{}"))
    # Second pass: the messages inside the state are JSON-encoded again.
    return json.loads(arch_state.get("messages", "[]"))
|