# Mirror of https://github.com/katanemo/plano.git (synced 2026-04-25 08:46:24 +02:00)
# Update pre-commit black to latest release; reformat Python files for new black version.
import json
|
|
import os
|
|
|
|
# Prompt-gateway chat-completions endpoint; override via environment variable.
PROMPT_GATEWAY_ENDPOINT = os.getenv(
    "PROMPT_GATEWAY_ENDPOINT", "http://localhost:10000/v1/chat/completions"
)

# Request paths for the prompt gateway and the model server's function-calling API.
PROMPT_GATEWAY_PATH = os.getenv("PROMPT_GATEWAY_PATH", "/v1/chat/completions")
MODEL_SERVER_FUNC_PATH = os.getenv("MODEL_SERVER_FUNC_PATH", "/function_calling")

# LLM-gateway chat-completions endpoint; override via environment variable.
LLM_GATEWAY_ENDPOINT = os.getenv(
    "LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1/chat/completions"
)

# Metadata key under which the gateway returns its serialized conversation state.
ARCH_STATE_HEADER = "x-arch-state"

# Leading words checked for in assistant replies (e.g. prefill detection in tests).
PREFILL_LIST = [
    "May",
    "Could",
    "Sure",
    "Definitely",
    "Certainly",
    "Of course",
    "Can",
]
# Canned request/response fixtures keyed by scenario name, used by the
# end-to-end gateway tests.
TEST_CASE_FIXTURES = {
    "SIMPLE": {
        # User request sent to the prompt gateway.
        "input": {
            "messages": [
                {
                    "role": "user",
                    "content": "how is the weather in seattle for next 2 days",
                }
            ]
        },
        # Function-calling response produced by the model server.
        "model_server_response": {
            "id": 0,
            "object": "chat_completion",
            "created": "",
            "choices": [
                {
                    "id": 0,
                    "message": {
                        "role": "",
                        "content": "",
                        "tool_call_id": "",
                        "tool_calls": [
                            {
                                "id": "call_6009",
                                "type": "function",
                                "function": {
                                    "name": "get_current_weather",
                                    "arguments": {
                                        "location": "Seattle, WA",
                                        "days": "2",
                                    },
                                },
                            }
                        ],
                    },
                    "finish_reason": "stop",
                }
            ],
            "model": "Arch-Function",
            "metadata": {"intent_latency": "455.092", "function_latency": "312.744"},
        },
        # Mocked upstream weather-API payload. NOTE: "Farenheit" spelling is
        # kept as-is — it is fixture data that may be compared byte-for-byte.
        "api_server_response": [
            {
                "date": "2024-12-12",
                "temperature": {"min": 72, "max": 90},
                "units": "Farenheit",
                "query_time": "2024-12-12 22:06:30.420319+00:00",
            },
            {
                "date": "2024-12-13",
                "temperature": {"min": 52, "max": 70},
                "units": "Farenheit",
                "query_time": "2024-12-12 22:06:30.420349+00:00",
            },
        ],
    }
}
def get_data_chunks(stream, n=1):
    """Collect up to *n* SSE data payloads from a streaming response.

    Iterates the response line by line, skips blank keep-alive lines,
    strips the ``"data: "`` prefix from each remaining line, and returns
    the whitespace-trimmed payloads.

    Args:
        stream: a streaming HTTP response exposing ``iter_lines()`` that
            yields ``bytes`` chunks (e.g. a ``requests`` response).
        n: maximum number of data payloads to collect before stopping.

    Returns:
        list[str]: up to ``n`` payload strings, in arrival order.

    Raises:
        ValueError: if a non-empty line does not start with ``"data: "``.
    """
    prefix = "data: "
    chunks = []
    for raw in stream.iter_lines():
        if not raw:
            # SSE streams use blank lines as event separators / keep-alives.
            continue
        line = raw.decode("utf-8")
        # Explicit check instead of `assert`, which is stripped under `-O`.
        if not line.startswith(prefix):
            raise ValueError(f"unexpected SSE line (missing {prefix!r}): {line!r}")
        chunks.append(line[len(prefix):].strip())
        if len(chunks) >= n:
            break
    return chunks
def get_plano_messages(response_json):
    """Extract the message history embedded by the plano gateway.

    The gateway serializes its state as a JSON string under the
    ``ARCH_STATE_HEADER`` key of the response ``metadata``; that state in
    turn carries a JSON-encoded ``messages`` list (double-encoded JSON).

    Args:
        response_json: parsed chat-completion response body (dict), or a
            falsy value when no response is available.

    Returns:
        list: the gateway's message history, or ``[]`` when absent.
    """
    # Guard clause: nothing to extract without a metadata section.
    if not response_json or "metadata" not in response_json:
        return []
    # The state value is itself a JSON string...
    state_str = response_json.get("metadata", {}).get(ARCH_STATE_HEADER, "{}")
    state = json.loads(state_str)
    # ...and the messages inside the state are JSON-encoded once more.
    return json.loads(state.get("messages", "[]"))