import json import os PROMPT_GATEWAY_ENDPOINT = os.getenv( "PROMPT_GATEWAY_ENDPOINT", "http://localhost:10000/v1/chat/completions" ) PROMPT_GATEWAY_PATH = os.getenv("PROMPT_GATEWAY_PATH", "/v1/chat/completions") MODEL_SERVER_FUNC_PATH = os.getenv("MODEL_SERVER_FUNC_PATH", "/function_calling") LLM_GATEWAY_ENDPOINT = os.getenv( "LLM_GATEWAY_ENDPOINT", "http://localhost:12000/v1/chat/completions" ) ARCH_STATE_HEADER = "x-arch-state" PREFILL_LIST = [ "May", "Could", "Sure", "Definitely", "Certainly", "Of course", "Can", ] TEST_CASE_FIXTURES = { "SIMPLE": { "input": { "messages": [ { "role": "user", "content": "how is the weather in seattle for next 2 days", } ] }, "model_server_response": { "id": 0, "object": "chat_completion", "created": "", "choices": [ { "id": 0, "message": { "role": "", "content": "", "tool_call_id": "", "tool_calls": [ { "id": "call_6009", "type": "function", "function": { "name": "get_current_weather", "arguments": { "location": "Seattle, WA", "days": "2", }, }, } ], }, "finish_reason": "stop", } ], "model": "Arch-Function", "metadata": {"intent_latency": "455.092", "function_latency": "312.744"}, }, "api_server_response": [ { "date": "2024-12-12", "temperature": {"min": 72, "max": 90}, "units": "Farenheit", "query_time": "2024-12-12 22:06:30.420319+00:00", }, { "date": "2024-12-13", "temperature": {"min": 52, "max": 70}, "units": "Farenheit", "query_time": "2024-12-12 22:06:30.420349+00:00", }, ], } } def get_data_chunks(stream, n=1): chunks = [] for chunk in stream.iter_lines(): if chunk: chunk = chunk.decode("utf-8") chunk_data_id = chunk[0:6] assert chunk_data_id == "data: " chunk_data = chunk[6:] chunk_data = chunk_data.strip() chunks.append(chunk_data) if len(chunks) >= n: break return chunks def get_plano_messages(response_json): plano_messages = [] if response_json and "metadata" in response_json: # load plano_state from metadata plano_state_str = response_json.get("metadata", {}).get(ARCH_STATE_HEADER, "{}") # parse plano_state into json object plano_state = json.loads(plano_state_str) # load messages from plano_state plano_messages_str = plano_state.get("messages", "[]") # parse messages into json object plano_messages = json.loads(plano_messages_str) # append messages from plano gateway to history return plano_messages return []