From bd4e72b48e545223857491d27881cf88ceada3b8 Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Mon, 28 Oct 2024 23:43:08 -0700 Subject: [PATCH] remove streaming for run.py --- chatbot_ui/run.py | 141 +++++++--------------------------------------- 1 file changed, 21 insertions(+), 120 deletions(-) diff --git a/chatbot_ui/run.py b/chatbot_ui/run.py index 7f8e2227..a774dce5 100644 --- a/chatbot_ui/run.py +++ b/chatbot_ui/run.py @@ -11,7 +11,6 @@ from dotenv import load_dotenv load_dotenv() -STREAM_RESPONSE = bool(os.getenv("STREAM_RESPOSE", True)) logging.basicConfig( level=logging.INFO, @@ -39,54 +38,9 @@ footer {visibility: hidden} client = OpenAI( api_key="--", base_url=CHAT_COMPLETION_ENDPOINT, - # http_client=DefaultHttpxClient(headers={"accept-encoding": "*"}), ) -def convert_prompt_target_to_openai_format(target): - tool = { - "description": target["description"], - "parameters": {"type": "object", "properties": {}, "required": []}, - } - - if "parameters" in target: - for param_info in target["parameters"]: - parameter = { - "type": param_info["type"], - "description": param_info["description"], - } - - for key in ["default", "format", "enum", "items", "minimum", "maximum"]: - if key in param_info: - parameter[key] = param_info[key] - - tool["parameters"]["properties"][param_info["name"]] = parameter - - required = param_info.get("required", False) - if required: - tool["parameters"]["required"].append(param_info["name"]) - - return {"name": target["name"], "info": tool} - - -def get_prompt_targets(): - try: - with open(os.getenv("ARCH_CONFIG", "arch_config.yaml"), "r") as file: - config = yaml.safe_load(file) - - available_tools = [] - for target in config["prompt_targets"]: - if not target.get("default", False): - available_tools.append( - convert_prompt_target_to_openai_format(target) - ) - - return {tool["name"]: tool["info"] for tool in available_tools} - except Exception as e: - log.info(e) - return None - - def chat(query: Optional[str], conversation: Optional[List[Tuple[str, str]]], state): if "history" not in state: state["history"] = [] @@ -95,97 +49,44 @@ def chat(query: Optional[str], conversation: Optional[List[Tuple[str, str]]], st history.append({"role": "user", "content": query}) log.info(f"history: {history}") - # Custom headers - custom_headers = { - "x-arch-deterministic-provider": "openai", - } - try: raw_response = client.chat.completions.with_raw_response.create( model="--", messages=history, temperature=1.0, - # metadata=metadata, - extra_headers=custom_headers, - stream=STREAM_RESPONSE, ) except Exception as e: - log.info(e) - # remove last user message in case of exception history.pop() - log.info("Error calling gateway API: {}".format(e)) + # remove last user message in case of exception + log.error("Error calling gateway API: {}".format(e)) raise gr.Error("Error calling gateway API: {}".format(e)) - if STREAM_RESPONSE: - response = raw_response.parse() - history.append({"role": "assistant", "content": "", "model": ""}) - conversation.append((query, "")) - # for gradio UI we don't want to show raw tool calls and messages from developer application - # so we're filtering those out - history_view = [h for h in history if h["role"] != "tool" and "content" in h] + log.error(f"raw_response: {raw_response.text}") + response = raw_response.parse() - for chunk in response: - print("chunk: " + str(chunk.to_dict())) - if len(chunk.choices) > 0: - if chunk.choices[0].delta.role: - # create new history item if role changes - # this is likely due to arch tool call and api response - if history[-1]["role"] != chunk.choices[0].delta.role: - history.append( - { - "role": chunk.choices[0].delta.role, - "content": chunk.choices[0].delta.content, - "model": chunk.model, - "tool_calls": chunk.choices[0].delta.tool_calls, - } - ) + # extract arch_state from metadata and store it in gradio session state + # this state must be passed back to the gateway in the next request + response_json = json.loads(raw_response.text) + log.info(response_json) - history[-1]["model"] = chunk.model - if chunk.choices[0].delta.content: - if not history[-1]["content"]: - history[-1]["content"] = "" - history[-1]["content"] = ( - history[-1]["content"] + chunk.choices[0].delta.content - ) - if chunk.choices[0].delta.tool_calls: - history[-1]["tool_calls"] = chunk.choices[0].delta.tool_calls + arch_messages = get_arch_messages(response_json) + for arch_message in arch_messages: + history.append(arch_message) - if history[-1]["role"] != "tool": - if chunk.model and chunk.choices[0].delta.content != "": - conversation[-1] = ( - conversation[-1][0], - conversation[-1][1] + chunk.choices[0].delta.content, - ) - yield "", conversation, state - else: - log.error(f"raw_response: {raw_response.text}") - response = raw_response.parse() + content = response.choices[0].message.content - # extract arch_state from metadata and store it in gradio session state - # this state must be passed back to the gateway in the next request - response_json = json.loads(raw_response.text) - log.info(response_json) + history.append({"role": "assistant", "content": content, "model": response.model}) - arch_messages = get_arch_messages(response_json) - for arch_message in arch_messages: - history.append(arch_message) + # for gradio UI we don't want to show raw tool calls and messages from developer application + # so we're filtering those out + history_view = [h for h in history if h["role"] != "tool" and "content" in h] - content = response.choices[0].message.content + conversation = [ + (history_view[i]["content"], history_view[i + 1]["content"]) + for i in range(0, len(history_view) - 1, 2) + ] - history.append( - {"role": "assistant", "content": content, "model": response.model} - ) - - # for gradio UI we don't want to show raw tool calls and messages from developer application - # so we're filtering those out - history_view = [h for h in history if h["role"] != "tool" and "content" in h] - - conversation = [ - (history_view[i]["content"], history_view[i + 1]["content"]) - for i in range(0, len(history_view) - 1, 2) - ] - - yield "", conversation, state + yield "", conversation, state def main():