Remove streaming from run.py

2026-06-17 15:25:17 +02:00 · 2024-10-28 23:43:08 -07:00 · 2024-10-28 23:43:08 -07:00 · bd4e72b48e
commit bd4e72b48e
parent e8498f5d25
1 changed files with 21 additions and 120 deletions
--- a/chatbot_ui/run.py
+++ b/chatbot_ui/run.py
@ -11,7 +11,6 @@ from dotenv import load_dotenv

 load_dotenv()

-STREAM_RESPONSE = bool(os.getenv("STREAM_RESPOSE", True))

 logging.basicConfig(
    level=logging.INFO,
@ -39,54 +38,9 @@ footer {visibility: hidden}
 client = OpenAI(
    api_key="--",
    base_url=CHAT_COMPLETION_ENDPOINT,
-    # http_client=DefaultHttpxClient(headers={"accept-encoding": "*"}),
 )


-def convert_prompt_target_to_openai_format(target):
-    tool = {
-        "description": target["description"],
-        "parameters": {"type": "object", "properties": {}, "required": []},
-    }
-
-    if "parameters" in target:
-        for param_info in target["parameters"]:
-            parameter = {
-                "type": param_info["type"],
-                "description": param_info["description"],
-            }
-
-            for key in ["default", "format", "enum", "items", "minimum", "maximum"]:
-                if key in param_info:
-                    parameter[key] = param_info[key]
-
-            tool["parameters"]["properties"][param_info["name"]] = parameter
-
-            required = param_info.get("required", False)
-            if required:
-                tool["parameters"]["required"].append(param_info["name"])
-
-    return {"name": target["name"], "info": tool}
-
-
-def get_prompt_targets():
-    try:
-        with open(os.getenv("ARCH_CONFIG", "arch_config.yaml"), "r") as file:
-            config = yaml.safe_load(file)
-
-            available_tools = []
-            for target in config["prompt_targets"]:
-                if not target.get("default", False):
-                    available_tools.append(
-                        convert_prompt_target_to_openai_format(target)
-                    )
-
-            return {tool["name"]: tool["info"] for tool in available_tools}
-    except Exception as e:
-        log.info(e)
-        return None
-
-
 def chat(query: Optional[str], conversation: Optional[List[Tuple[str, str]]], state):
    if "history" not in state:
        state["history"] = []
@ -95,97 +49,44 @@ def chat(query: Optional[str], conversation: Optional[List[Tuple[str, str]]], st
    history.append({"role": "user", "content": query})
    log.info(f"history: {history}")

-    # Custom headers
-    custom_headers = {
-        "x-arch-deterministic-provider": "openai",
-    }
-
    try:
        raw_response = client.chat.completions.with_raw_response.create(
            model="--",
            messages=history,
            temperature=1.0,
-            # metadata=metadata,
-            extra_headers=custom_headers,
-            stream=STREAM_RESPONSE,
        )
    except Exception as e:
-        log.info(e)
-        # remove last user message in case of exception
        history.pop()
-        log.info("Error calling gateway API: {}".format(e))
+        # remove last user message in case of exception
+        log.error("Error calling gateway API: {}".format(e))
        raise gr.Error("Error calling gateway API: {}".format(e))

-    if STREAM_RESPONSE:
-        response = raw_response.parse()
-        history.append({"role": "assistant", "content": "", "model": ""})
-        conversation.append((query, ""))
-        # for gradio UI we don't want to show raw tool calls and messages from developer application
-        # so we're filtering those out
-        history_view = [h for h in history if h["role"] != "tool" and "content" in h]
+    log.error(f"raw_response: {raw_response.text}")
+    response = raw_response.parse()

-        for chunk in response:
-            print("chunk: " + str(chunk.to_dict()))
-            if len(chunk.choices) > 0:
-                if chunk.choices[0].delta.role:
-                    # create new history item if role changes
-                    # this is likely due to arch tool call and api response
-                    if history[-1]["role"] != chunk.choices[0].delta.role:
-                        history.append(
-                            {
-                                "role": chunk.choices[0].delta.role,
-                                "content": chunk.choices[0].delta.content,
-                                "model": chunk.model,
-                                "tool_calls": chunk.choices[0].delta.tool_calls,
-                            }
-                        )
+    # extract arch_state from metadata and store it in gradio session state
+    # this state must be passed back to the gateway in the next request
+    response_json = json.loads(raw_response.text)
+    log.info(response_json)

-                history[-1]["model"] = chunk.model
-                if chunk.choices[0].delta.content:
-                    if not history[-1]["content"]:
-                        history[-1]["content"] = ""
-                    history[-1]["content"] = (
-                        history[-1]["content"] + chunk.choices[0].delta.content
-                    )
-                if chunk.choices[0].delta.tool_calls:
-                    history[-1]["tool_calls"] = chunk.choices[0].delta.tool_calls
+    arch_messages = get_arch_messages(response_json)
+    for arch_message in arch_messages:
+        history.append(arch_message)

-                if history[-1]["role"] != "tool":
-                    if chunk.model and chunk.choices[0].delta.content != "":
-                        conversation[-1] = (
-                            conversation[-1][0],
-                            conversation[-1][1] + chunk.choices[0].delta.content,
-                        )
-                yield "", conversation, state
-    else:
-        log.error(f"raw_response: {raw_response.text}")
-        response = raw_response.parse()
+    content = response.choices[0].message.content

-        # extract arch_state from metadata and store it in gradio session state
-        # this state must be passed back to the gateway in the next request
-        response_json = json.loads(raw_response.text)
-        log.info(response_json)
+    history.append({"role": "assistant", "content": content, "model": response.model})

-        arch_messages = get_arch_messages(response_json)
-        for arch_message in arch_messages:
-            history.append(arch_message)
+    # for gradio UI we don't want to show raw tool calls and messages from developer application
+    # so we're filtering those out
+    history_view = [h for h in history if h["role"] != "tool" and "content" in h]

-        content = response.choices[0].message.content
+    conversation = [
+        (history_view[i]["content"], history_view[i + 1]["content"])
+        for i in range(0, len(history_view) - 1, 2)
+    ]

-        history.append(
-            {"role": "assistant", "content": content, "model": response.model}
-        )
-
-        # for gradio UI we don't want to show raw tool calls and messages from developer application
-        # so we're filtering those out
-        history_view = [h for h in history if h["role"] != "tool" and "content" in h]
-
-        conversation = [
-            (history_view[i]["content"], history_view[i + 1]["content"])
-            for i in range(0, len(history_view) - 1, 2)
-        ]
-
-        yield "", conversation, state
+    yield "", conversation, state


 def main():