remove streaming for run.py

This commit is contained in:
Adil Hafeez 2024-10-28 23:43:08 -07:00
parent e8498f5d25
commit bd4e72b48e

View file

@ -11,7 +11,6 @@ from dotenv import load_dotenv
load_dotenv()
def _env_flag(*names, default=True):
    """Return a boolean read from the first of *names* set in the environment.

    Args:
        *names: Environment variable names, checked in order; the first one
            that is set (even to an empty string) decides the result.
        default: Value returned when none of the names is set.

    Returns:
        False when the chosen value, stripped and lower-cased, is one of
        "", "0", "false", "no" or "off"; True for any other value.
    """
    for name in names:
        raw = os.getenv(name)
        if raw is not None:
            return raw.strip().lower() not in {"", "0", "false", "no", "off"}
    return default


# bool() on a non-empty string is always True, so values such as "false" or
# "0" used to leave streaming enabled; the value is parsed explicitly instead.
# The correctly spelled STREAM_RESPONSE wins; the historical misspelling
# STREAM_RESPOSE is still honoured so existing environments keep working.
STREAM_RESPONSE = _env_flag("STREAM_RESPONSE", "STREAM_RESPOSE")
logging.basicConfig(
level=logging.INFO,
@ -39,54 +38,9 @@ footer {visibility: hidden}
# Module-level OpenAI client that sends its requests to
# CHAT_COMPLETION_ENDPOINT (passed as base_url).
# The api_key is the literal placeholder "--".
# NOTE(review): presumably the service behind that endpoint does not need a
# real key from this client — confirm.
client = OpenAI(
api_key="--",
base_url=CHAT_COMPLETION_ENDPOINT,
# Disabled override of the HTTP client's accept-encoding header:
# http_client=DefaultHttpxClient(headers={"accept-encoding": "*"}),
)
def convert_prompt_target_to_openai_format(target):
    """Convert one prompt-target config entry into a tool description.

    Args:
        target: Mapping with "name" and "description" keys and an optional
            "parameters" list. Each parameter entry must provide "name",
            "type" and "description"; it may also carry "required" and any
            of "default", "format", "enum", "items", "minimum", "maximum".

    Returns:
        ``{"name": <target name>, "info": <tool>}`` where ``<tool>`` holds the
        description and an object schema under "parameters" with
        "properties" and "required" filled in from the parameter entries.

    Raises:
        KeyError: If the target or one of its parameters lacks a mandatory key.
    """
    properties = {}
    required_names = []
    optional_keys = ("default", "format", "enum", "items", "minimum", "maximum")

    # A "parameters" key with an empty/null value (e.g. a bare `parameters:`
    # line in YAML) is treated the same as a missing key.
    for param_info in target.get("parameters") or []:
        parameter = {
            "type": param_info["type"],
            "description": param_info["description"],
        }
        # Copy through only the optional schema keywords that are present.
        parameter.update(
            {key: param_info[key] for key in optional_keys if key in param_info}
        )
        properties[param_info["name"]] = parameter
        if param_info.get("required", False):
            required_names.append(param_info["name"])

    tool = {
        "description": target["description"],
        "parameters": {
            "type": "object",
            "properties": properties,
            "required": required_names,
        },
    }
    return {"name": target["name"], "info": tool}
def get_prompt_targets():
    """Load the non-default prompt targets from the config file as tools.

    The config path comes from the ``ARCH_CONFIG`` environment variable and
    falls back to ``arch_config.yaml``. Every entry under "prompt_targets"
    that is not flagged ``default`` is converted with
    ``convert_prompt_target_to_openai_format``.

    Returns:
        A dict mapping each target name to its tool info, or None when the
        file cannot be read, parsed or converted (best-effort: the failure is
        logged, not raised).
    """
    config_path = os.getenv("ARCH_CONFIG", "arch_config.yaml")
    try:
        # Explicit encoding so the result does not depend on the platform's
        # default locale.
        with open(config_path, "r", encoding="utf-8") as file:
            config = yaml.safe_load(file)
        converted = [
            convert_prompt_target_to_openai_format(target)
            for target in config["prompt_targets"]
            if not target.get("default", False)
        ]
        return {tool["name"]: tool["info"] for tool in converted}
    except Exception as e:
        # Deliberately broad: a missing or malformed config must not stop the
        # caller; it gets None instead.
        log.warning("Unable to load prompt targets from %s: %s", config_path, e)
        return None
def chat(query: Optional[str], conversation: Optional[List[Tuple[str, str]]], state):
if "history" not in state:
state["history"] = []
@ -95,97 +49,44 @@ def chat(query: Optional[str], conversation: Optional[List[Tuple[str, str]]], st
history.append({"role": "user", "content": query})
log.info(f"history: {history}")
# Custom headers
custom_headers = {
"x-arch-deterministic-provider": "openai",
}
try:
raw_response = client.chat.completions.with_raw_response.create(
model="--",
messages=history,
temperature=1.0,
# metadata=metadata,
extra_headers=custom_headers,
stream=STREAM_RESPONSE,
)
except Exception as e:
log.info(e)
# remove last user message in case of exception
history.pop()
log.info("Error calling gateway API: {}".format(e))
# remove last user message in case of exception
log.error("Error calling gateway API: {}".format(e))
raise gr.Error("Error calling gateway API: {}".format(e))
if STREAM_RESPONSE:
response = raw_response.parse()
history.append({"role": "assistant", "content": "", "model": ""})
conversation.append((query, ""))
# for gradio UI we don't want to show raw tool calls and messages from developer application
# so we're filtering those out
history_view = [h for h in history if h["role"] != "tool" and "content" in h]
log.error(f"raw_response: {raw_response.text}")
response = raw_response.parse()
for chunk in response:
print("chunk: " + str(chunk.to_dict()))
if len(chunk.choices) > 0:
if chunk.choices[0].delta.role:
# create new history item if role changes
# this is likely due to arch tool call and api response
if history[-1]["role"] != chunk.choices[0].delta.role:
history.append(
{
"role": chunk.choices[0].delta.role,
"content": chunk.choices[0].delta.content,
"model": chunk.model,
"tool_calls": chunk.choices[0].delta.tool_calls,
}
)
# extract arch_state from metadata and store it in gradio session state
# this state must be passed back to the gateway in the next request
response_json = json.loads(raw_response.text)
log.info(response_json)
history[-1]["model"] = chunk.model
if chunk.choices[0].delta.content:
if not history[-1]["content"]:
history[-1]["content"] = ""
history[-1]["content"] = (
history[-1]["content"] + chunk.choices[0].delta.content
)
if chunk.choices[0].delta.tool_calls:
history[-1]["tool_calls"] = chunk.choices[0].delta.tool_calls
arch_messages = get_arch_messages(response_json)
for arch_message in arch_messages:
history.append(arch_message)
if history[-1]["role"] != "tool":
if chunk.model and chunk.choices[0].delta.content != "":
conversation[-1] = (
conversation[-1][0],
conversation[-1][1] + chunk.choices[0].delta.content,
)
yield "", conversation, state
else:
log.error(f"raw_response: {raw_response.text}")
response = raw_response.parse()
content = response.choices[0].message.content
# extract arch_state from metadata and store it in gradio session state
# this state must be passed back to the gateway in the next request
response_json = json.loads(raw_response.text)
log.info(response_json)
history.append({"role": "assistant", "content": content, "model": response.model})
arch_messages = get_arch_messages(response_json)
for arch_message in arch_messages:
history.append(arch_message)
# for gradio UI we don't want to show raw tool calls and messages from developer application
# so we're filtering those out
history_view = [h for h in history if h["role"] != "tool" and "content" in h]
content = response.choices[0].message.content
conversation = [
(history_view[i]["content"], history_view[i + 1]["content"])
for i in range(0, len(history_view) - 1, 2)
]
history.append(
{"role": "assistant", "content": content, "model": response.model}
)
# for gradio UI we don't want to show raw tool calls and messages from developer application
# so we're filtering those out
history_view = [h for h in history if h["role"] != "tool" and "content" in h]
conversation = [
(history_view[i]["content"], history_view[i + 1]["content"])
for i in range(0, len(history_view) - 1, 2)
]
yield "", conversation, state
yield "", conversation, state
def main():