mirror of
https://github.com/katanemo/plano.git
synced 2026-06-05 14:45:15 +02:00
Improve Gradio UI and fix arch_state bug (#227)
This commit is contained in:
parent
662a840ac5
commit
60299244b9
9 changed files with 209 additions and 262 deletions
5
chatbot_ui/.vscode/launch.json
vendored
5
chatbot_ui/.vscode/launch.json
vendored
|
|
@ -7,16 +7,15 @@
|
||||||
{
|
{
|
||||||
"python": "${workspaceFolder}/venv/bin/python",
|
"python": "${workspaceFolder}/venv/bin/python",
|
||||||
"name": "chatbot-ui",
|
"name": "chatbot-ui",
|
||||||
"cwd": "${workspaceFolder}/app",
|
|
||||||
"type": "debugpy",
|
"type": "debugpy",
|
||||||
"request": "launch",
|
"request": "launch",
|
||||||
"program": "run.py",
|
"program": "run_stream.py",
|
||||||
"console": "integratedTerminal",
|
"console": "integratedTerminal",
|
||||||
"env": {
|
"env": {
|
||||||
"LLM": "1",
|
"LLM": "1",
|
||||||
"CHAT_COMPLETION_ENDPOINT": "http://localhost:10000/v1",
|
"CHAT_COMPLETION_ENDPOINT": "http://localhost:10000/v1",
|
||||||
"STREAMING": "True",
|
"STREAMING": "True",
|
||||||
"ARCH_CONFIG": "../../demos/function_calling/arch_config.yaml"
|
"ARCH_CONFIG": "../demos/function_calling/arch_config.yaml"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -8,13 +8,11 @@ COPY requirements.txt /src/
|
||||||
|
|
||||||
RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt
|
RUN pip install --prefix=/runtime --force-reinstall -r requirements.txt
|
||||||
|
|
||||||
COPY . /src
|
|
||||||
|
|
||||||
FROM python:3.10-slim AS output
|
FROM python:3.10-slim AS output
|
||||||
|
|
||||||
COPY --from=builder /runtime /usr/local
|
COPY --from=builder /runtime /usr/local
|
||||||
|
|
||||||
COPY /app /app
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
COPY *.py .
|
||||||
|
|
||||||
CMD ["python", "run.py"]
|
CMD ["python", "run_stream.py"]
|
||||||
|
|
|
||||||
|
|
@ -1,20 +0,0 @@
|
||||||
import json
|
|
||||||
|
|
||||||
|
|
||||||
ARCH_STATE_HEADER = "x-arch-state"


def get_arch_messages(response_json):
    """Extract the gateway's message list from a chat-completion response body.

    The response is expected to carry ``metadata[ARCH_STATE_HEADER]``, a JSON
    string whose ``"messages"`` entry is itself a JSON-encoded list.

    Args:
        response_json: Decoded response body (a dict), or a falsy value.

    Returns:
        The decoded ``messages`` value, or ``[]`` when the response has no
        usable ``metadata`` / arch state.

    Raises:
        ValueError: If the arch state or its ``messages`` entry is not valid
            JSON (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    if not response_json or "metadata" not in response_json:
        return []

    # "metadata" (or the header inside it) may be present but null; treat that
    # the same as "no state" instead of failing on None.get / json.loads(None).
    metadata = response_json.get("metadata") or {}
    arch_state_str = metadata.get(ARCH_STATE_HEADER) or "{}"

    # parse arch_state into json object
    arch_state = json.loads(arch_state_str)

    # "messages" is a JSON string nested inside the state; accept an already
    # decoded value as-is rather than failing on it.
    arch_messages = arch_state.get("messages", "[]")
    if isinstance(arch_messages, str):
        arch_messages = json.loads(arch_messages)
    return arch_messages
|
|
||||||
|
|
@ -1,231 +0,0 @@
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import logging
|
|
||||||
import yaml
|
|
||||||
from arch_util import get_arch_messages
|
|
||||||
import gradio as gr
|
|
||||||
|
|
||||||
from typing import List, Optional, Tuple
|
|
||||||
from openai import OpenAI
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
|
|
||||||
load_dotenv()
|
|
||||||
|
|
||||||
def _env_flag(name, default):
    """Read environment variable ``name`` as a boolean.

    Returns ``default`` when the variable is unset. An empty value or one of
    "0", "false", "no", "off" (case-insensitive) is False; anything else is True.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() not in ("", "0", "false", "no", "off")


# bool("False") is True, so the raw string has to be parsed rather than cast.
# The historical misspelling STREAM_RESPOSE is still honoured as a fallback so
# environments that already set it keep working.
STREAM_RESPONSE = _env_flag("STREAM_RESPONSE", _env_flag("STREAM_RESPOSE", True))
|
|
||||||
|
|
||||||
logging.basicConfig(
|
|
||||||
level=logging.INFO,
|
|
||||||
format="%(asctime)s - %(levelname)s - %(message)s",
|
|
||||||
)
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
CHAT_COMPLETION_ENDPOINT = os.getenv("CHAT_COMPLETION_ENDPOINT")
|
|
||||||
log.info(f"CHAT_COMPLETION_ENDPOINT: {CHAT_COMPLETION_ENDPOINT}")
|
|
||||||
|
|
||||||
|
|
||||||
CSS_STYLE = """
|
|
||||||
.json-container {
|
|
||||||
height: 95vh !important;
|
|
||||||
overflow-y: auto !important;
|
|
||||||
}
|
|
||||||
.chatbot {
|
|
||||||
height: calc(95vh - 100px) !important;
|
|
||||||
overflow-y: auto !important;
|
|
||||||
}
|
|
||||||
footer {visibility: hidden}
|
|
||||||
"""
|
|
||||||
|
|
||||||
client = OpenAI(
|
|
||||||
api_key="--",
|
|
||||||
base_url=CHAT_COMPLETION_ENDPOINT,
|
|
||||||
# http_client=DefaultHttpxClient(headers={"accept-encoding": "*"}),
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def convert_prompt_target_to_openai_format(target):
    """Convert one prompt target from the arch config into a tool description.

    Args:
        target: Mapping with ``"name"`` and ``"description"`` and an optional
            ``"parameters"`` list of parameter mappings (each with ``"name"``).

    Returns:
        ``{"name": <target name>, "info": {"description": ...,
        "parameters": {"type": "object", "properties": {...}, "required": [...]}}}``.

    Raises:
        KeyError: If the target lacks ``"name"``/``"description"`` or a
            parameter lacks ``"name"``.
    """
    # Only these keys are carried over into the parameter schema.
    copied_keys = (
        "type",
        "description",
        "default",
        "format",
        "enum",
        "items",
        "minimum",
        "maximum",
    )

    properties = {}
    required = []
    # "parameters" may be missing or present-but-null in the YAML.
    for param_info in target.get("parameters") or []:
        name = param_info["name"]
        # Copy whichever schema keys are present; a parameter without
        # "type"/"description" no longer aborts the whole conversion.
        properties[name] = {
            key: param_info[key] for key in copied_keys if key in param_info
        }
        if param_info.get("required", False):
            required.append(name)

    tool = {
        "description": target["description"],
        "parameters": {
            "type": "object",
            "properties": properties,
            "required": required,
        },
    }
    return {"name": target["name"], "info": tool}
|
|
||||||
|
|
||||||
|
|
||||||
def get_prompt_targets():
    """Load the non-default prompt targets from the arch config file.

    The file path comes from the ``ARCH_CONFIG`` environment variable and
    defaults to ``arch_config.yaml``.

    Returns:
        A dict mapping each target name to its converted tool info, or
        ``None`` if the config could not be read or converted.
    """
    config_path = os.getenv("ARCH_CONFIG", "arch_config.yaml")
    try:
        with open(config_path, "r", encoding="utf-8") as file:
            config = yaml.safe_load(file)

        converted = (
            convert_prompt_target_to_openai_format(target)
            for target in config["prompt_targets"]
            if not target.get("default", False)
        )
        return {tool["name"]: tool["info"] for tool in converted}
    except Exception as e:
        # Deliberately best-effort: callers get None instead of an exception.
        # Log at warning level with the path so the failure is diagnosable.
        log.warning("Could not load prompt targets from %s: %s", config_path, e)
        return None
|
|
||||||
|
|
||||||
|
|
||||||
def _visible_turns(history):
    """Pair displayable history entries into (first, second) tuples for the chatbot."""
    # for gradio UI we don't want to show raw tool calls and messages from
    # developer application so we're filtering those out
    visible = [h for h in history if h["role"] != "tool" and "content" in h]
    return [
        (visible[i]["content"], visible[i + 1]["content"])
        for i in range(0, len(visible) - 1, 2)
    ]


def chat(query: Optional[str], conversation: Optional[List[Tuple[str, str]]], state):
    """Send the user's query to the gateway and yield UI updates.

    NOTE(review): the nesting below was reconstructed from a flattened diff
    view; confirm it against the original file.

    Args:
        query: Text the user submitted.
        conversation: Current chatbot value; not read here, the displayed
            pairs are rebuilt from ``state["history"]``.
        state: Session dict; the message list lives under ``"history"`` and
            is mutated in place.

    Yields:
        ``("", messages, state)``: an empty string for the textbox, the list
        of message pairs for the chatbot, and the session state.

    Raises:
        gr.Error: If the gateway call fails; the just-added user message is
            removed from the history first.
    """
    history = state.setdefault("history", [])
    history.append({"role": "user", "content": query})
    log.info(f"history: {history}")

    # Custom headers
    custom_headers = {
        "x-arch-deterministic-provider": "openai",
    }

    try:
        raw_response = client.chat.completions.with_raw_response.create(
            model="--",
            messages=history,
            temperature=1.0,
            extra_headers=custom_headers,
            stream=STREAM_RESPONSE,
        )
    except Exception as e:
        # remove last user message in case of exception
        history.pop()
        log.error("Error calling gateway API: {}".format(e))
        raise gr.Error("Error calling gateway API: {}".format(e)) from e

    if STREAM_RESPONSE:
        response = raw_response.parse()
        # Placeholder entry the streamed tokens are accumulated into.
        history.append({"role": "assistant", "content": "", "model": ""})
        messages = _visible_turns(history)

        for chunk in response:
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta

            if delta.role and history[-1]["role"] != delta.role:
                # Start the new entry with empty content: the delta's content
                # is appended just below, and seeding it here as well would
                # store the first fragment twice.
                history.append(
                    {
                        "role": delta.role,
                        "content": "",
                        "model": chunk.model,
                        "tool_calls": delta.tool_calls,
                    }
                )

            history[-1]["model"] = chunk.model
            if delta.content:
                history[-1]["content"] = (
                    history[-1]["content"] or ""
                ) + delta.content
            if delta.tool_calls:
                history[-1]["tool_calls"] = delta.tool_calls

            if chunk.model and delta.content:
                messages[-1] = (messages[-1][0], messages[-1][1] + delta.content)
                yield "", messages, state

        # Final update so the UI refreshes even if no text was streamed.
        yield "", messages, state
    else:
        # A normal response body is not an error; log it at info level.
        log.info(f"raw_response: {raw_response.text}")
        response = raw_response.parse()

        # extract arch_state from metadata and store it in gradio session state
        # this state must be passed back to the gateway in the next request
        response_json = json.loads(raw_response.text)
        log.info(response_json)

        # append messages from arch gateway to history
        history.extend(get_arch_messages(response_json))

        content = response.choices[0].message.content
        history.append(
            {"role": "assistant", "content": content, "model": response.model}
        )

        yield "", _visible_turns(history), state
|
|
||||||
|
|
||||||
|
|
||||||
def main(server_name="0.0.0.0", server_port=8080):
    """Build the Gradio UI and start serving it.

    NOTE(review): the layout nesting was reconstructed from a flattened diff
    view; confirm it against the original file.

    Args:
        server_name: Address to bind; the default matches the previously
            hard-coded "0.0.0.0".
        server_port: Port to listen on; the default matches the previously
            hard-coded 8080.
    """
    theme = gr.themes.Default(
        font_mono=[gr.themes.GoogleFont("IBM Plex Mono"), "Arial", "sans-serif"]
    )

    with gr.Blocks(theme=theme, fill_height=True, css=CSS_STYLE) as demo:
        with gr.Row(equal_height=True):
            # Per-session dict; chat() keeps the message list under "history".
            state = gr.State({})

            with gr.Column(scale=4):
                gr.JSON(
                    value=get_prompt_targets(),
                    open=True,
                    show_indices=False,
                    label="Available Tools",
                    scale=1,
                    min_height="95vh",
                    elem_classes="json-container",
                )
            with gr.Column(scale=6):
                chatbot = gr.Chatbot(
                    label="Arch Chatbot",
                    scale=1,
                    elem_classes="chatbot",
                )
                textbox = gr.Textbox(
                    show_label=False,
                    placeholder="Enter text and press enter",
                    scale=1,
                    autofocus=True,
                )

            # chat() receives and returns the same three components.
            textbox.submit(chat, [textbox, chatbot, state], [textbox, chatbot, state])

    demo.launch(
        server_name=server_name,
        server_port=server_port,
        show_error=True,
        debug=True,
    )
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
77
chatbot_ui/common.py
Normal file
77
chatbot_ui/common.py
Normal file
|
|
@ -0,0 +1,77 @@
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||||
|
)
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def process_stream_chunk(chunk, history):
    """Fold one streamed chat-completion chunk into ``history``.

    Args:
        chunk: Streaming chunk exposing ``model`` and ``choices``; only
            ``choices[0].delta`` (``role``, ``content``, ``tool_calls``) is read.
        history: Non-empty list of message dicts; mutated in place.

    Returns:
        The chunk's text if it was appended to an entry whose role is
        ``"assistant"``, otherwise ``None``.
    """
    # A chunk without choices has nothing to merge, and indexing choices[0]
    # would raise IndexError.
    if not chunk.choices:
        return None

    delta = chunk.choices[0].delta
    if delta.role and delta.role != history[-1]["role"]:
        # create new history item if role changes
        # this is likely due to arch tool call and api response
        history.append({"role": delta.role})

    current = history[-1]
    current["model"] = chunk.model

    # append tool calls to history if there are any in the chunk
    if delta.tool_calls:
        current["tool_calls"] = delta.tool_calls

    if not delta.content:
        return None

    # "content" may be missing or explicitly None; both start from "".
    current["content"] = (current.get("content") or "") + delta.content

    # only assistant text is surfaced to the caller
    return delta.content if current["role"] == "assistant" else None
|
||||||
|
|
||||||
|
|
||||||
|
def convert_prompt_target_to_openai_format(target):
    """Convert one prompt target from the arch config into a tool description.

    Args:
        target: Mapping with ``"name"`` and ``"description"`` and an optional
            ``"parameters"`` list of parameter mappings (each with ``"name"``).

    Returns:
        ``{"name": <target name>, "info": {"description": ...,
        "parameters": {"type": "object", "properties": {...}, "required": [...]}}}``.

    Raises:
        KeyError: If the target lacks ``"name"``/``"description"`` or a
            parameter lacks ``"name"``.
    """
    # Only these keys are carried over into the parameter schema.
    schema_keys = (
        "type",
        "description",
        "default",
        "format",
        "enum",
        "items",
        "minimum",
        "maximum",
    )

    tool = {
        "description": target["description"],
        "parameters": {"type": "object", "properties": {}, "required": []},
    }
    schema = tool["parameters"]

    # "parameters" may be missing or present-but-null in the YAML.
    for param_info in target.get("parameters") or []:
        param_name = param_info["name"]
        # Copy whichever schema keys are present; a parameter without
        # "type"/"description" no longer aborts the whole conversion.
        schema["properties"][param_name] = {
            key: param_info[key] for key in schema_keys if key in param_info
        }
        if param_info.get("required", False):
            schema["required"].append(param_name)

    return {"name": target["name"], "info": tool}
|
||||||
|
|
||||||
|
|
||||||
|
def get_prompt_targets():
    """Load the non-default prompt targets from the arch config file.

    The file path comes from the ``ARCH_CONFIG`` environment variable and
    defaults to ``arch_config.yaml``.

    Returns:
        A dict mapping each target name to its converted tool info, or
        ``None`` if the config could not be read or converted.
    """
    config_path = os.getenv("ARCH_CONFIG", "arch_config.yaml")
    try:
        with open(config_path, "r", encoding="utf-8") as file:
            config = yaml.safe_load(file)

        # Targets flagged "default" are skipped.
        tools = (
            convert_prompt_target_to_openai_format(target)
            for target in config["prompt_targets"]
            if not target.get("default", False)
        )
        return {tool["name"]: tool["info"] for tool in tools}
    except Exception as e:
        # Deliberately best-effort: callers get None instead of an exception.
        # Log at warning level with the path so the failure is diagnosable.
        log.warning("Could not load prompt targets from %s: %s", config_path, e)
        return None
|
||||||
120
chatbot_ui/run_stream.py
Normal file
120
chatbot_ui/run_stream.py
Normal file
|
|
@ -0,0 +1,120 @@
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
import yaml
|
||||||
|
import gradio as gr
|
||||||
|
|
||||||
|
from typing import List, Optional, Tuple
|
||||||
|
from openai import OpenAI
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
from common import get_prompt_targets, process_stream_chunk
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||||
|
)
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
CHAT_COMPLETION_ENDPOINT = os.getenv("CHAT_COMPLETION_ENDPOINT")
|
||||||
|
log.info(f"CHAT_COMPLETION_ENDPOINT: {CHAT_COMPLETION_ENDPOINT}")
|
||||||
|
|
||||||
|
|
||||||
|
CSS_STYLE = """
|
||||||
|
.json-container {
|
||||||
|
height: 95vh !important;
|
||||||
|
overflow-y: auto !important;
|
||||||
|
}
|
||||||
|
.chatbot {
|
||||||
|
height: calc(95vh - 100px) !important;
|
||||||
|
overflow-y: auto !important;
|
||||||
|
}
|
||||||
|
footer {visibility: hidden}
|
||||||
|
"""
|
||||||
|
|
||||||
|
client = OpenAI(
|
||||||
|
api_key="--",
|
||||||
|
base_url=CHAT_COMPLETION_ENDPOINT,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def chat(
    query: Optional[str],
    conversation: Optional[List[Tuple[str, str]]],
    history: List[dict],
):
    """Stream the gateway's reply to ``query`` and yield UI updates.

    Args:
        query: Text the user submitted.
        conversation: Chatbot value as ``(user, assistant)`` pairs; ``None``
            is treated as an empty conversation.
        history: Message list sent to the gateway; mutated in place.

    Yields:
        ``("", conversation, history)``: an empty string for the textbox, the
        updated pairs for the chatbot, and the message history.

    Raises:
        gr.Error: If the gateway call fails; the just-added user message is
            removed from ``history`` first.
    """
    # The parameter is Optional; guard before calling .append on it.
    if conversation is None:
        conversation = []

    history.append({"role": "user", "content": query})

    try:
        response = client.chat.completions.create(
            # we select model from arch_config file
            model="--",
            messages=history,
            temperature=1.0,
            stream=True,
        )
    except Exception as e:
        # remove last user message in case of exception
        history.pop()
        log.error("Error calling gateway API: {}".format(e))
        raise gr.Error("Error calling gateway API: {}".format(e)) from e

    conversation.append((query, ""))

    for chunk in response:
        tokens = process_stream_chunk(chunk, history)
        if tokens:
            user_text, reply_so_far = conversation[-1]
            conversation[-1] = (user_text, reply_so_far + tokens)
            yield "", conversation, history

    # Final update so the textbox is cleared and the user's turn is shown
    # even when the stream produced no assistant text.
    # NOTE(review): the original yield's nesting is not recoverable from the
    # flattened diff view; the end state is the same either way.
    yield "", conversation, history
|
||||||
|
|
||||||
|
|
||||||
|
def main(server_name="0.0.0.0", server_port=8080):
    """Build the Gradio UI and start serving it.

    NOTE(review): the layout nesting was reconstructed from a flattened diff
    view; confirm it against the original file.

    Args:
        server_name: Address to bind; the default matches the previously
            hard-coded "0.0.0.0".
        server_port: Port to listen on; the default matches the previously
            hard-coded 8080.
    """
    theme = gr.themes.Default(
        font_mono=[gr.themes.GoogleFont("IBM Plex Mono"), "Arial", "sans-serif"]
    )

    with gr.Blocks(theme=theme, fill_height=True, css=CSS_STYLE) as demo:
        with gr.Row(equal_height=True):
            # Per-session message list passed to and returned from chat().
            history = gr.State([])

            with gr.Column(scale=1):
                with gr.Accordion("See available tools", open=False):
                    with gr.Column(scale=1):
                        gr.JSON(
                            value=get_prompt_targets(),
                            show_indices=False,
                            elem_classes="json-container",
                            min_height="95vh",
                        )

            with gr.Column(scale=2):
                chatbot = gr.Chatbot(
                    label="Arch Chatbot",
                    elem_classes="chatbot",
                )
                textbox = gr.Textbox(
                    show_label=False,
                    placeholder="Enter text and press enter",
                    autofocus=True,
                    elem_classes="textbox",
                )

            # chat() receives and returns the same three components.
            textbox.submit(
                chat, [textbox, chatbot, history], [textbox, chatbot, history]
            )

    demo.launch(
        server_name=server_name,
        server_port=server_port,
        show_error=True,
        debug=True,
    )
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|
@ -900,7 +900,11 @@ impl StreamContext {
|
||||||
|
|
||||||
// don't send tools message and api response to chat gpt
|
// don't send tools message and api response to chat gpt
|
||||||
for m in callout_context.request_body.messages.iter() {
|
for m in callout_context.request_body.messages.iter() {
|
||||||
if m.role == TOOL_ROLE || m.content.is_none() {
|
// don't send api response and tool calls to upstream LLMs
|
||||||
|
if m.role == TOOL_ROLE
|
||||||
|
|| m.content.is_none()
|
||||||
|
|| (m.tool_calls.is_some() && !m.tool_calls.as_ref().unwrap().is_empty())
|
||||||
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
messages.push(m.clone());
|
messages.push(m.clone());
|
||||||
|
|
|
||||||
|
|
@ -71,7 +71,7 @@ class DefaultTargetRequest(BaseModel):
|
||||||
|
|
||||||
@app.post("/default_target")
|
@app.post("/default_target")
|
||||||
async def default_target(req: DefaultTargetRequest, res: Response):
|
async def default_target(req: DefaultTargetRequest, res: Response):
|
||||||
logger.info(f"Received arch_messages: {req.messages}")
|
logger.info(f"Received messages: {req.messages}")
|
||||||
resp = {
|
resp = {
|
||||||
"choices": [
|
"choices": [
|
||||||
{
|
{
|
||||||
|
|
|
||||||
|
|
@ -186,8 +186,8 @@ async def hallucination(req: HallucinationRequest, res: Response):
|
||||||
start_time = time.perf_counter()
|
start_time = time.perf_counter()
|
||||||
classifier = zero_shot_model["pipeline"]
|
classifier = zero_shot_model["pipeline"]
|
||||||
|
|
||||||
if "arch_messages" in req.parameters:
|
if "messages" in req.parameters:
|
||||||
req.parameters.pop("arch_messages")
|
req.parameters.pop("messages")
|
||||||
|
|
||||||
candidate_labels = {f"{k} is {v}": k for k, v in req.parameters.items()}
|
candidate_labels = {f"{k} is {v}": k for k, v in req.parameters.items()}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue