mirror of
https://github.com/katanemo/plano.git
synced 2026-06-17 15:25:17 +02:00
130 lines
3.9 KiB
Python
130 lines
3.9 KiB
Python
import json
|
|
import random
|
|
|
|
from datasets import load_dataset
|
|
from typing import Any, Dict, List
|
|
|
|
|
|
ARCH_FUNCTION_TOOL_PROMPT = (
|
|
"You are a helpful assistant designed to assist with the user query by making one or more function calls if needed."
|
|
"\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>{tool_text}\n</tools>"
|
|
"\n\nYour task is to decide which functions are needed and collect missing parameters if necessary.\n\n"
|
|
)
|
|
|
|
ARCH_FUNCTION_FORMAT_PROMPT = """
|
|
Based on your analysis, provide your response in one of the following JSON formats:
|
|
1. If no functions are needed:
|
|
```
|
|
{"response": "Your response text here"}
|
|
```
|
|
2. If functions are needed but some required parameters are missing:
|
|
```
|
|
{"required_functions": ["func_name1", "func_name2", ...], "clarification": "Text asking for missing parameters"}
|
|
```
|
|
3. If functions are needed and all required parameters are available:
|
|
```
|
|
{"tool_calls": [{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},... (more tool calls as required)]}
|
|
```
|
|
""".strip()
|
|
|
|
|
|
DATA_TYPE_MAP = {
|
|
"string": "str",
|
|
"integer": "int",
|
|
"boolean": "bool",
|
|
"number": "float",
|
|
"array": "list",
|
|
"Dict": "dict",
|
|
"List": "list",
|
|
}
|
|
|
|
|
|
def process_data_types(param_type):
|
|
extracted = {"type": [], "optional": False, "default": None}
|
|
|
|
param_type = [t.strip() for t in param_type.split(",")]
|
|
|
|
for t in param_type:
|
|
if t.startswith("optional"):
|
|
if t == "optional":
|
|
extracted["optional"] = True
|
|
elif t.startswith("default"):
|
|
if "=" in t:
|
|
extracted["default"] = t.split("=")[-1].strip()
|
|
else:
|
|
extracted["default"] = t.split(" ")[-1].strip()
|
|
else:
|
|
extracted["type"].append(t)
|
|
|
|
extracted["type"] = ", ".join(extracted["type"])
|
|
|
|
return extracted
|
|
|
|
|
|
def convert_tool(tool):
|
|
converted = {
|
|
"name": tool["name"],
|
|
"description": tool["description"],
|
|
"parameters": {"type": "object", "properties": {}, "required": []},
|
|
}
|
|
|
|
for param_name, param_value in tool["parameters"].items():
|
|
extracted = process_data_types(param_value["type"])
|
|
|
|
if extracted["type"] in DATA_TYPE_MAP:
|
|
extracted["type"] = DATA_TYPE_MAP[extracted["type"]]
|
|
|
|
parameter = {
|
|
"type": extracted["type"],
|
|
"description": param_value["description"],
|
|
}
|
|
|
|
if "default" in param_value:
|
|
parameter["default"] = param_value["default"]
|
|
elif extracted["default"] is not None:
|
|
parameter["default"] = extracted["default"]
|
|
|
|
if "default" in parameter:
|
|
if parameter["default"] is None:
|
|
parameter.pop("default")
|
|
else:
|
|
try:
|
|
if len(parameter["default"]) == 0:
|
|
parameter.pop("default")
|
|
except Exception:
|
|
pass
|
|
|
|
converted["parameters"]["properties"][param_name] = parameter
|
|
|
|
if not extracted["optional"]:
|
|
converted["parameters"]["required"].append(param_name)
|
|
|
|
return converted
|
|
|
|
|
|
def build_system_prompt(tools: List[Dict[str, Any]]) -> str:
|
|
tool_text = ""
|
|
for tool in tools:
|
|
tool_text += "\n" + json.dumps(tool)
|
|
|
|
return (
|
|
ARCH_FUNCTION_TOOL_PROMPT.format(tool_text=tool_text)
|
|
+ ARCH_FUNCTION_FORMAT_PROMPT
|
|
)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
xlam = load_dataset("Salesforce/xlam-function-calling-60k")
|
|
|
|
idx = random.sample(range(len(xlam["train"])), k=1)
|
|
example = xlam["train"][idx]
|
|
|
|
print("=" * 50 + " Tools " + "=" * 50)
|
|
tools = [convert_tool(tool) for tool in json.loads(example["tools"][0])]
|
|
print(json.dumps(tools, indent=4))
|
|
|
|
print("\n" + "=" * 50 + " System Prompt " + "=" * 50)
|
|
system_prompt = build_system_prompt(tools)
|
|
|
|
# print(repr(system_prompt.encode("unicode_escape").decode()))
|
|
print(json.dumps(system_prompt))
|