plano/tests/rest/utils/generate_random_system_prompt.py
2025-03-21 09:37:56 -07:00

130 lines
3.9 KiB
Python

import json
import random
from datasets import load_dataset
from typing import Any, Dict, List
# Opening part of the system prompt. The ``{tool_text}`` placeholder sits
# between the <tools> and </tools> tags and is meant to be filled in with
# ``str.format(tool_text=...)``.
ARCH_FUNCTION_TOOL_PROMPT = (
    "You are a helpful assistant designed to assist with the user query by making one or more function calls if needed.\n\n"
    "You are provided with function signatures within <tools></tools> XML tags:\n"
    "<tools>{tool_text}\n</tools>\n\n"
    "Your task is to decide which functions are needed and collect missing parameters if necessary.\n\n"
)
# Response-format instructions: the three JSON shapes the model may answer
# with (plain response, clarification request, tool calls). Built line by
# line so the value has no leading or trailing whitespace.
ARCH_FUNCTION_FORMAT_PROMPT = "\n".join(
    (
        "Based on your analysis, provide your response in one of the following JSON formats:",
        "1. If no functions are needed:",
        "```",
        '{"response": "Your response text here"}',
        "```",
        "2. If functions are needed but some required parameters are missing:",
        "```",
        '{"required_functions": ["func_name1", "func_name2", ...], "clarification": "Text asking for missing parameters"}',
        "```",
        "3. If functions are needed and all required parameters are available:",
        "```",
        '{"tool_calls": [{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},... (more tool calls as required)]}',
        "```",
    )
)
# Translates type spellings (e.g. "string", "List") into the names of the
# corresponding Python builtin types. Spellings not listed here have no entry.
DATA_TYPE_MAP: Dict[str, str] = dict(
    string="str",
    integer="int",
    boolean="bool",
    number="float",
    array="list",
    Dict="dict",
    List="list",
)
def process_data_types(param_type: str) -> Dict[str, Any]:
    """Parse a comma-separated type description such as ``"int, optional"``.

    The string is split on commas and each stripped token is classified:

    * exactly ``"optional"`` marks the parameter as optional;
    * a token starting with ``"default"`` supplies a default value, either
      after the first ``=`` (``"default=5"``) or as the last
      whitespace-separated word (``"default 5"``);
    * every other token is treated as part of the type name.

    Args:
        param_type: The raw type description.

    Returns:
        A dict with keys ``"type"`` (the type tokens re-joined with ``", "``,
        so ``"Dict[str, Any]"`` survives the comma split), ``"optional"``
        (bool) and ``"default"`` (the default as a string, or ``None`` when
        no value was given).
    """
    type_tokens: List[str] = []
    is_optional = False
    default = None

    for token in (piece.strip() for piece in param_type.split(",")):
        if token.startswith("optional"):
            # Only the exact keyword flags the parameter; any other token that
            # merely begins with "optional" is dropped from the result.
            if token == "optional":
                is_optional = True
        elif token.startswith("default"):
            if "=" in token:
                # Split on the first "=" only, so a value that itself contains
                # "=" (e.g. "default=a=b") is kept whole.
                default = token.split("=", 1)[1].strip()
            else:
                words = token.split()
                # A bare "default" carries no value; do not report the keyword
                # itself as the default.
                if len(words) > 1:
                    default = words[-1]
        else:
            type_tokens.append(token)

    return {
        "type": ", ".join(type_tokens),
        "optional": is_optional,
        "default": default,
    }
def _is_empty_default(value: Any) -> bool:
    """Return True when *value* is ``None`` or a zero-length sized object."""
    if value is None:
        return True
    try:
        return len(value) == 0
    except TypeError:
        # Unsized values such as numbers and booleans are genuine defaults.
        return False


def convert_tool(tool: Dict[str, Any]) -> Dict[str, Any]:
    """Convert a tool description into a schema with an object ``parameters`` block.

    Args:
        tool: A mapping with ``"name"``, ``"description"`` and ``"parameters"``
            keys. ``"parameters"`` maps each parameter name to a mapping that
            has ``"type"`` and ``"description"`` and may have ``"default"``.

    Returns:
        A dict with ``"name"``, ``"description"`` and ``"parameters"``, where
        ``"parameters"`` is ``{"type": "object", "properties": ...,
        "required": [...]}``. Every parameter whose type string does not
        contain the ``optional`` keyword is listed under ``"required"``.

    Raises:
        KeyError: If any of the keys read above is missing.
    """
    converted = {
        "name": tool["name"],
        "description": tool["description"],
        "parameters": {"type": "object", "properties": {}, "required": []},
    }
    properties = converted["parameters"]["properties"]
    required = converted["parameters"]["required"]

    for param_name, param_value in tool["parameters"].items():
        extracted = process_data_types(param_value["type"])
        type_name = extracted["type"]

        parameter = {
            # Type names without an entry in the map pass through unchanged.
            "type": DATA_TYPE_MAP.get(type_name, type_name),
            "description": param_value["description"],
        }

        # An explicit "default" key takes precedence over one embedded in the
        # type string, even when the explicit value is None or empty.
        if "default" in param_value:
            default = param_value["default"]
        else:
            default = extracted["default"]

        # None and empty strings/containers are treated as "no default".
        if not _is_empty_default(default):
            parameter["default"] = default

        properties[param_name] = parameter

        if not extracted["optional"]:
            required.append(param_name)

    return converted
def build_system_prompt(tools: List[Dict[str, Any]]) -> str:
    """Assemble the full system prompt for the given tool definitions.

    Each tool is serialized with ``json.dumps`` on its own line (preceded by a
    newline), substituted for ``{tool_text}`` in ``ARCH_FUNCTION_TOOL_PROMPT``,
    and followed by ``ARCH_FUNCTION_FORMAT_PROMPT``.

    Args:
        tools: JSON-serializable tool definitions; may be empty, in which case
            the tools section contains no entries.

    Returns:
        The complete system prompt string.
    """
    # Join once instead of growing a string with += inside the loop.
    tool_text = "".join("\n" + json.dumps(tool) for tool in tools)
    return (
        ARCH_FUNCTION_TOOL_PROMPT.format(tool_text=tool_text)
        + ARCH_FUNCTION_FORMAT_PROMPT
    )
if __name__ == "__main__":
    rule = "=" * 50

    # Draw one random row index from the "train" split of the dataset.
    dataset = load_dataset("Salesforce/xlam-function-calling-60k")
    picked = random.sample(range(len(dataset["train"])), k=1)
    # NOTE(review): indexing with a one-element list presumably returns
    # column lists, which is why the [0] below unwraps the single selected
    # row -- confirm against the datasets API.
    batch = dataset["train"][picked]

    print(f"{rule} Tools {rule}")
    raw_tools = json.loads(batch["tools"][0])
    tools = [convert_tool(raw_tool) for raw_tool in raw_tools]
    print(json.dumps(tools, indent=4))

    print(f"\n{rule} System Prompt {rule}")
    system_prompt = build_system_prompt(tools)
    # json.dumps emits the prompt as one quoted, escaped string literal.
    print(json.dumps(system_prompt))