From 1265d3d924b0e1553591c6628d0c2de2a18d5722 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Fri, 8 Dec 2023 12:37:06 +0800 Subject: [PATCH 01/68] feat: make_tools by function. --- metagpt/actions/make_tools.py | 49 ++++++++++++++++++++++++ metagpt/provider/base_gpt_api.py | 2 +- tests/metagpt/actions/test_make_tools.py | 18 +++++++++ 3 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 metagpt/actions/make_tools.py create mode 100644 tests/metagpt/actions/test_make_tools.py diff --git a/metagpt/actions/make_tools.py b/metagpt/actions/make_tools.py new file mode 100644 index 000000000..7fd05751e --- /dev/null +++ b/metagpt/actions/make_tools.py @@ -0,0 +1,49 @@ +from typing import List, Dict +from pathlib import Path +import re + +from tenacity import retry, stop_after_attempt, wait_fixed + +from metagpt.logs import logger +from metagpt.schema import Message +from metagpt.actions.write_analysis_code import WriteCodeByGenerate + + +class MakeTools(WriteCodeByGenerate): + DEFAULT_SYSTEM_MSG = """Please Create a General Function Code startswith `def` from any codes you got.\n + **Notice:The import statement must be written after `def`, it is very important for you. + Reflect on whether it meets the requirements of function. Must Write example code, and we will execute the example code.** + """ + + def __init__(self, name: str = '', context=None, llm=None, workspace: str = None): + super().__init__(name, context, llm) + self.workspace = workspace or "." + self.file_suffix = '.py' + + def parse_function_name(self, function_code: str) -> str: + # 定义正则表达式模式 + pattern = r'\bdef\s+([a-zA-Z_]\w*)\s*\(' + # 在代码中搜索匹配的模式 + match = re.search(pattern, function_code) + # 如果找到匹配项,则返回匹配的函数名;否则返回None + if match: + return match.group(1) + else: + return None + + def save(self, tool_code: str) -> None: + func_name = self.parse_function_name(tool_code) + if func_name is None: + raise ValueError(f"No function name found in {tool_code}") + saved_path = Path(self.workspace).joinpath(func_name+self.file_suffix) + logger.info(f"Saved tool_code {func_name} in {str(saved_path)}.") + saved_path.write_text(tool_code, encoding='utf-8') + + @retry(stop=stop_after_attempt(3), wait=wait_fixed(1)) + async def run(self, code_message: List[Message | Dict], **kwargs) -> str: + msgs = self.process_msg(code_message) + logger.info(f"Ask: {msgs[-1]}") + tool_code = await self.llm.aask_code(msgs, **kwargs) + logger.info(f"Respond: Got {tool_code} from llm.") + self.save(tool_code['code']) + return tool_code["code"] diff --git a/metagpt/provider/base_gpt_api.py b/metagpt/provider/base_gpt_api.py index b6b034329..5516ceb7c 100644 --- a/metagpt/provider/base_gpt_api.py +++ b/metagpt/provider/base_gpt_api.py @@ -150,7 +150,7 @@ class BaseGPTAPI(BaseChatbot): :return dict: return the first function arguments of choice, for example, {'language': 'python', 'code': "print('Hello, World!')"} """ - return json.loads(self.get_choice_function(rsp)["arguments"]) + return json.loads(self.get_choice_function(rsp)["arguments"], strict=False) def messages_to_prompt(self, messages: list[dict]): """[{"role": "user", "content": msg}] to user: etc.""" diff --git a/tests/metagpt/actions/test_make_tools.py b/tests/metagpt/actions/test_make_tools.py new file mode 100644 index 000000000..2c5168bf1 --- /dev/null +++ b/tests/metagpt/actions/test_make_tools.py @@ -0,0 +1,18 @@ +import pytest + +from metagpt.actions.execute_code import ExecutePyCode +from metagpt.actions.make_tools import MakeTools + + +@pytest.mark.asyncio +async def test_make_tools(): + code = "import yfinance as yf\n\n# Collect Alibaba stock data\nalibaba = yf.Ticker('BABA')\ndata = alibaba.history(period='1d', start='2022-01-01', end='2022-12-31')\nprint(data.head())" + msgs = [{'role': 'assistant', 'content': code}] + mt = MakeTools() + tool_code = await mt.run(msgs) + print(tool_code) + ep = ExecutePyCode() + tool_code = "!pip install yfinance\n" + tool_code + result, res_type = await ep.run(tool_code) + assert res_type is True + print(result) From ab020adec4c10e400410fb43c5dc7972e4cf0477 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 11 Dec 2023 14:39:14 +0800 Subject: [PATCH 02/68] update: add refactor code for make_tools. --- metagpt/actions/make_tools.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/metagpt/actions/make_tools.py b/metagpt/actions/make_tools.py index 7fd05751e..9da829e1f 100644 --- a/metagpt/actions/make_tools.py +++ b/metagpt/actions/make_tools.py @@ -11,8 +11,10 @@ from metagpt.actions.write_analysis_code import WriteCodeByGenerate class MakeTools(WriteCodeByGenerate): DEFAULT_SYSTEM_MSG = """Please Create a General Function Code startswith `def` from any codes you got.\n - **Notice:The import statement must be written after `def`, it is very important for you. - Reflect on whether it meets the requirements of function. Must Write example code, and we will execute the example code.** + **Notice:1. The import statement must be written after `def`, it is very important for you. + 2. Reflect on whether it meets the requirements of function. + 3. Refactor your code with the best performance when dealing with big data. + 4. Must Write example code, and it could be execute in the user machine.** """ def __init__(self, name: str = '', context=None, llm=None, workspace: str = None): From 402ec5bcb44528f9c2ce7505e75f30998cd87024 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 11 Dec 2023 14:39:57 +0800 Subject: [PATCH 03/68] add new test for make tools. --- tests/metagpt/actions/test_make_tools.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/metagpt/actions/test_make_tools.py b/tests/metagpt/actions/test_make_tools.py index 2c5168bf1..4f7d7859a 100644 --- a/tests/metagpt/actions/test_make_tools.py +++ b/tests/metagpt/actions/test_make_tools.py @@ -16,3 +16,19 @@ async def test_make_tools(): result, res_type = await ep.run(tool_code) assert res_type is True print(result) + + +@pytest.mark.asyncio +async def test_make_tools2(): + code = '''import pandas as pd\npath = "./tests/data/test.csv"\ndf = pd.read_csv(path)\ndata = df.copy()\n + data['started_at'] = data['started_at'].apply(lambda r: pd.to_datetime(r))\n + data['ended_at'] = data['ended_at'].apply(lambda r: pd.to_datetime(r))\ndata.head()''' + msgs = [{'role': 'assistant', 'content': code}] + mt = MakeTools() + tool_code = await mt.run(msgs) + print(tool_code) + ep = ExecutePyCode() + tool_code = tool_code + result, res_type = await ep.run(tool_code) + assert res_type is True + print(result) From d9342025cdd01730f87ede2f0f9e10aaedd7dda6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 11 Dec 2023 14:40:16 +0800 Subject: [PATCH 04/68] update typing-extensions. --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 1d1bc95a1..1ca309762 100644 --- a/requirements.txt +++ b/requirements.txt @@ -51,4 +51,4 @@ nbformat==5.9.2 ipython==8.17.2 ipykernel==6.27.0 scikit_learn==1.3.2 -typing-extensions==4.8.0 \ No newline at end of file +typing-extensions==4.9.0 \ No newline at end of file From 3ea4b3200bef5bbdc1b656b34093a74f03d4d334 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 11 Dec 2023 15:00:28 +0800 Subject: [PATCH 05/68] update DEFAULT_SYSTEM_MSG. --- metagpt/actions/make_tools.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metagpt/actions/make_tools.py b/metagpt/actions/make_tools.py index 9da829e1f..0b5d09d8c 100644 --- a/metagpt/actions/make_tools.py +++ b/metagpt/actions/make_tools.py @@ -13,8 +13,8 @@ class MakeTools(WriteCodeByGenerate): DEFAULT_SYSTEM_MSG = """Please Create a General Function Code startswith `def` from any codes you got.\n **Notice:1. The import statement must be written after `def`, it is very important for you. 2. Reflect on whether it meets the requirements of function. - 3. Refactor your code with the best performance when dealing with big data. - 4. Must Write example code, and it could be execute in the user machine.** + 3. Refactor your code to get the most efficient implementation for large input data in the shortest amount of time. + 4. Write example code by using old varibales in old code, and make sure it could be execute in the user's machine.** """ def __init__(self, name: str = '', context=None, llm=None, workspace: str = None): From 65db6683e6069501a669e73c1eaad3bae7566a09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 11 Dec 2023 15:18:26 +0800 Subject: [PATCH 06/68] add new test instance. --- tests/metagpt/actions/test_make_tools.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/metagpt/actions/test_make_tools.py b/tests/metagpt/actions/test_make_tools.py index 4f7d7859a..7811cf7ab 100644 --- a/tests/metagpt/actions/test_make_tools.py +++ b/tests/metagpt/actions/test_make_tools.py @@ -32,3 +32,20 @@ async def test_make_tools2(): result, res_type = await ep.run(tool_code) assert res_type is True print(result) + + +@pytest.mark.asyncio +async def test_make_tools3(): + code = '''import pandas as pd\npath = "./tests/data/test.csv"\ndf = pd.read_csv(path)\ndata = df.copy()\n + data['started_at'] = data['started_at'].apply(lambda r: pd.to_datetime(r))\n + data['ended_at'] = data['ended_at'].apply(lambda r: pd.to_datetime(r))\n + data['duration_hour'] = (data['ended_at'] - data['started_at']).dt.seconds/3600\ndata.head()''' + msgs = [{'role': 'assistant', 'content': code}] + mt = MakeTools() + tool_code = await mt.run(msgs) + print(tool_code) + ep = ExecutePyCode() + tool_code = tool_code + result, res_type = await ep.run(tool_code) + assert res_type is True + print(result) From 5a01fdb0e2b00f597a702b45ff818977fd9dba9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 11 Dec 2023 15:18:59 +0800 Subject: [PATCH 07/68] update DEFAULT_SYSTEM_MSG. --- metagpt/actions/make_tools.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metagpt/actions/make_tools.py b/metagpt/actions/make_tools.py index 0b5d09d8c..9ab7fd922 100644 --- a/metagpt/actions/make_tools.py +++ b/metagpt/actions/make_tools.py @@ -10,9 +10,9 @@ from metagpt.actions.write_analysis_code import WriteCodeByGenerate class MakeTools(WriteCodeByGenerate): - DEFAULT_SYSTEM_MSG = """Please Create a General Function Code startswith `def` from any codes you got.\n + DEFAULT_SYSTEM_MSG = """Please Create a very General Function Code startswith `def` from any codes you got.\n **Notice:1. The import statement must be written after `def`, it is very important for you. - 2. Reflect on whether it meets the requirements of function. + 2. Reflect on whether it meets the requirements of a general function. 3. Refactor your code to get the most efficient implementation for large input data in the shortest amount of time. 4. Write example code by using old varibales in old code, and make sure it could be execute in the user's machine.** """ From 76c95f8428ac08b5bd1a12f4e742c108fbae08eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 11 Dec 2023 16:00:09 +0800 Subject: [PATCH 08/68] chore: add logger.debug(). --- tests/metagpt/actions/test_make_tools.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/metagpt/actions/test_make_tools.py b/tests/metagpt/actions/test_make_tools.py index 7811cf7ab..264599439 100644 --- a/tests/metagpt/actions/test_make_tools.py +++ b/tests/metagpt/actions/test_make_tools.py @@ -2,6 +2,7 @@ import pytest from metagpt.actions.execute_code import ExecutePyCode from metagpt.actions.make_tools import MakeTools +from metagpt.logs import logger @pytest.mark.asyncio @@ -10,12 +11,12 @@ async def test_make_tools(): msgs = [{'role': 'assistant', 'content': code}] mt = MakeTools() tool_code = await mt.run(msgs) - print(tool_code) + logger.debug(tool_code) ep = ExecutePyCode() tool_code = "!pip install yfinance\n" + tool_code result, res_type = await ep.run(tool_code) assert res_type is True - print(result) + logger.debug(result) @pytest.mark.asyncio @@ -26,12 +27,12 @@ async def test_make_tools2(): msgs = [{'role': 'assistant', 'content': code}] mt = MakeTools() tool_code = await mt.run(msgs) - print(tool_code) + logger.debug(tool_code) ep = ExecutePyCode() tool_code = tool_code result, res_type = await ep.run(tool_code) assert res_type is True - print(result) + logger.debug(result) @pytest.mark.asyncio @@ -43,9 +44,9 @@ async def test_make_tools3(): msgs = [{'role': 'assistant', 'content': code}] mt = MakeTools() tool_code = await mt.run(msgs) - print(tool_code) + logger.debug(tool_code) ep = ExecutePyCode() tool_code = tool_code result, res_type = await ep.run(tool_code) assert res_type is True - print(result) + logger.debug(result) From c2f0e547ee2db3332fdb4408ef8f2c179243735d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 11 Dec 2023 16:03:22 +0800 Subject: [PATCH 09/68] =?UTF-8?q?chore:=20=E5=B1=9E=E6=80=A7=E6=B3=A8?= =?UTF-8?q?=E9=87=8A=EF=BC=8C=E4=BB=A5=E5=8F=8A=E5=85=A5=E5=8F=82=E7=9A=84?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E7=B1=BB=E5=9E=8B=E5=AE=9A=E4=B9=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/actions/make_tools.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/metagpt/actions/make_tools.py b/metagpt/actions/make_tools.py index 9ab7fd922..f7e385138 100644 --- a/metagpt/actions/make_tools.py +++ b/metagpt/actions/make_tools.py @@ -4,6 +4,7 @@ import re from tenacity import retry, stop_after_attempt, wait_fixed +from metagpt.llm import LLM from metagpt.logs import logger from metagpt.schema import Message from metagpt.actions.write_analysis_code import WriteCodeByGenerate @@ -17,10 +18,10 @@ class MakeTools(WriteCodeByGenerate): 4. Write example code by using old varibales in old code, and make sure it could be execute in the user's machine.** """ - def __init__(self, name: str = '', context=None, llm=None, workspace: str = None): + def __init__(self, name: str = '', context: list[Message] = None, llm: LLM = None, workspace: str = None): super().__init__(name, context, llm) self.workspace = workspace or "." - self.file_suffix = '.py' + self.file_suffix: str = '.py' def parse_function_name(self, function_code: str) -> str: # 定义正则表达式模式 From 4b58942159342c74c053a235b473e578f3147dbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 11 Dec 2023 16:05:50 +0800 Subject: [PATCH 10/68] =?UTF-8?q?chore:=20=E5=B1=9E=E6=80=A7=E6=B3=A8?= =?UTF-8?q?=E9=87=8A=EF=BC=8C=E4=BB=A5=E5=8F=8A=E5=85=A5=E5=8F=82=E7=9A=84?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E7=B1=BB=E5=9E=8B=E5=AE=9A=E4=B9=89.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/actions/make_tools.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/metagpt/actions/make_tools.py b/metagpt/actions/make_tools.py index f7e385138..aa2ebe501 100644 --- a/metagpt/actions/make_tools.py +++ b/metagpt/actions/make_tools.py @@ -19,6 +19,12 @@ class MakeTools(WriteCodeByGenerate): """ def __init__(self, name: str = '', context: list[Message] = None, llm: LLM = None, workspace: str = None): + """ + :param str name: name, defaults to '' + :param list[Message] context: context, defaults to None + :param LLM llm: llm, defaults to None + :param str workspace: tools code saved file path dir, defaults to None + """ super().__init__(name, context, llm) self.workspace = workspace or "." self.file_suffix: str = '.py' From 1b4aac394d1a5095224a735a83e3034d447231c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 11 Dec 2023 19:28:57 +0800 Subject: [PATCH 11/68] chore: update DEFAULT_SYSTEM_MSG and self.workspace. --- metagpt/actions/make_tools.py | 10 ++++++---- metagpt/tools/functions/libs/udf/__init__.py | 0 2 files changed, 6 insertions(+), 4 deletions(-) create mode 100644 metagpt/tools/functions/libs/udf/__init__.py diff --git a/metagpt/actions/make_tools.py b/metagpt/actions/make_tools.py index aa2ebe501..7cad8ef7b 100644 --- a/metagpt/actions/make_tools.py +++ b/metagpt/actions/make_tools.py @@ -12,9 +12,10 @@ from metagpt.actions.write_analysis_code import WriteCodeByGenerate class MakeTools(WriteCodeByGenerate): DEFAULT_SYSTEM_MSG = """Please Create a very General Function Code startswith `def` from any codes you got.\n - **Notice:1. The import statement must be written after `def`, it is very important for you. - 2. Reflect on whether it meets the requirements of a general function. - 3. Refactor your code to get the most efficient implementation for large input data in the shortest amount of time. + **Notice: + 1. Reflect on whether it meets the requirements of a general function. + 2. Refactor your code to get the most efficient implementation for large input data in the shortest amount of time. + 3. Use Google style for function annotations. 4. Write example code by using old varibales in old code, and make sure it could be execute in the user's machine.** """ @@ -26,7 +27,7 @@ class MakeTools(WriteCodeByGenerate): :param str workspace: tools code saved file path dir, defaults to None """ super().__init__(name, context, llm) - self.workspace = workspace or "." + self.workspace = workspace or str(Path(__file__).parents[1].joinpath("./tools/functions/libs/udf")) self.file_suffix: str = '.py' def parse_function_name(self, function_code: str) -> str: @@ -47,6 +48,7 @@ class MakeTools(WriteCodeByGenerate): saved_path = Path(self.workspace).joinpath(func_name+self.file_suffix) logger.info(f"Saved tool_code {func_name} in {str(saved_path)}.") saved_path.write_text(tool_code, encoding='utf-8') + # TODO: 保存到udf中,供WriteCodeWithMakeTools使用 @retry(stop=stop_after_attempt(3), wait=wait_fixed(1)) async def run(self, code_message: List[Message | Dict], **kwargs) -> str: diff --git a/metagpt/tools/functions/libs/udf/__init__.py b/metagpt/tools/functions/libs/udf/__init__.py new file mode 100644 index 000000000..e69de29bb From 51bf8863af9414069d7de54d780fc5f1d83bf51a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 11 Dec 2023 19:47:23 +0800 Subject: [PATCH 12/68] add udf. --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index e03eab3d3..1a517d027 100644 --- a/.gitignore +++ b/.gitignore @@ -129,6 +129,7 @@ venv.bak/ .mypy_cache/ .dmypy.json dmypy.json +metagpt/tools/functions/libs/udf/*.py # Pyre type checker .pyre/ From 2b8dbec5d044c7e5c67a6cb4b3146e69a632bab8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 11 Dec 2023 21:01:53 +0800 Subject: [PATCH 13/68] update DEFAULT_SYSTEM_MSG. --- metagpt/actions/make_tools.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metagpt/actions/make_tools.py b/metagpt/actions/make_tools.py index 7cad8ef7b..2b2ba1cd5 100644 --- a/metagpt/actions/make_tools.py +++ b/metagpt/actions/make_tools.py @@ -16,7 +16,8 @@ class MakeTools(WriteCodeByGenerate): 1. Reflect on whether it meets the requirements of a general function. 2. Refactor your code to get the most efficient implementation for large input data in the shortest amount of time. 3. Use Google style for function annotations. - 4. Write example code by using old varibales in old code, and make sure it could be execute in the user's machine.** + 4. Write example code after `if __name__ == '__main__':`by using old varibales in old code, + and make sure it could be execute in the user's machine.** """ def __init__(self, name: str = '', context: list[Message] = None, llm: LLM = None, workspace: str = None): From 3de10e76562c71d884c8c2a3dd93a1180eae15b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 11 Dec 2023 21:12:36 +0800 Subject: [PATCH 14/68] add UDFS for make tools. --- metagpt/tools/functions/libs/udf/__init__.py | 50 ++++++++++++++++++++ tests/metagpt/tools/functions/test_udf.py | 9 ++++ 2 files changed, 59 insertions(+) create mode 100644 tests/metagpt/tools/functions/test_udf.py diff --git a/metagpt/tools/functions/libs/udf/__init__.py b/metagpt/tools/functions/libs/udf/__init__.py index e69de29bb..0bdf84d87 100644 --- a/metagpt/tools/functions/libs/udf/__init__.py +++ b/metagpt/tools/functions/libs/udf/__init__.py @@ -0,0 +1,50 @@ +import ast +import os +import inspect +import importlib +from pathlib import Path + + +def extract_function_signatures(file_path): + with open(file_path, 'r', encoding='utf-8') as file: + source_code = file.read() + + tree = ast.parse(source_code) + function_signatures = [] + for node in ast.walk(tree): + if isinstance(node, ast.FunctionDef): + # 只提取用户自定义函数,排除内置函数 + if not (node.name.startswith('__') and node.name.endswith('__')): + # 获取函数名 + function_name = node.name + # 获取参数列表 + args = [arg.arg for arg in node.args.args] + # 获取函数签名 + function_signature = f"{function_name}({', '.join(args)})" + # 导入函数 + module = Path(file_path).parts[-1][:-len(Path(file_path).suffix)] + module = importlib.import_module(f"metagpt.tools.functions.libs.udf.{module}") + # 获取函数注释 + function_schema = {'name': function_signature, 'doc': inspect.getdoc(getattr(module, function_name))} + function_signatures.append(function_schema) + + return function_signatures + + +def get_function_signatures_in_folder(folder_path): + python_files = [f for f in os.listdir(folder_path) if f.endswith('.py')] + all_function_signatures = [] + + for file_name in python_files: + file_path = os.path.join(folder_path, file_name) + function_signatures = extract_function_signatures(file_path) + all_function_signatures.extend(function_signatures) + + return all_function_signatures + + +folder_path = str(Path(__file__).parent.absolute()) +function_signatures = get_function_signatures_in_folder(folder_path) + +UDFS = [func for func in function_signatures + if not func['name'].startswith(('extract_function_signatures', 'get_function_signatures_in_folder'))] diff --git a/tests/metagpt/tools/functions/test_udf.py b/tests/metagpt/tools/functions/test_udf.py new file mode 100644 index 000000000..b0c921180 --- /dev/null +++ b/tests/metagpt/tools/functions/test_udf.py @@ -0,0 +1,9 @@ +from metagpt.tools.functions.libs.udf import UDFS +from metagpt.logs import logger + + +def test_udfs(): + assert len(UDFS) > 0 + assert 'name' in UDFS[0] + assert 'doc' in UDFS[0] + logger.info(UDFS) From ee1e8609a6523995ca002e4a6c4b1ea792cda1ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 12 Dec 2023 10:08:15 +0800 Subject: [PATCH 15/68] add function path for function_signatures. --- metagpt/tools/functions/libs/udf/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metagpt/tools/functions/libs/udf/__init__.py b/metagpt/tools/functions/libs/udf/__init__.py index 0bdf84d87..c90357b5c 100644 --- a/metagpt/tools/functions/libs/udf/__init__.py +++ b/metagpt/tools/functions/libs/udf/__init__.py @@ -25,7 +25,8 @@ def extract_function_signatures(file_path): module = Path(file_path).parts[-1][:-len(Path(file_path).suffix)] module = importlib.import_module(f"metagpt.tools.functions.libs.udf.{module}") # 获取函数注释 - function_schema = {'name': function_signature, 'doc': inspect.getdoc(getattr(module, function_name))} + function_schema = {'name': function_signature, 'doc': inspect.getdoc(getattr(module, function_name)), + 'path': f'from metagpt.tools.functions.libs.udf.{module} import function_name'} function_signatures.append(function_schema) return function_signatures From 0278934131ff53d8a83fcb46a4b17c6c262ac28f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 12 Dec 2023 10:22:55 +0800 Subject: [PATCH 16/68] chore --- metagpt/actions/make_tools.py | 1 - 1 file changed, 1 deletion(-) diff --git a/metagpt/actions/make_tools.py b/metagpt/actions/make_tools.py index 2b2ba1cd5..74037e900 100644 --- a/metagpt/actions/make_tools.py +++ b/metagpt/actions/make_tools.py @@ -49,7 +49,6 @@ class MakeTools(WriteCodeByGenerate): saved_path = Path(self.workspace).joinpath(func_name+self.file_suffix) logger.info(f"Saved tool_code {func_name} in {str(saved_path)}.") saved_path.write_text(tool_code, encoding='utf-8') - # TODO: 保存到udf中,供WriteCodeWithMakeTools使用 @retry(stop=stop_after_attempt(3), wait=wait_fixed(1)) async def run(self, code_message: List[Message | Dict], **kwargs) -> str: From db96644a0842f30545ed7de106ed01c3cdb75cd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 12 Dec 2023 10:45:15 +0800 Subject: [PATCH 17/68] chore --- metagpt/tools/functions/libs/udf/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/tools/functions/libs/udf/__init__.py b/metagpt/tools/functions/libs/udf/__init__.py index c90357b5c..c581dd992 100644 --- a/metagpt/tools/functions/libs/udf/__init__.py +++ b/metagpt/tools/functions/libs/udf/__init__.py @@ -24,7 +24,7 @@ def extract_function_signatures(file_path): # 导入函数 module = Path(file_path).parts[-1][:-len(Path(file_path).suffix)] module = importlib.import_module(f"metagpt.tools.functions.libs.udf.{module}") - # 获取函数注释 + # 获取函数注释和函数路径 function_schema = {'name': function_signature, 'doc': inspect.getdoc(getattr(module, function_name)), 'path': f'from metagpt.tools.functions.libs.udf.{module} import function_name'} function_signatures.append(function_schema) From 10d488c49a2aaf93f93e5ff43daf58811b5cd195 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 12 Dec 2023 11:12:17 +0800 Subject: [PATCH 18/68] fix: path in function_signatures. --- metagpt/tools/functions/libs/udf/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metagpt/tools/functions/libs/udf/__init__.py b/metagpt/tools/functions/libs/udf/__init__.py index c581dd992..e44e97c41 100644 --- a/metagpt/tools/functions/libs/udf/__init__.py +++ b/metagpt/tools/functions/libs/udf/__init__.py @@ -22,11 +22,11 @@ def extract_function_signatures(file_path): # 获取函数签名 function_signature = f"{function_name}({', '.join(args)})" # 导入函数 - module = Path(file_path).parts[-1][:-len(Path(file_path).suffix)] - module = importlib.import_module(f"metagpt.tools.functions.libs.udf.{module}") + module_name = Path(file_path).parts[-1][:-len(Path(file_path).suffix)] + module = importlib.import_module(f"metagpt.tools.functions.libs.udf.{module_name}") # 获取函数注释和函数路径 function_schema = {'name': function_signature, 'doc': inspect.getdoc(getattr(module, function_name)), - 'path': f'from metagpt.tools.functions.libs.udf.{module} import function_name'} + 'path': f'from metagpt.tools.functions.libs.udf.{module_name} import {function_name}'} function_signatures.append(function_schema) return function_signatures From b7624d7298536135e84c1af1f08ad3e51bf09093 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 12 Dec 2023 14:41:43 +0800 Subject: [PATCH 19/68] feat: add WriteCodeWithUDFs. --- metagpt/actions/write_analysis_code.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py index 1127dc78b..725c4aa2a 100644 --- a/metagpt/actions/write_analysis_code.py +++ b/metagpt/actions/write_analysis_code.py @@ -7,6 +7,7 @@ from typing import Dict, List, Union, Tuple from metagpt.actions import Action +from metagpt.llm import LLM from metagpt.logs import logger from metagpt.prompts.ml_engineer import ( TOOL_RECOMMENDATION_PROMPT, @@ -19,7 +20,7 @@ from metagpt.prompts.ml_engineer import ( ) from metagpt.schema import Message, Plan from metagpt.tools.functions import registry -from metagpt.utils.common import create_func_config +from metagpt.utils.common import create_func_config, CodeParser class BaseWriteAnalysisCode(Action): @@ -203,3 +204,24 @@ class WriteCodeWithTools(BaseWriteAnalysisCode): tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS) rsp = await self.llm.aask_code(prompt, **tool_config) return rsp["code"] + + +class WriteCodeWithUDFs(WriteCodeByGenerate): + """Write code with user defined function.""" + from metagpt.tools.functions.libs.udf import UDFS + + DEFAULT_SYSTEM_MSG = f"""Please remember these functions, you will use these functions to write code:\n + {UDFS} + """ + + async def aask_code_and_text(self, context: List[Dict], **kwargs) -> Tuple[str]: + rsp = await self.llm.acompletion(context, **kwargs) + rsp_content = self.llm.get_choice_text(rsp) + code = CodeParser.parse_code(None, rsp_content) + return code, rsp_content + + async def run(self, context: List[Message], plan: Plan = None, task_guide: str = "", **kwargs) -> str: + prompt = self.process_msg(context) + logger.info(prompt[-1]) + code, _ = await self.aask_code_and_text(prompt, **kwargs) + return code From 116e7718babf53904d0fb3a76b168d23fc1b46d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 12 Dec 2023 14:42:29 +0800 Subject: [PATCH 20/68] add test_write_code_with_udfs. --- tests/metagpt/actions/test_write_analysis_code.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/metagpt/actions/test_write_analysis_code.py b/tests/metagpt/actions/test_write_analysis_code.py index 661202115..c3e7adc1b 100644 --- a/tests/metagpt/actions/test_write_analysis_code.py +++ b/tests/metagpt/actions/test_write_analysis_code.py @@ -1,7 +1,7 @@ import asyncio import pytest -from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools +from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools, WriteCodeWithUDFs from metagpt.actions.execute_code import ExecutePyCode from metagpt.schema import Message, Plan, Task from metagpt.logs import logger @@ -311,3 +311,15 @@ async def test_write_code_reuse_code_long_for_wine(): success_rate = sum(success) / trials_num logger.info(f"success rate: {success_rate :.2f}") assert success_rate >= 0.8 + + +@pytest.mark.asyncio +async def test_write_code_with_udfs(): + wudf = WriteCodeWithUDFs() + ep = ExecutePyCode() + rsp = await wudf.run("Get Apple stock data for the past 90 days.") + logger.info(rsp) + assert 'metagpt' in rsp + output, output_type = await ep.run(rsp) + assert output_type is True + logger.info(output) From 9651cdd735bf82928f3ada3d299d0c442edbfd73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 12 Dec 2023 14:45:06 +0800 Subject: [PATCH 21/68] update function_schema. --- metagpt/tools/functions/libs/udf/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/metagpt/tools/functions/libs/udf/__init__.py b/metagpt/tools/functions/libs/udf/__init__.py index e44e97c41..c9c818a96 100644 --- a/metagpt/tools/functions/libs/udf/__init__.py +++ b/metagpt/tools/functions/libs/udf/__init__.py @@ -25,8 +25,9 @@ def extract_function_signatures(file_path): module_name = Path(file_path).parts[-1][:-len(Path(file_path).suffix)] module = importlib.import_module(f"metagpt.tools.functions.libs.udf.{module_name}") # 获取函数注释和函数路径 - function_schema = {'name': function_signature, 'doc': inspect.getdoc(getattr(module, function_name)), - 'path': f'from metagpt.tools.functions.libs.udf.{module_name} import {function_name}'} + function_schema = {'udf_name': function_signature, + 'udf_path': f'from metagpt.tools.functions.libs.udf.{module_name} import {function_name}', + 'udf_doc': inspect.getdoc(getattr(module, function_name))} function_signatures.append(function_schema) return function_signatures @@ -48,4 +49,4 @@ folder_path = str(Path(__file__).parent.absolute()) function_signatures = get_function_signatures_in_folder(folder_path) UDFS = [func for func in function_signatures - if not func['name'].startswith(('extract_function_signatures', 'get_function_signatures_in_folder'))] + if not func['udf_name'].startswith(('extract_function_signatures', 'get_function_signatures_in_folder'))] From 86e320be1187ef4738a8000e270cc69cdbf31030 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 12 Dec 2023 14:57:22 +0800 Subject: [PATCH 22/68] update for no_udf_found. --- metagpt/actions/write_analysis_code.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py index 725c4aa2a..663f76b7b 100644 --- a/metagpt/actions/write_analysis_code.py +++ b/metagpt/actions/write_analysis_code.py @@ -211,13 +211,16 @@ class WriteCodeWithUDFs(WriteCodeByGenerate): from metagpt.tools.functions.libs.udf import UDFS DEFAULT_SYSTEM_MSG = f"""Please remember these functions, you will use these functions to write code:\n - {UDFS} + {UDFS}, **Notice: 1. if no right udf for user requirement, please send `No udf found`** """ async def aask_code_and_text(self, context: List[Dict], **kwargs) -> Tuple[str]: rsp = await self.llm.acompletion(context, **kwargs) rsp_content = self.llm.get_choice_text(rsp) code = CodeParser.parse_code(None, rsp_content) + if code.startswith('No udf found') or rsp_content.startswith('No udf found'): + rsp_content = 'No udf found' + code = 'No udf found' return code, rsp_content async def run(self, context: List[Message], plan: Plan = None, task_guide: str = "", **kwargs) -> str: From 3fc5080b811f44e7ac6ff90458cd48a424c2ca50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 12 Dec 2023 14:58:10 +0800 Subject: [PATCH 23/68] add test_write_code_with_udfs_no_udf_found. --- tests/metagpt/actions/test_write_analysis_code.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/metagpt/actions/test_write_analysis_code.py b/tests/metagpt/actions/test_write_analysis_code.py index c3e7adc1b..71628d439 100644 --- a/tests/metagpt/actions/test_write_analysis_code.py +++ b/tests/metagpt/actions/test_write_analysis_code.py @@ -323,3 +323,11 @@ async def test_write_code_with_udfs(): output, output_type = await ep.run(rsp) assert output_type is True logger.info(output) + + +@pytest.mark.asyncio +async def test_write_code_with_udfs_no_udf_found(): + wudf = WriteCodeWithUDFs() + rsp = await wudf.run("Identify if there is a dog in the picture.") + logger.info(rsp) + assert 'No udf found' in rsp From 0d61e897002242f03f07d124bcc2b922cbd49cf0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Wed, 13 Dec 2023 14:59:04 +0800 Subject: [PATCH 24/68] add todo. --- metagpt/tools/functions/libs/udf/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/metagpt/tools/functions/libs/udf/__init__.py b/metagpt/tools/functions/libs/udf/__init__.py index c9c818a96..5bad9a3a4 100644 --- a/metagpt/tools/functions/libs/udf/__init__.py +++ b/metagpt/tools/functions/libs/udf/__init__.py @@ -3,6 +3,7 @@ import os import inspect import importlib from pathlib import Path +from typing import Dict, List def extract_function_signatures(file_path): @@ -50,3 +51,8 @@ function_signatures = get_function_signatures_in_folder(folder_path) UDFS = [func for func in function_signatures if not func['udf_name'].startswith(('extract_function_signatures', 'get_function_signatures_in_folder'))] + + +# TODO: Create Yaml style UDFS Schema +def udfs2yaml(udfs: List[Dict]) -> Dict: + pass From e67c679b1c13e6572a1934e2fdbb343ded8f81b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 18 Dec 2023 15:55:05 +0800 Subject: [PATCH 25/68] update DEFAULT_SYSTEM_MSG. --- metagpt/actions/make_tools.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/metagpt/actions/make_tools.py b/metagpt/actions/make_tools.py index 74037e900..590598cc3 100644 --- a/metagpt/actions/make_tools.py +++ b/metagpt/actions/make_tools.py @@ -13,11 +13,12 @@ from metagpt.actions.write_analysis_code import WriteCodeByGenerate class MakeTools(WriteCodeByGenerate): DEFAULT_SYSTEM_MSG = """Please Create a very General Function Code startswith `def` from any codes you got.\n **Notice: - 1. Reflect on whether it meets the requirements of a general function. + 1. Your code must contain a general function start with `def`. 2. Refactor your code to get the most efficient implementation for large input data in the shortest amount of time. 3. Use Google style for function annotations. 4. Write example code after `if __name__ == '__main__':`by using old varibales in old code, - and make sure it could be execute in the user's machine.** + and make sure it could be execute in the user's machine. + 5. Do not have missing package references.** """ def __init__(self, name: str = '', context: list[Message] = None, llm: LLM = None, workspace: str = None): @@ -50,11 +51,21 @@ class MakeTools(WriteCodeByGenerate): logger.info(f"Saved tool_code {func_name} in {str(saved_path)}.") saved_path.write_text(tool_code, encoding='utf-8') - @retry(stop=stop_after_attempt(3), wait=wait_fixed(1)) + # @retry(stop=stop_after_attempt(3), wait=wait_fixed(1)) async def run(self, code_message: List[Message | Dict], **kwargs) -> str: msgs = self.process_msg(code_message) logger.info(f"Ask: {msgs[-1]}") tool_code = await self.llm.aask_code(msgs, **kwargs) + max_tries, current_try = 3, 1 + func_name = self.parse_function_name(tool_code['code']) + while current_try < max_tries and func_name is None: + logger.warning(f"No function name found in code: \n{tool_code['code']}\n we will retry make tools.") + msgs.append({'role': 'assistant', 'content': 'We need a general function in above code,but not found function.'}) + tool_code = await self.llm.aask_code(msgs, **kwargs) + current_try += 1 + func_name = self.parse_function_name(tool_code['code']) + if func_name is not None: + break logger.info(f"Respond: Got {tool_code} from llm.") self.save(tool_code['code']) return tool_code["code"] From ea84fd34cd79153566e24a72247147c5509b2eef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 18 Dec 2023 15:56:38 +0800 Subject: [PATCH 26/68] chore --- metagpt/actions/make_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/actions/make_tools.py b/metagpt/actions/make_tools.py index 590598cc3..c23e19edb 100644 --- a/metagpt/actions/make_tools.py +++ b/metagpt/actions/make_tools.py @@ -51,7 +51,7 @@ class MakeTools(WriteCodeByGenerate): logger.info(f"Saved tool_code {func_name} in {str(saved_path)}.") saved_path.write_text(tool_code, encoding='utf-8') - # @retry(stop=stop_after_attempt(3), wait=wait_fixed(1)) + @retry(stop=stop_after_attempt(3), wait=wait_fixed(1)) async def run(self, code_message: List[Message | Dict], **kwargs) -> str: msgs = self.process_msg(code_message) logger.info(f"Ask: {msgs[-1]}") From b5833397a4a12a46f41d02e6f2b44edadd48c3b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 18 Dec 2023 20:18:20 +0800 Subject: [PATCH 27/68] feat: convert functions docstring schema to yaml --- metagpt/tools/functions/libs/udf/__init__.py | 77 +++++++++++++++++--- tests/metagpt/tools/functions/test_udf.py | 49 ++++++++++++- 2 files changed, 114 insertions(+), 12 deletions(-) diff --git a/metagpt/tools/functions/libs/udf/__init__.py b/metagpt/tools/functions/libs/udf/__init__.py index 5bad9a3a4..0cada9545 100644 --- a/metagpt/tools/functions/libs/udf/__init__.py +++ b/metagpt/tools/functions/libs/udf/__init__.py @@ -1,9 +1,12 @@ import ast import os +import re +import yaml import inspect import importlib from pathlib import Path from typing import Dict, List +from metagpt.logs import logger def extract_function_signatures(file_path): @@ -12,6 +15,7 @@ def extract_function_signatures(file_path): tree = ast.parse(source_code) function_signatures = [] + function_returns = [] for node in ast.walk(tree): if isinstance(node, ast.FunctionDef): # 只提取用户自定义函数,排除内置函数 @@ -30,29 +34,84 @@ def extract_function_signatures(file_path): 'udf_path': f'from metagpt.tools.functions.libs.udf.{module_name} import {function_name}', 'udf_doc': inspect.getdoc(getattr(module, function_name))} function_signatures.append(function_schema) - - return function_signatures + # 获取函数返回变量名 + source_lines, _ = inspect.getsourcelines(getattr(module, function_name)) + for line in source_lines: + if line.strip().startswith("return "): + function_returns.append({ + 'udf_name': function_name, + 'udf_returns': [var.strip() for var in line.strip()[len("return "):].split(',')] + }) + break + return function_signatures, function_returns def get_function_signatures_in_folder(folder_path): python_files = [f for f in os.listdir(folder_path) if f.endswith('.py')] all_function_signatures = [] + all_function_returns = [] for file_name in python_files: file_path = os.path.join(folder_path, file_name) - function_signatures = extract_function_signatures(file_path) + function_signatures, function_returns = extract_function_signatures(file_path) all_function_signatures.extend(function_signatures) + all_function_returns.extend(function_returns) + return all_function_signatures, all_function_returns - return all_function_signatures + +# TODO: Create Tools Yaml Style Schema +def docstring_to_yaml(docstring: str, return_vars: List[str] = None): + logger.debug(f"\n\nFunction Docstring: \n{'-'*60}\n {docstring} \n\nFunction Returns: \n{'-'*60}\n{return_vars}\n") + if docstring is None: + return {} + # 匹配简介部分 + description_match = re.search(r'^(.*?)(?:Args:|Returns:|Raises:|$)', docstring, re.DOTALL) + description = description_match.group(1).strip() if description_match else "" + + # 匹配Args部分 + args_match = re.search(r'Args:\s*(.*?)(?:Returns:|Raises:|$)', docstring, re.DOTALL) + _args = args_match.group(1).strip() if args_match else "" + variable_pattern = re.compile(r'(\w+)\s*\((.*?)\):\s*(.*)') + params = variable_pattern.findall(_args) + if not params: + err_msg = f"No Args found in docstring as following, Please make sure it is google style\ + : \n\n{'-'*60}\n{docstring}\n{'-'*60}\n\n." + logger.error(err_msg) + raise ValueError(err_msg) + # 匹配Returns部分 + returns_match = re.search(r'Returns:\s*(.*?)(?:Raises:|$)', docstring, re.DOTALL) + returns = returns_match.group(1).strip() if returns_match else "" + return_pattern = re.compile(r'^(.*)\s*:\s*(.*)$') + # 添加返回值变量名 + return_vars = return_vars if isinstance(return_vars, list) else [return_vars] + returns = [(r, *r_desc) for r_desc, r in zip(return_pattern.findall(returns), return_vars)] + # 构建YAML字典 + yaml_data = { + 'description': description.strip('.').strip(), + 'parameters': { + 'properties': {param[0]: {'type': param[1], 'description': param[2]} for param in params}, + 'required': [param[0] for param in params] + }, + 'returns': {ret[0]: {'type': ret[1], 'description': ret[2]} for ret in returns} + } + return yaml_data + + +def extract_function_schema_yaml_in_folder(folder_path: str): + function_signatures, function_returns = get_function_signatures_in_folder(folder_path) + function_schema_yaml_data = {} + for func_docstring, func_returns in zip(function_signatures, function_returns): + if func_docstring['udf_doc']: + fun_yaml_data = docstring_to_yaml(func_docstring['udf_doc'], func_returns['udf_returns']) + fun_yaml_data.update({'type': 'function'}) + function_schema_yaml_data.update({func_returns['udf_name']: fun_yaml_data}) + return yaml.dump(function_schema_yaml_data, default_flow_style=False) folder_path = str(Path(__file__).parent.absolute()) -function_signatures = get_function_signatures_in_folder(folder_path) +function_signatures, function_returns = get_function_signatures_in_folder(folder_path) UDFS = [func for func in function_signatures if not func['udf_name'].startswith(('extract_function_signatures', 'get_function_signatures_in_folder'))] - -# TODO: Create Yaml style UDFS Schema -def udfs2yaml(udfs: List[Dict]) -> Dict: - pass +UDFS_YAML = extract_function_schema_yaml_in_folder(folder_path) diff --git a/tests/metagpt/tools/functions/test_udf.py b/tests/metagpt/tools/functions/test_udf.py index b0c921180..89897e548 100644 --- a/tests/metagpt/tools/functions/test_udf.py +++ b/tests/metagpt/tools/functions/test_udf.py @@ -1,9 +1,52 @@ -from metagpt.tools.functions.libs.udf import UDFS +import pytest +import yaml + +from metagpt.tools.functions.libs.udf import UDFS, docstring_to_yaml, UDFS_YAML from metagpt.logs import logger def test_udfs(): assert len(UDFS) > 0 - assert 'name' in UDFS[0] - assert 'doc' in UDFS[0] + assert 'udf_name' in UDFS[0] + assert 'udf_doc' in UDFS[0] logger.info(UDFS) + + +def test_docstring2yaml(): + docstring = """Calculate the duration in hours between two datetime columns. + + Args: + dataframe (pd.DataFrame): The dataframe containing the datetime columns. + + Returns: + pd.DataFrame: The dataframe with an additional column 'duration_hour' added. + """ + + yaml_result = docstring_to_yaml(docstring, return_vars='dataframe') + assert 'parameters' in yaml_result + assert 'properties' in yaml_result['parameters'] + assert 'dataframe' in yaml_result['parameters']['properties'] + + +def test_docstring2yaml_error(): + docstring = """Calculate the duration in hours between two datetime columns. + args: + dataframe (pd.DataFrame): The dataframe containing the datetime columns. + returns: + pd.DataFrame: The dataframe with an additional column 'duration_hour' added. + """ + with pytest.raises(ValueError) as exc_info: + docstring_to_yaml(docstring, return_vars='dataframe') + assert "No Args found" in exc_info + + +def test_UDFS_YAML(): + assert len(UDFS_YAML) > 0 + logger.info(f"\n\n{UDFS_YAML}") + function_schema = yaml.load(UDFS_YAML, Loader=yaml.FullLoader) + assert 'description' in function_schema[list(function_schema.keys())[0]] + assert 'type' in function_schema[list(function_schema.keys())[0]] + assert 'parameters' in function_schema[list(function_schema.keys())[0]] + assert 'properties' in function_schema[list(function_schema.keys())[0]]['parameters'] + assert 'required' in function_schema[list(function_schema.keys())[0]]['parameters'] + assert 'returns' in function_schema[list(function_schema.keys())[0]] From 1a2b4f1b3b08c8f046a179864ab5e6d5f57086df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 18 Dec 2023 20:40:11 +0800 Subject: [PATCH 28/68] update make_tools. --- metagpt/actions/make_tools.py | 71 ------------------------ tests/metagpt/actions/test_make_tools.py | 2 +- 2 files changed, 1 insertion(+), 72 deletions(-) delete mode 100644 metagpt/actions/make_tools.py diff --git a/metagpt/actions/make_tools.py b/metagpt/actions/make_tools.py deleted file mode 100644 index c23e19edb..000000000 --- a/metagpt/actions/make_tools.py +++ /dev/null @@ -1,71 +0,0 @@ -from typing import List, Dict -from pathlib import Path -import re - -from tenacity import retry, stop_after_attempt, wait_fixed - -from metagpt.llm import LLM -from metagpt.logs import logger -from metagpt.schema import Message -from metagpt.actions.write_analysis_code import WriteCodeByGenerate - - -class MakeTools(WriteCodeByGenerate): - DEFAULT_SYSTEM_MSG = """Please Create a very General Function Code startswith `def` from any codes you got.\n - **Notice: - 1. Your code must contain a general function start with `def`. - 2. Refactor your code to get the most efficient implementation for large input data in the shortest amount of time. - 3. Use Google style for function annotations. - 4. Write example code after `if __name__ == '__main__':`by using old varibales in old code, - and make sure it could be execute in the user's machine. - 5. Do not have missing package references.** - """ - - def __init__(self, name: str = '', context: list[Message] = None, llm: LLM = None, workspace: str = None): - """ - :param str name: name, defaults to '' - :param list[Message] context: context, defaults to None - :param LLM llm: llm, defaults to None - :param str workspace: tools code saved file path dir, defaults to None - """ - super().__init__(name, context, llm) - self.workspace = workspace or str(Path(__file__).parents[1].joinpath("./tools/functions/libs/udf")) - self.file_suffix: str = '.py' - - def parse_function_name(self, function_code: str) -> str: - # 定义正则表达式模式 - pattern = r'\bdef\s+([a-zA-Z_]\w*)\s*\(' - # 在代码中搜索匹配的模式 - match = re.search(pattern, function_code) - # 如果找到匹配项,则返回匹配的函数名;否则返回None - if match: - return match.group(1) - else: - return None - - def save(self, tool_code: str) -> None: - func_name = self.parse_function_name(tool_code) - if func_name is None: - raise ValueError(f"No function name found in {tool_code}") - saved_path = Path(self.workspace).joinpath(func_name+self.file_suffix) - logger.info(f"Saved tool_code {func_name} in {str(saved_path)}.") - saved_path.write_text(tool_code, encoding='utf-8') - - @retry(stop=stop_after_attempt(3), wait=wait_fixed(1)) - async def run(self, code_message: List[Message | Dict], **kwargs) -> str: - msgs = self.process_msg(code_message) - logger.info(f"Ask: {msgs[-1]}") - tool_code = await self.llm.aask_code(msgs, **kwargs) - max_tries, current_try = 3, 1 - func_name = self.parse_function_name(tool_code['code']) - while current_try < max_tries and func_name is None: - logger.warning(f"No function name found in code: \n{tool_code['code']}\n we will retry make tools.") - msgs.append({'role': 'assistant', 'content': 'We need a general function in above code,but not found function.'}) - tool_code = await self.llm.aask_code(msgs, **kwargs) - current_try += 1 - func_name = self.parse_function_name(tool_code['code']) - if func_name is not None: - break - logger.info(f"Respond: Got {tool_code} from llm.") - self.save(tool_code['code']) - return tool_code["code"] diff --git a/tests/metagpt/actions/test_make_tools.py b/tests/metagpt/actions/test_make_tools.py index 264599439..cf7986b82 100644 --- a/tests/metagpt/actions/test_make_tools.py +++ b/tests/metagpt/actions/test_make_tools.py @@ -1,7 +1,7 @@ import pytest from metagpt.actions.execute_code import ExecutePyCode -from metagpt.actions.make_tools import MakeTools +from metagpt.actions.write_analysis_code import MakeTools from metagpt.logs import logger From b18b1c366ead8cc4e2b950145d56d4885b1e6060 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 18 Dec 2023 21:57:45 +0800 Subject: [PATCH 29/68] update UDFS. --- metagpt/tools/functions/libs/udf/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/tools/functions/libs/udf/__init__.py b/metagpt/tools/functions/libs/udf/__init__.py index 0cada9545..8c74bbbe3 100644 --- a/metagpt/tools/functions/libs/udf/__init__.py +++ b/metagpt/tools/functions/libs/udf/__init__.py @@ -112,6 +112,6 @@ folder_path = str(Path(__file__).parent.absolute()) function_signatures, function_returns = get_function_signatures_in_folder(folder_path) UDFS = [func for func in function_signatures - if not func['udf_name'].startswith(('extract_function_signatures', 'get_function_signatures_in_folder'))] + if not func['udf_name'].startswith(('extract_function_signatures', 'get_function_signatures_in_folder', 'docstring_to_yaml'))] UDFS_YAML = extract_function_schema_yaml_in_folder(folder_path) From a71b75a8a928744f8bf1742e56fa51e56365314e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 18 Dec 2023 22:10:28 +0800 Subject: [PATCH 30/68] feat: MakeTools, WriteCodeWithUDFs. --- metagpt/actions/write_analysis_code.py | 104 +++++++++++++++++++++++-- 1 file changed, 96 insertions(+), 8 deletions(-) diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py index 663f76b7b..c41e0fc5a 100644 --- a/metagpt/actions/write_analysis_code.py +++ b/metagpt/actions/write_analysis_code.py @@ -5,6 +5,10 @@ @File : write_code_v2.py """ from typing import Dict, List, Union, Tuple +from tenacity import retry, stop_after_attempt, wait_fixed +from pathlib import Path +import re +import json from metagpt.actions import Action from metagpt.llm import LLM @@ -86,7 +90,6 @@ class WriteCodeByGenerate(BaseWriteAnalysisCode): self, context: [List[Message]], plan: Plan = None, - code_steps: str = "", system_msg: str = None, **kwargs, ) -> str: @@ -206,25 +209,110 @@ class WriteCodeWithTools(BaseWriteAnalysisCode): return rsp["code"] +class MakeTools(WriteCodeByGenerate): + DEFAULT_SYSTEM_MSG = """Please Create a very General Function Code startswith `def` from any codes you got.\n + **Notice: + 1. Your code must contain a general function start with `def`. + 2. Refactor your code to get the most efficient implementation for large input data in the shortest amount of time. + 3. Use Google style for function annotations. + 4. Write example code after `if __name__ == '__main__':`by using old varibales in old code, + and make sure it could be execute in the user's machine. + 5. Do not have missing package references.** + """ + + def __init__(self, name: str = '', context: list[Message] = None, llm: LLM = None, workspace: str = None): + """ + :param str name: name, defaults to '' + :param list[Message] context: context, defaults to None + :param LLM llm: llm, defaults to None + :param str workspace: tools code saved file path dir, defaults to None + """ + super().__init__(name, context, llm) + self.workspace = workspace or str(Path(__file__).parents[1].joinpath("./tools/functions/libs/udf")) + self.file_suffix: str = '.py' + + def parse_function_name(self, function_code: str) -> str: + # 定义正则表达式模式 + pattern = r'\bdef\s+([a-zA-Z_]\w*)\s*\(' + # 在代码中搜索匹配的模式 + match = re.search(pattern, function_code) + # 如果找到匹配项,则返回匹配的函数名;否则返回None + if match: + return match.group(1) + else: + return None + + def save(self, tool_code: str) -> None: + func_name = self.parse_function_name(tool_code) + if func_name is None: + raise ValueError(f"No function name found in {tool_code}") + saved_path = Path(self.workspace).joinpath(func_name+self.file_suffix) + logger.info(f"Saved tool_code {func_name} in {str(saved_path)}.") + saved_path.write_text(tool_code, encoding='utf-8') + + @retry(stop=stop_after_attempt(3), wait=wait_fixed(1)) + async def run(self, code_message: List[Message | Dict], **kwargs) -> str: + msgs = self.process_msg(code_message) + logger.info(f"\n\nAsk to Make tools:\n{'-'*60}\n {msgs[-1]}") + tool_code = await self.llm.aask_code(msgs, **kwargs) + max_tries, current_try = 3, 1 + func_name = self.parse_function_name(tool_code['code']) + while current_try < max_tries and func_name is None: + logger.info(f"\n\nTools Respond\n{'-'*60}\n: {tool_code}") + logger.warning(f"No function name found in code, we will retry make tools. \n\n{tool_code['code']}\n") + msgs.append({'role': 'assistant', 'content': 'We need a general function in above code,but not found function.'}) + tool_code = await self.llm.aask_code(msgs, **kwargs) + current_try += 1 + func_name = self.parse_function_name(tool_code['code']) + if func_name is not None: + break + self.save(tool_code['code']) + return tool_code["code"] + + class WriteCodeWithUDFs(WriteCodeByGenerate): """Write code with user defined function.""" from metagpt.tools.functions.libs.udf import UDFS - DEFAULT_SYSTEM_MSG = f"""Please remember these functions, you will use these functions to write code:\n - {UDFS}, **Notice: 1. if no right udf for user requirement, please send `No udf found`** + UDFS_DEFAULT_SYSTEM_MSG = f"""Please remember these functions, you will use these functions to write code:\n + {UDFS}, **Notice: 1. if no udf meets user requirement, please send `No udf found`. 2.Only use function code provied to you. + 3. Dont generate code from scratch.** """ async def aask_code_and_text(self, context: List[Dict], **kwargs) -> Tuple[str]: rsp = await self.llm.acompletion(context, **kwargs) rsp_content = self.llm.get_choice_text(rsp) code = CodeParser.parse_code(None, rsp_content) - if code.startswith('No udf found') or rsp_content.startswith('No udf found'): + if 'No udf found' in code or 'No udf found' in rsp_content: rsp_content = 'No udf found' code = 'No udf found' return code, rsp_content - async def run(self, context: List[Message], plan: Plan = None, task_guide: str = "", **kwargs) -> str: - prompt = self.process_msg(context) - logger.info(prompt[-1]) - code, _ = await self.aask_code_and_text(prompt, **kwargs) + async def run(self, context: List[Message], plan: Plan = None, **kwargs) -> str: + from metagpt.tools.functions.libs.udf import UDFS + if len(UDFS) > 0: + # Write code from user defined function. + prompt = self.process_msg(context, self.UDFS_DEFAULT_SYSTEM_MSG) + logger.info(prompt[-1]) + try: + logger.info("Local user defined function as following:") + logger.info(json.dumps(UDFS, indent=4, ensure_ascii=False)) + except Exception: + from pprint import pprint + pprint(UDFS) + logger.info('Writing code from user defined function by LLM...') + code, _ = await self.aask_code_and_text(prompt, **kwargs) + logger.info(f"Writing code from user defined function: \n{'-'*50}\n {code}") + if code != 'No udf found': + return code + logger.warning("No udf found, we will write code from scratch by LLM.") + # Writing code from scratch. + logger.warning("Writing code from scratch by LLM.") + code = await super().run(context, plan, self.DEFAULT_SYSTEM_MSG, **kwargs) + logger.info(f"Code Writing code from scratch by LLM is :\n{'-'*60}\n {code}") + # Make tools for above code. + logger.info("Make tools for above code.") + make_tools = MakeTools() + tool_code = await make_tools.run(code) + make_tools.save(tool_code) return code From 79787e8129119b9b4a848a54c26e4215225b9798 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 18 Dec 2023 22:15:32 +0800 Subject: [PATCH 31/68] feat: add make tools. --- metagpt/roles/ml_engineer.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index e7fe38ff4..b039c61e7 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -9,7 +9,7 @@ from metagpt.schema import Message, Plan from metagpt.memory import Memory from metagpt.logs import logger from metagpt.actions.write_plan import WritePlan, update_plan_from_rsp, precheck_update_plan_from_rsp -from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools +from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools, MakeTools from metagpt.actions.ml_da_action import AskReview, SummarizeAnalysis, Reflect, ReviewConst from metagpt.actions.execute_code import ExecutePyCode from metagpt.roles.kaggle_manager import DownloadData, SubmitResult @@ -126,6 +126,10 @@ class MLEngineer(Role): context=context, plan=self.plan, code_steps=code_steps, temperature=0.0 ) cause_by = WriteCodeByGenerate + # make and save tools. + make_tools = MakeTools() + tool_code = await make_tools.run(code) + make_tools.save(tool_code) else: code = await WriteCodeWithTools().run( context=context, plan=self.plan, code_steps=code_steps, data_desc="" From 52052c82447fcb7d92108723b52274f8788f52c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 18 Dec 2023 22:30:14 +0800 Subject: [PATCH 32/68] update make tools. --- metagpt/actions/write_analysis_code.py | 62 ++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py index 839184cdc..4194bafc9 100644 --- a/metagpt/actions/write_analysis_code.py +++ b/metagpt/actions/write_analysis_code.py @@ -220,3 +220,65 @@ class WriteCodeWithTools(BaseWriteAnalysisCode): rsp = await self.llm.aask_code(prompt, **tool_config) context = [Message(content=prompt, role="user")] return context, rsp["code"] + + +class MakeTools(WriteCodeByGenerate): + DEFAULT_SYSTEM_MSG = """Please Create a very General Function Code startswith `def` from any codes you got.\n + **Notice: + 1. Your code must contain a general function start with `def`. + 2. Refactor your code to get the most efficient implementation for large input data in the shortest amount of time. + 3. Use Google style for function annotations. + 4. Write example code after `if __name__ == '__main__':`by using old varibales in old code, + and make sure it could be execute in the user's machine. + 5. Dont have missing package references.** + """ + + def __init__(self, name: str = '', context: list[Message] = None, llm: LLM = None, workspace: str = None): + """ + :param str name: name, defaults to '' + :param list[Message] context: context, defaults to None + :param LLM llm: llm, defaults to None + :param str workspace: tools code saved file path dir, defaults to None + """ + super().__init__(name, context, llm) + self.workspace = workspace or str(Path(__file__).parents[1].joinpath("./tools/functions/libs/udf")) + self.file_suffix: str = '.py' + + def parse_function_name(self, function_code: str) -> str: + # 定义正则表达式模式 + pattern = r'\bdef\s+([a-zA-Z_]\w*)\s*\(' + # 在代码中搜索匹配的模式 + match = re.search(pattern, function_code) + # 如果找到匹配项,则返回匹配的函数名;否则返回None + if match: + return match.group(1) + else: + return None + + def save(self, tool_code: str) -> None: + func_name = self.parse_function_name(tool_code) + if func_name is None: + raise ValueError(f"No function name found in {tool_code}") + saved_path = Path(self.workspace).joinpath(func_name+self.file_suffix) + logger.info(f"Saved tool_code {func_name} in {str(saved_path)}.") + saved_path.write_text(tool_code, encoding='utf-8') + + @retry(stop=stop_after_attempt(3), wait=wait_fixed(1)) + async def run(self, code_message: List[Message | Dict], **kwargs) -> str: + msgs = self.process_msg(code_message) + logger.info(f"\n\nAsk to Make tools:\n{'-'*60}\n {msgs[-1]}") + tool_code = await self.llm.aask_code(msgs, **kwargs) + max_tries, current_try = 3, 1 + func_name = self.parse_function_name(tool_code['code']) + while current_try < max_tries and func_name is None: + logger.info(f"\n\nTools Respond\n{'-'*60}\n: {tool_code}") + logger.warning(f"No function name found in code, we will retry make tools. \n\n{tool_code['code']}\n") + msgs.append({'role': 'assistant', 'content': 'We need a general function in above code,but not found function.'}) + tool_code = await self.llm.aask_code(msgs, **kwargs) + current_try += 1 + func_name = self.parse_function_name(tool_code['code']) + if func_name is not None: + break + logger.info(f"\n\nTools Respond\n{'-'*60}\n: {tool_code}") + self.save(tool_code['code']) + return tool_code["code"] From 87821fc6cca7181e32a4d0e740cff531a3cb7cd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Mon, 18 Dec 2023 22:33:58 +0800 Subject: [PATCH 33/68] update make tools. --- metagpt/roles/ml_engineer.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index 28ff9fb3d..1361c566f 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -8,7 +8,7 @@ from metagpt.actions import Action from metagpt.actions.debug_code import DebugCode from metagpt.actions.execute_code import ExecutePyCode from metagpt.actions.ml_da_action import AskReview, SummarizeAnalysis, Reflect, ReviewConst -from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools +from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools, MakeTools from metagpt.actions.write_code_steps import WriteCodeSteps from metagpt.actions.write_plan import WritePlan from metagpt.actions.write_plan import update_plan_from_rsp, precheck_update_plan_from_rsp @@ -48,6 +48,7 @@ class MLEngineer(Role): self.plan = Plan(goal=goal) self.use_tools = False + self.make_tools = True self.use_code_steps = False self.execute_code = ExecutePyCode() self.auto_run = auto_run @@ -173,10 +174,11 @@ class MLEngineer(Role): ) debug_context = [self.get_useful_memories(task_exclude_field={'result', 'code_steps'})[0]] cause_by = WriteCodeByGenerate - # make and save tools. - make_tools = MakeTools() - tool_code = await make_tools.run(code) - make_tools.save(tool_code) + if self.make_tools: + # make and save tools. + make_tools = MakeTools() + tool_code = await make_tools.run(code) + make_tools.save(tool_code) else: logger.info("Write code with tools") schema_path = PROJECT_ROOT / "metagpt/tools/functions/schemas" From 4cb2028c7240f8be607a9b9f57cdfb47bd197117 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 19 Dec 2023 10:24:57 +0800 Subject: [PATCH 34/68] update for make tools test. --- metagpt/roles/ml_engineer.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index 1361c566f..75c403226 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -48,7 +48,8 @@ class MLEngineer(Role): self.plan = Plan(goal=goal) self.use_tools = False - self.make_tools = True + self.make_udfs = False + self.use_udfs = False self.use_code_steps = False self.execute_code = ExecutePyCode() self.auto_run = auto_run @@ -168,14 +169,19 @@ class MLEngineer(Role): logger.info(f"new code \n{code}") cause_by = DebugCode elif not self.use_tools or self.plan.current_task.task_type == "other": - logger.info("Write code with pure generation") - code = await WriteCodeByGenerate().run( - context=context, plan=self.plan, temperature=0.0 - ) - debug_context = [self.get_useful_memories(task_exclude_field={'result', 'code_steps'})[0]] - cause_by = WriteCodeByGenerate - if self.make_tools: - # make and save tools. + if self.use_udfs: + # use user-defined function tools. + pass + else: + logger.info("Write code with pure generation") + code = await WriteCodeByGenerate().run( + context=context, plan=self.plan, temperature=0.0 + ) + debug_context = [self.get_useful_memories(task_exclude_field={'result', 'code_steps'})[0]] + cause_by = WriteCodeByGenerate + + if self.make_udfs: + # make and save user-defined function tools. make_tools = MakeTools() tool_code = await make_tools.run(code) make_tools.save(tool_code) @@ -291,6 +297,7 @@ if __name__ == "__main__": async def main(requirement: str = requirement, auto_run: bool = True): role = MLEngineer(goal=requirement, auto_run=auto_run) + role.make_udfs = True await role.run(requirement) fire.Fire(main) From d9c814420b5e31430e7143d4b430404c4ce8f63c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 19 Dec 2023 11:21:51 +0800 Subject: [PATCH 35/68] fix: no args error. --- metagpt/tools/functions/libs/udf/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/tools/functions/libs/udf/__init__.py b/metagpt/tools/functions/libs/udf/__init__.py index 8c74bbbe3..5596cd37a 100644 --- a/metagpt/tools/functions/libs/udf/__init__.py +++ b/metagpt/tools/functions/libs/udf/__init__.py @@ -77,7 +77,7 @@ def docstring_to_yaml(docstring: str, return_vars: List[str] = None): err_msg = f"No Args found in docstring as following, Please make sure it is google style\ : \n\n{'-'*60}\n{docstring}\n{'-'*60}\n\n." logger.error(err_msg) - raise ValueError(err_msg) + params = (('', '', ''),) # 匹配Returns部分 returns_match = re.search(r'Returns:\s*(.*?)(?:Raises:|$)', docstring, re.DOTALL) returns = returns_match.group(1).strip() if returns_match else "" From c1a3a12c9250a582f7348067a615c52c85fd6c2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 19 Dec 2023 11:27:26 +0800 Subject: [PATCH 36/68] update udf test for function schema. --- tests/metagpt/tools/functions/test_udf.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/tests/metagpt/tools/functions/test_udf.py b/tests/metagpt/tools/functions/test_udf.py index 89897e548..111ec532a 100644 --- a/tests/metagpt/tools/functions/test_udf.py +++ b/tests/metagpt/tools/functions/test_udf.py @@ -28,18 +28,6 @@ def test_docstring2yaml(): assert 'dataframe' in yaml_result['parameters']['properties'] -def test_docstring2yaml_error(): - docstring = """Calculate the duration in hours between two datetime columns. - args: - dataframe (pd.DataFrame): The dataframe containing the datetime columns. - returns: - pd.DataFrame: The dataframe with an additional column 'duration_hour' added. - """ - with pytest.raises(ValueError) as exc_info: - docstring_to_yaml(docstring, return_vars='dataframe') - assert "No Args found" in exc_info - - def test_UDFS_YAML(): assert len(UDFS_YAML) > 0 logger.info(f"\n\n{UDFS_YAML}") @@ -50,3 +38,11 @@ def test_UDFS_YAML(): assert 'properties' in function_schema[list(function_schema.keys())[0]]['parameters'] assert 'required' in function_schema[list(function_schema.keys())[0]]['parameters'] assert 'returns' in function_schema[list(function_schema.keys())[0]] + # 指定要保存的文件路径 + file_path = './tests/data/function_schema.yaml' + + # 使用 PyYAML 将字典保存为 YAML 文件 + with open(file_path, 'w') as file: + yaml.dump(function_schema, file, default_flow_style=False) + + print(f'Data has been saved to {file_path}') From 4de104ef8f3bff4a486e058354c9038a378f025b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 19 Dec 2023 11:32:25 +0800 Subject: [PATCH 37/68] update parameters for None. --- metagpt/tools/functions/libs/udf/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metagpt/tools/functions/libs/udf/__init__.py b/metagpt/tools/functions/libs/udf/__init__.py index 5596cd37a..3c4e72d8b 100644 --- a/metagpt/tools/functions/libs/udf/__init__.py +++ b/metagpt/tools/functions/libs/udf/__init__.py @@ -77,7 +77,7 @@ def docstring_to_yaml(docstring: str, return_vars: List[str] = None): err_msg = f"No Args found in docstring as following, Please make sure it is google style\ : \n\n{'-'*60}\n{docstring}\n{'-'*60}\n\n." logger.error(err_msg) - params = (('', '', ''),) + params = ((None, None, None),) # 匹配Returns部分 returns_match = re.search(r'Returns:\s*(.*?)(?:Raises:|$)', docstring, re.DOTALL) returns = returns_match.group(1).strip() if returns_match else "" @@ -89,8 +89,8 @@ def docstring_to_yaml(docstring: str, return_vars: List[str] = None): yaml_data = { 'description': description.strip('.').strip(), 'parameters': { - 'properties': {param[0]: {'type': param[1], 'description': param[2]} for param in params}, - 'required': [param[0] for param in params] + 'properties': {param[0]: {'type': param[1], 'description': param[2]} for param in params if param[0] is not None}, + 'required': [param[0] for param in params if param[0] is not None] }, 'returns': {ret[0]: {'type': ret[1], 'description': ret[2]} for ret in returns} } From 0daf7ea4e3bfd8af5de11788d4fa5e295b98cf5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 19 Dec 2023 11:34:06 +0800 Subject: [PATCH 38/68] chore. --- metagpt/tools/functions/libs/udf/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/tools/functions/libs/udf/__init__.py b/metagpt/tools/functions/libs/udf/__init__.py index 3c4e72d8b..add03f376 100644 --- a/metagpt/tools/functions/libs/udf/__init__.py +++ b/metagpt/tools/functions/libs/udf/__init__.py @@ -76,7 +76,7 @@ def docstring_to_yaml(docstring: str, return_vars: List[str] = None): if not params: err_msg = f"No Args found in docstring as following, Please make sure it is google style\ : \n\n{'-'*60}\n{docstring}\n{'-'*60}\n\n." - logger.error(err_msg) + logger.warning(err_msg) params = ((None, None, None),) # 匹配Returns部分 returns_match = re.search(r'Returns:\s*(.*?)(?:Raises:|$)', docstring, re.DOTALL) From 7ddca9e99564e6d102d4d8b443effbbddedb774c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 19 Dec 2023 14:36:33 +0800 Subject: [PATCH 39/68] update MakeTools DEFAULT_SYSTEM_MSG. --- metagpt/actions/write_analysis_code.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py index 4194bafc9..0a1d74263 100644 --- a/metagpt/actions/write_analysis_code.py +++ b/metagpt/actions/write_analysis_code.py @@ -223,7 +223,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode): class MakeTools(WriteCodeByGenerate): - DEFAULT_SYSTEM_MSG = """Please Create a very General Function Code startswith `def` from any codes you got.\n + DEFAULT_SYSTEM_MSG = """Convert any codes provied for you to a very General Function Code startswith `def`.\n **Notice: 1. Your code must contain a general function start with `def`. 2. Refactor your code to get the most efficient implementation for large input data in the shortest amount of time. From fdf16f55352102d002223af6ee4d054622be0e79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 19 Dec 2023 14:38:08 +0800 Subject: [PATCH 40/68] add code_prompt for make tools. --- metagpt/roles/ml_engineer.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index 75c403226..96e21c8c8 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -180,10 +180,12 @@ class MLEngineer(Role): debug_context = [self.get_useful_memories(task_exclude_field={'result', 'code_steps'})[0]] cause_by = WriteCodeByGenerate - if self.make_udfs: + if self.make_udfs and len(code.split('\n')) > 2: # make and save user-defined function tools. make_tools = MakeTools() - tool_code = await make_tools.run(code) + code_prompt = f"The following code is about {self.plan.current_task.instruction},\ + convert it to be a General Function, {code}" + tool_code = await make_tools.run(code_prompt) make_tools.save(tool_code) else: logger.info("Write code with tools") From a4ba5660b82a528ae876c30336623e9f33afdf24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 19 Dec 2023 16:30:38 +0800 Subject: [PATCH 41/68] convert UDFS_YAML to dict. --- metagpt/tools/functions/libs/udf/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metagpt/tools/functions/libs/udf/__init__.py b/metagpt/tools/functions/libs/udf/__init__.py index add03f376..ad36b2817 100644 --- a/metagpt/tools/functions/libs/udf/__init__.py +++ b/metagpt/tools/functions/libs/udf/__init__.py @@ -114,4 +114,5 @@ function_signatures, function_returns = get_function_signatures_in_folder(folder UDFS = [func for func in function_signatures if not func['udf_name'].startswith(('extract_function_signatures', 'get_function_signatures_in_folder', 'docstring_to_yaml'))] -UDFS_YAML = extract_function_schema_yaml_in_folder(folder_path) +UDFS_YAML_STR: str = extract_function_schema_yaml_in_folder(folder_path) +UDFS_YAML: dict = yaml.load(UDFS_YAML_STR, Loader=yaml.FullLoader) From 3bb445b925ba5901bd0e5d9e4e1339c3c60c13dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 19 Dec 2023 16:50:37 +0800 Subject: [PATCH 42/68] fix: no returns function tools. --- metagpt/tools/functions/libs/udf/__init__.py | 14 ++++++++++---- tests/metagpt/tools/functions/test_udf.py | 5 +++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/metagpt/tools/functions/libs/udf/__init__.py b/metagpt/tools/functions/libs/udf/__init__.py index ad36b2817..245288de2 100644 --- a/metagpt/tools/functions/libs/udf/__init__.py +++ b/metagpt/tools/functions/libs/udf/__init__.py @@ -43,11 +43,18 @@ def extract_function_signatures(file_path): 'udf_returns': [var.strip() for var in line.strip()[len("return "):].split(',')] }) break + + # 没有返回值的函数 + if not function_returns or function_returns[-1]['udf_name'] != function_name: + function_returns.append({ + 'udf_name': function_name, + 'udf_returns': [None] + }) return function_signatures, function_returns def get_function_signatures_in_folder(folder_path): - python_files = [f for f in os.listdir(folder_path) if f.endswith('.py')] + python_files = [f for f in os.listdir(folder_path) if f.endswith('.py') and f != '__init__.py'] all_function_signatures = [] all_function_returns = [] @@ -59,7 +66,7 @@ def get_function_signatures_in_folder(folder_path): return all_function_signatures, all_function_returns -# TODO: Create Tools Yaml Style Schema +# Create Tools Yaml Style Schema def docstring_to_yaml(docstring: str, return_vars: List[str] = None): logger.debug(f"\n\nFunction Docstring: \n{'-'*60}\n {docstring} \n\nFunction Returns: \n{'-'*60}\n{return_vars}\n") if docstring is None: @@ -111,8 +118,7 @@ def extract_function_schema_yaml_in_folder(folder_path: str): folder_path = str(Path(__file__).parent.absolute()) function_signatures, function_returns = get_function_signatures_in_folder(folder_path) -UDFS = [func for func in function_signatures - if not func['udf_name'].startswith(('extract_function_signatures', 'get_function_signatures_in_folder', 'docstring_to_yaml'))] +UDFS = [func for func in function_signatures] UDFS_YAML_STR: str = extract_function_schema_yaml_in_folder(folder_path) UDFS_YAML: dict = yaml.load(UDFS_YAML_STR, Loader=yaml.FullLoader) diff --git a/tests/metagpt/tools/functions/test_udf.py b/tests/metagpt/tools/functions/test_udf.py index 111ec532a..b4060ad13 100644 --- a/tests/metagpt/tools/functions/test_udf.py +++ b/tests/metagpt/tools/functions/test_udf.py @@ -1,5 +1,6 @@ import pytest import yaml +import json from metagpt.tools.functions.libs.udf import UDFS, docstring_to_yaml, UDFS_YAML from metagpt.logs import logger @@ -30,8 +31,8 @@ def test_docstring2yaml(): def test_UDFS_YAML(): assert len(UDFS_YAML) > 0 - logger.info(f"\n\n{UDFS_YAML}") - function_schema = yaml.load(UDFS_YAML, Loader=yaml.FullLoader) + logger.info(f"\n\n{json.dumps(UDFS_YAML, indent=2, ensure_ascii=False)}") + function_schema = UDFS_YAML assert 'description' in function_schema[list(function_schema.keys())[0]] assert 'type' in function_schema[list(function_schema.keys())[0]] assert 'parameters' in function_schema[list(function_schema.keys())[0]] From 6895e74d3ef0e44ce04a4c2195b96da1d7920edb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 19 Dec 2023 17:01:55 +0800 Subject: [PATCH 43/68] update parse No Args function. --- metagpt/tools/functions/libs/udf/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/metagpt/tools/functions/libs/udf/__init__.py b/metagpt/tools/functions/libs/udf/__init__.py index 245288de2..b74ae2ab9 100644 --- a/metagpt/tools/functions/libs/udf/__init__.py +++ b/metagpt/tools/functions/libs/udf/__init__.py @@ -81,9 +81,6 @@ def docstring_to_yaml(docstring: str, return_vars: List[str] = None): variable_pattern = re.compile(r'(\w+)\s*\((.*?)\):\s*(.*)') params = variable_pattern.findall(_args) if not params: - err_msg = f"No Args found in docstring as following, Please make sure it is google style\ - : \n\n{'-'*60}\n{docstring}\n{'-'*60}\n\n." - logger.warning(err_msg) params = ((None, None, None),) # 匹配Returns部分 returns_match = re.search(r'Returns:\s*(.*?)(?:Raises:|$)', docstring, re.DOTALL) From 52b8ba84d32d6d42b6dde75b772d2dd68195c9ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 19 Dec 2023 17:57:16 +0800 Subject: [PATCH 44/68] update globals with function tools. --- metagpt/tools/functions/libs/udf/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/metagpt/tools/functions/libs/udf/__init__.py b/metagpt/tools/functions/libs/udf/__init__.py index b74ae2ab9..5d9c35b27 100644 --- a/metagpt/tools/functions/libs/udf/__init__.py +++ b/metagpt/tools/functions/libs/udf/__init__.py @@ -29,6 +29,8 @@ def extract_function_signatures(file_path): # 导入函数 module_name = Path(file_path).parts[-1][:-len(Path(file_path).suffix)] module = importlib.import_module(f"metagpt.tools.functions.libs.udf.{module_name}") + # 将函数导入到当前命名空间 + globals().update({function_name: getattr(module, function_name)}) # 获取函数注释和函数路径 function_schema = {'udf_name': function_signature, 'udf_path': f'from metagpt.tools.functions.libs.udf.{module_name} import {function_name}', From cb31ede9c11d1ca7514f75f9abfbb4c5266043b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 19 Dec 2023 17:58:22 +0800 Subject: [PATCH 45/68] add udf in ML_MODULE_MAP. --- metagpt/prompts/ml_engineer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/metagpt/prompts/ml_engineer.py b/metagpt/prompts/ml_engineer.py index 33eb9c40c..cca9649b3 100644 --- a/metagpt/prompts/ml_engineer.py +++ b/metagpt/prompts/ml_engineer.py @@ -301,6 +301,7 @@ ML_SPECIFIC_PROMPT = { ML_MODULE_MAP = { "data_preprocess": "metagpt.tools.functions.libs.data_preprocess", "feature_engineering": "metagpt.tools.functions.libs.feature_engineering", + "udf": "metagpt.tools.functions.libs.udf", } STRUCTURAL_CONTEXT = """ From c7335419ce32b567fc9cc17b9c70d67656bad0e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 19 Dec 2023 18:20:04 +0800 Subject: [PATCH 46/68] fix: BaseWriteAnalysisCode now do not install packages or check packages first. --- metagpt/actions/write_analysis_code.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py index 0a1d74263..bc069414f 100644 --- a/metagpt/actions/write_analysis_code.py +++ b/metagpt/actions/write_analysis_code.py @@ -29,7 +29,7 @@ from metagpt.utils.common import create_func_config, remove_comments class BaseWriteAnalysisCode(Action): - DEFAULT_SYSTEM_MSG = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt + DEFAULT_SYSTEM_MSG = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt # REUSE_CODE_INSTRUCTION = """ATTENTION: DONT include codes from previous tasks in your current code block, include new codes only, DONT repeat codes!""" def process_msg(self, prompt: Union[str, List[Dict], Message, List[Message]], system_msg: str = None): @@ -112,13 +112,17 @@ class WriteCodeWithTools(BaseWriteAnalysisCode): if self.schema_path is not None: self._load_tools(schema_path) - def _load_tools(self, schema_path): + def _load_tools(self, schema_path, schema_module=None): """Load tools from yaml file""" - yml_files = schema_path.glob("*.yml") - for yml_file in yml_files: - module = yml_file.stem - with open(yml_file, "r", encoding="utf-8") as f: - self.available_tools[module] = yaml.safe_load(f) + if isinstance(schema_path, dict): + schema_module = schema_module or 'udf' + self.available_tools.update({schema_module: schema_path}) + else: + yml_files = schema_path.glob("*.yml") + for yml_file in yml_files: + module = yml_file.stem + with open(yml_file, "r", encoding="utf-8") as f: + self.available_tools[module] = yaml.safe_load(f) def _parse_recommend_tools(self, module: str, recommend_tools: list) -> dict: """ @@ -174,7 +178,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode): column_info: str = "", **kwargs, ) -> Tuple[List[Message], str]: - task_type = plan.current_task.task_type + task_type = plan.current_task.task_type or 'udf' available_tools = self.available_tools.get(task_type, {}) special_prompt = ML_SPECIFIC_PROMPT.get(task_type, "") code_steps = plan.current_task.code_steps @@ -227,7 +231,7 @@ class MakeTools(WriteCodeByGenerate): **Notice: 1. Your code must contain a general function start with `def`. 2. Refactor your code to get the most efficient implementation for large input data in the shortest amount of time. - 3. Use Google style for function annotations. + 3. Must use Google style for function docstring, and your code must have function docstring. 4. Write example code after `if __name__ == '__main__':`by using old varibales in old code, and make sure it could be execute in the user's machine. 5. Dont have missing package references.** From 6ed432205bf78831ade0911824aa40914e9a601a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 19 Dec 2023 18:23:21 +0800 Subject: [PATCH 47/68] feat: add use_udfs with WriteCodeWithTools. --- metagpt/roles/ml_engineer.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index 96e21c8c8..fa9acadbc 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -48,7 +48,7 @@ class MLEngineer(Role): self.plan = Plan(goal=goal) self.use_tools = False - self.make_udfs = False + self.make_udfs = False # user-defined functions self.use_udfs = False self.use_code_steps = False self.execute_code = ExecutePyCode() @@ -171,7 +171,17 @@ class MLEngineer(Role): elif not self.use_tools or self.plan.current_task.task_type == "other": if self.use_udfs: # use user-defined function tools. - pass + from metagpt.tools.functions.libs.udf import UDFS_YAML + logger.warning("Writing code with user-defined function tools...") + logger.info(f"Local user defined function as following:\ + \n{json.dumps(list(UDFS_YAML.keys()), indent=2, ensure_ascii=False)}") + tool_context, code = await WriteCodeWithTools(schema_path=UDFS_YAML).run( + context=context, + plan=self.plan, + column_info=self.data_desc.get("column_info", ""), + ) + debug_context = tool_context + cause_by = WriteCodeWithTools else: logger.info("Write code with pure generation") code = await WriteCodeByGenerate().run( @@ -180,8 +190,10 @@ class MLEngineer(Role): debug_context = [self.get_useful_memories(task_exclude_field={'result', 'code_steps'})[0]] cause_by = WriteCodeByGenerate - if self.make_udfs and len(code.split('\n')) > 2: + if self.make_udfs and len(code.split('\n')) > 4: # make and save user-defined function tools. + logger.warning(f"Making tools for task_id {self.plan.current_task_id}: \ + `{self.plan.current_task.instruction}` \n code {code}") make_tools = MakeTools() code_prompt = f"The following code is about {self.plan.current_task.instruction},\ convert it to be a General Function, {code}" @@ -299,7 +311,8 @@ if __name__ == "__main__": async def main(requirement: str = requirement, auto_run: bool = True): role = MLEngineer(goal=requirement, auto_run=auto_run) - role.make_udfs = True + role.make_udfs = False + role.use_udfs = True await role.run(requirement) fire.Fire(main) From 8afac012b49df5ffb26dc031345c685c748e8797 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Wed, 20 Dec 2023 09:57:19 +0800 Subject: [PATCH 48/68] set the plan.current_task.task_type to udf when use udfs. --- metagpt/roles/ml_engineer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index fa9acadbc..3c1853fd5 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -175,6 +175,8 @@ class MLEngineer(Role): logger.warning("Writing code with user-defined function tools...") logger.info(f"Local user defined function as following:\ \n{json.dumps(list(UDFS_YAML.keys()), indent=2, ensure_ascii=False)}") + # set task_type to `udf` + self.plan.current_task.task_type = 'udf' tool_context, code = await WriteCodeWithTools(schema_path=UDFS_YAML).run( context=context, plan=self.plan, @@ -184,6 +186,7 @@ class MLEngineer(Role): cause_by = WriteCodeWithTools else: logger.info("Write code with pure generation") + # TODO: 添加基于current_task.instruction-code_path的k-v缓存 code = await WriteCodeByGenerate().run( context=context, plan=self.plan, temperature=0.0 ) From 19b0120c15c3ad5cce82256f2cdb374df4507f72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Wed, 20 Dec 2023 09:58:31 +0800 Subject: [PATCH 49/68] restore task_type value. --- metagpt/actions/write_analysis_code.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py index bc069414f..88f22684d 100644 --- a/metagpt/actions/write_analysis_code.py +++ b/metagpt/actions/write_analysis_code.py @@ -178,7 +178,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode): column_info: str = "", **kwargs, ) -> Tuple[List[Message], str]: - task_type = plan.current_task.task_type or 'udf' + task_type = plan.current_task.task_type available_tools = self.available_tools.get(task_type, {}) special_prompt = ML_SPECIFIC_PROMPT.get(task_type, "") code_steps = plan.current_task.code_steps From 913538639ddcf5c129c1681b8734631d0eb4034e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Wed, 20 Dec 2023 12:11:42 +0800 Subject: [PATCH 50/68] feat: --- metagpt/roles/ml_engineer.py | 53 +++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index 3c1853fd5..052b99ad5 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -21,6 +21,7 @@ from metagpt.prompts.ml_engineer import ( PRINT_DATA_COLUMNS ) from metagpt.roles import Role +from metagpt.roles.role import RoleContext from metagpt.roles.kaggle_manager import DownloadData, SubmitResult from metagpt.schema import Message, Plan from metagpt.utils.common import remove_comments, create_func_config @@ -192,16 +193,6 @@ class MLEngineer(Role): ) debug_context = [self.get_useful_memories(task_exclude_field={'result', 'code_steps'})[0]] cause_by = WriteCodeByGenerate - - if self.make_udfs and len(code.split('\n')) > 4: - # make and save user-defined function tools. - logger.warning(f"Making tools for task_id {self.plan.current_task_id}: \ - `{self.plan.current_task.instruction}` \n code {code}") - make_tools = MakeTools() - code_prompt = f"The following code is about {self.plan.current_task.instruction},\ - convert it to be a General Function, {code}" - tool_code = await make_tools.run(code_prompt) - make_tools.save(tool_code) else: logger.info("Write code with tools") schema_path = PROJECT_ROOT / "metagpt/tools/functions/schemas" @@ -219,6 +210,9 @@ class MLEngineer(Role): result, success = await self.execute_code.run(code) print(result) + # make tools for successful code and long code. + if success and self.make_udfs and len(code.split('\n')) > 4: + await self.make_tools(code=code) self.working_memory.add( Message(content=result, role="user", cause_by=ExecutePyCode) ) @@ -304,6 +298,39 @@ class MLEngineer(Role): def get_working_memories(self) -> List[Message]: return self.working_memory.get() + def reset(self): + """Restart role with the same goal.""" + self.plan = Plan(goal=self.plan.goal) + self.execute_code = ExecutePyCode() + + async def make_tools(self, code: str): + """Make user-defined functions(udfs, aka tools) for pure generation code. + + Args: + code (str): pure generation code by class WriteCodeByGenerate. + """ + logger.warning(f"Making tools for task_id {self.plan.current_task_id}: \ + `{self.plan.current_task.instruction}` \n code: \n {code}") + make_tools = MakeTools() + code_prompt = f"The following code is about {self.plan.current_task.instruction},\ + convert it to be a General Function, {code}" + tool_code = await make_tools.run(code_prompt) + # check tool_code by execute_code + logger.info(f"Checking task_id {self.plan.current_task_id} tool code by executor...") + _, success = await self.execute_code.run(tool_code) + make_tool_retries, make_tool_current_retry = 3, 1 + while not success: + tool_code = await make_tools.run(code_prompt) + _, success = await self.execute_code.run(tool_code) + if make_tool_current_retry > make_tool_retries: + logger.error(f"We have tried the maximum number of attempts {make_tool_retries}\ + and still have not created tools for task_id {self.plan.current_task_id} successfully,\ + we will skip it.") + break + # save successful tool code in udf + if success: + make_tools.save(tool_code) + if __name__ == "__main__": requirement = "Run data analysis on sklearn Iris dataset, include a plot" @@ -314,6 +341,12 @@ if __name__ == "__main__": async def main(requirement: str = requirement, auto_run: bool = True): role = MLEngineer(goal=requirement, auto_run=auto_run) + # make udfs + role.make_udfs = True + role.use_udfs = False + await role.run(requirement) + # use udfs + role.reset() role.make_udfs = False role.use_udfs = True await role.run(requirement) From 7b8c15b5df5cdb3a622a51945053f02bbc3dc25b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Wed, 20 Dec 2023 12:15:51 +0800 Subject: [PATCH 51/68] feat: add make_tools and feat function. --- metagpt/roles/ml_engineer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index 052b99ad5..b908d9ef8 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -21,7 +21,6 @@ from metagpt.prompts.ml_engineer import ( PRINT_DATA_COLUMNS ) from metagpt.roles import Role -from metagpt.roles.role import RoleContext from metagpt.roles.kaggle_manager import DownloadData, SubmitResult from metagpt.schema import Message, Plan from metagpt.utils.common import remove_comments, create_func_config From 99945e3493797b117ba022a974912ceeffb8fda4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Wed, 20 Dec 2023 17:57:07 +0800 Subject: [PATCH 52/68] update default_system_msg in BaseWriteAnalysisCode. --- metagpt/actions/write_analysis_code.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py index 88f22684d..924677605 100644 --- a/metagpt/actions/write_analysis_code.py +++ b/metagpt/actions/write_analysis_code.py @@ -33,7 +33,7 @@ class BaseWriteAnalysisCode(Action): # REUSE_CODE_INSTRUCTION = """ATTENTION: DONT include codes from previous tasks in your current code block, include new codes only, DONT repeat codes!""" def process_msg(self, prompt: Union[str, List[Dict], Message, List[Message]], system_msg: str = None): - default_system_msg = system_msg or self.DEFAULT_SYSTEM_MSG + default_system_msg = system_msg or "" # 全部转成list if not isinstance(prompt, list): prompt = [prompt] @@ -231,7 +231,7 @@ class MakeTools(WriteCodeByGenerate): **Notice: 1. Your code must contain a general function start with `def`. 2. Refactor your code to get the most efficient implementation for large input data in the shortest amount of time. - 3. Must use Google style for function docstring, and your code must have function docstring. + 3. Must use Google style for function docstring, and your docstring must be consistent with the code,without missing anything. 4. Write example code after `if __name__ == '__main__':`by using old varibales in old code, and make sure it could be execute in the user's machine. 5. Dont have missing package references.** From aa5c42ff8b99023bc05df075f5c15c486ebd3f2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Wed, 20 Dec 2023 18:12:15 +0800 Subject: [PATCH 53/68] use self.DEFAULT_SYSTEM_MSG in process_msg. --- metagpt/actions/write_analysis_code.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py index 924677605..e50c069f0 100644 --- a/metagpt/actions/write_analysis_code.py +++ b/metagpt/actions/write_analysis_code.py @@ -96,7 +96,7 @@ class WriteCodeByGenerate(BaseWriteAnalysisCode): **kwargs, ) -> str: # context.append(Message(content=self.REUSE_CODE_INSTRUCTION, role="user")) - prompt = self.process_msg(context, system_msg) + prompt = self.process_msg(context, system_msg or self.DEFAULT_SYSTEM_MSG) code_content = await self.llm.aask_code(prompt, **kwargs) return code_content["code"] @@ -269,7 +269,7 @@ class MakeTools(WriteCodeByGenerate): @retry(stop=stop_after_attempt(3), wait=wait_fixed(1)) async def run(self, code_message: List[Message | Dict], **kwargs) -> str: - msgs = self.process_msg(code_message) + msgs = self.process_msg(code_message, self.DEFAULT_SYSTEM_MSG) logger.info(f"\n\nAsk to Make tools:\n{'-'*60}\n {msgs[-1]}") tool_code = await self.llm.aask_code(msgs, **kwargs) max_tries, current_try = 3, 1 From 1145641cdcbb94b3506c820ea10adc31e35d61aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Wed, 20 Dec 2023 18:16:32 +0800 Subject: [PATCH 54/68] update --- .../actions/test_write_analysis_code.py | 22 +------------------ 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/tests/metagpt/actions/test_write_analysis_code.py b/tests/metagpt/actions/test_write_analysis_code.py index 68ca129cc..1a568cdcd 100644 --- a/tests/metagpt/actions/test_write_analysis_code.py +++ b/tests/metagpt/actions/test_write_analysis_code.py @@ -1,7 +1,7 @@ import asyncio import pytest -from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools, WriteCodeWithUDFs +from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools from metagpt.actions.execute_code import ExecutePyCode from metagpt.schema import Message, Plan, Task from metagpt.logs import logger @@ -304,23 +304,3 @@ async def test_write_code_reuse_code_long_for_wine(): success_rate = sum(success) / trials_num logger.info(f"success rate: {success_rate :.2f}") assert success_rate >= 0.8 - - -@pytest.mark.asyncio -async def test_write_code_with_udfs(): - wudf = WriteCodeWithUDFs() - ep = ExecutePyCode() - rsp = await wudf.run("Get Apple stock data for the past 90 days.") - logger.info(rsp) - assert 'metagpt' in rsp - output, output_type = await ep.run(rsp) - assert output_type is True - logger.info(output) - - -@pytest.mark.asyncio -async def test_write_code_with_udfs_no_udf_found(): - wudf = WriteCodeWithUDFs() - rsp = await wudf.run("Identify if there is a dog in the picture.") - logger.info(rsp) - assert 'No udf found' in rsp From 5af4f6b4c524e62dd43ff6f6f6e80062f8427ead Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Wed, 20 Dec 2023 19:56:26 +0800 Subject: [PATCH 55/68] add new test for aask_code about write code by steps. --- tests/metagpt/provider/test_openai.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/metagpt/provider/test_openai.py b/tests/metagpt/provider/test_openai.py index 2b0af37b5..98a3670f1 100644 --- a/tests/metagpt/provider/test_openai.py +++ b/tests/metagpt/provider/test_openai.py @@ -78,3 +78,17 @@ def test_ask_code_list_str(): assert "language" in rsp assert "code" in rsp assert len(rsp["code"]) > 0 + + +@pytest.mark.asyncio +async def test_ask_code_steps2(): + llm = OpenAIGPTAPI() + msg = ["step by setp 生成代码: Step 1. 先生成随机数组a, Step 2. 求a中最大值, Step 3. 绘制数据a的直方图"] + rsp = await llm.aask_code(msg) # -> {'language': 'python', 'code': 'max_value = max(a)\nmax_value'} + print(rsp) + assert "language" in rsp + assert "code" in rsp + assert len(rsp["code"]) > 0 + assert "Step 1" in rsp["code"] + assert "Step 2" in rsp["code"] + assert "Step 3" in rsp["code"] From a39cc30164140588c3b4a938618cfe22893d1438 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Thu, 21 Dec 2023 10:11:07 +0800 Subject: [PATCH 56/68] add test for ml_engineer. --- tests/metagpt/roles/test_daml.py | 36 ++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 tests/metagpt/roles/test_daml.py diff --git a/tests/metagpt/roles/test_daml.py b/tests/metagpt/roles/test_daml.py new file mode 100644 index 000000000..672a3daed --- /dev/null +++ b/tests/metagpt/roles/test_daml.py @@ -0,0 +1,36 @@ +import pytest +from tqdm import tqdm + +from metagpt.logs import logger +from metagpt.roles.ml_engineer import MLEngineer + + +async def make_use_tools(requirement: str, auto_run: bool = True): + """make and use tools for requirement.""" + role = MLEngineer(goal=requirement, auto_run=auto_run) + # make udfs + role.make_udfs = True + role.use_udfs = False + await role.run(requirement) + # use udfs + role.reset() + role.make_udfs = False + role.use_udfs = True + await role.run(requirement) + + +@pytest.mark.asyncio +async def test_make_use_tools(): + requirements = ["Run data analysis on sklearn Iris dataset, include a plot", + "Run data analysis on sklearn Diabetes dataset, include a plot", + "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy", + "Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy", + "Run EDA and visualization on this dataset, train a model to predict survival, report metrics on validation set (20%), dataset: tests/data/titanic.csv"] + success = 0 + for requirement in tqdm(requirements, total=len(requirements)): + try: + await make_use_tools(requirement) + success += 1 + except Exception as e: + logger.error(f"Found Error in {requirement}, {e}") + logger.info(f"success: {round(success/len(requirements), 1)*100}%") From c43e2bed6b916096f117f72db393903694a7c090 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Thu, 21 Dec 2023 10:14:25 +0800 Subject: [PATCH 57/68] update condition for DebugCode. --- metagpt/roles/ml_engineer.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index b908d9ef8..9fa12b41d 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -99,7 +99,7 @@ class MLEngineer(Role): self.plan.finish_current_task() self.working_memory.clear() - if self.use_tools: + if self.use_tools or self.use_udfs: success, new_code = await self._update_data_columns() if success: task.code = task.code + "\n\n" + new_code @@ -159,7 +159,8 @@ class MLEngineer(Role): # print(context) # print("*" * 10) # breakpoint() - if counter > 0 and self.use_tools: + if counter > 0 and (self.use_tools or self.use_udfs): + logger.warning('We got a bug code, now start to debug...') code = await DebugCode().run( plan=self.plan.current_task.instruction, code=code, @@ -168,11 +169,11 @@ class MLEngineer(Role): ) logger.info(f"new code \n{code}") cause_by = DebugCode - elif not self.use_tools or self.plan.current_task.task_type == "other": + elif not self.use_tools or self.plan.current_task.task_type in ("other", "udf"): if self.use_udfs: # use user-defined function tools. from metagpt.tools.functions.libs.udf import UDFS_YAML - logger.warning("Writing code with user-defined function tools...") + logger.warning("Writing code with user-defined function tools by WriteCodeWithTools.") logger.info(f"Local user defined function as following:\ \n{json.dumps(list(UDFS_YAML.keys()), indent=2, ensure_ascii=False)}") # set task_type to `udf` @@ -211,6 +212,7 @@ class MLEngineer(Role): print(result) # make tools for successful code and long code. if success and self.make_udfs and len(code.split('\n')) > 4: + logger.info('Execute code successfully. Now start to make tools ...') await self.make_tools(code=code) self.working_memory.add( Message(content=result, role="user", cause_by=ExecutePyCode) From 1160f075360aecf53b6604bcdbc0cc98d4913f87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Thu, 21 Dec 2023 11:03:54 +0800 Subject: [PATCH 58/68] update reset. --- metagpt/roles/ml_engineer.py | 87 +++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 41 deletions(-) diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index 5d514a18f..3e656304b 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -25,7 +25,7 @@ from metagpt.roles.kaggle_manager import DownloadData, SubmitResult from metagpt.schema import Message, Plan from metagpt.utils.common import remove_comments, create_func_config from metagpt.utils.save_code import save_code_file -from metagpt.utils.recovery_util import save_history, load_history +# from metagpt.utils.recovery_util import save_history, load_history class UpdateDataColumns(Action): @@ -297,6 +297,7 @@ class MLEngineer(Role): """Restart role with the same goal.""" self.plan = Plan(goal=self.plan.goal) self.execute_code = ExecutePyCode() + self.working_memory = Memory() async def make_tools(self, code: str): """Make user-defined functions(udfs, aka tools) for pure generation code. @@ -328,23 +329,27 @@ class MLEngineer(Role): if __name__ == "__main__": - # requirement = "Run data analysis on sklearn Iris dataset, include a plot" + requirement = "Run data analysis on sklearn Iris dataset, include a plot" # requirement = "Run data analysis on sklearn Diabetes dataset, include a plot" # requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy" # requirement = "Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy" # requirement = "Run EDA and visualization on this dataset, train a model to predict survival, report metrics on validation set (20%), dataset: workspace/titanic/train.csv" - # async def main(requirement: str = requirement, auto_run: bool = True): - # role = MLEngineer(goal=requirement, auto_run=auto_run) - # # make udfs - # role.make_udfs = True - # role.use_udfs = False - # await role.run(requirement) - # # use udfs - # role.reset() - # role.make_udfs = False - # role.use_udfs = True - # await role.run(requirement) + async def main(requirement: str = requirement, auto_run: bool = True): + role = MLEngineer(goal=requirement, auto_run=auto_run) + # make udfs + role.use_tools = False + role.use_code_steps = False + role.make_udfs = True + role.use_udfs = False + await role.run(requirement) + # use udfs + role.reset() + role.make_udfs = False + role.use_udfs = True + role.use_code_steps = False + role.use_tools = False + await role.run(requirement) # requirement = "Perform data analysis on the provided data. Train a model to predict the target variable Survived. Include data preprocessing, feature engineering, and modeling in your pipeline. The metric is accuracy." @@ -358,44 +363,44 @@ if __name__ == "__main__": # data_path = f"{DATA_PATH}/santander-customer-transaction-prediction" # requirement = f"This is a customers financial dataset. Your goal is to predict which customers will make a specific transaction in the future. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report F1 Score on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv' ." - data_path = f"{DATA_PATH}/house-prices-advanced-regression-techniques" - requirement = f"This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'." + # data_path = f"{DATA_PATH}/house-prices-advanced-regression-techniques" + # requirement = f"This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'." - save_dir = "" + # save_dir = "" - # save_dir = DATA_PATH / "output" / "2023-12-14_20-40-34" + # # save_dir = DATA_PATH / "output" / "2023-12-14_20-40-34" - async def main(requirement: str = requirement, auto_run: bool = True, save_dir: str = save_dir): - """ - The main function to run the MLEngineer with optional history loading. + # async def main(requirement: str = requirement, auto_run: bool = True, save_dir: str = save_dir): + # """ + # The main function to run the MLEngineer with optional history loading. - Args: - requirement (str): The requirement for the MLEngineer. - auto_run (bool): Whether to auto-run the MLEngineer. - save_dir (str): The directory from which to load the history or to save the new history. + # Args: + # requirement (str): The requirement for the MLEngineer. + # auto_run (bool): Whether to auto-run the MLEngineer. + # save_dir (str): The directory from which to load the history or to save the new history. - Raises: - Exception: If an error occurs during execution, log the error and save the history. - """ - if save_dir: - logger.info("Resuming from history trajectory") - plan, nb = load_history(save_dir) - role = MLEngineer(goal=requirement, auto_run=auto_run) - role.plan = Plan(**plan) - role.execute_code = ExecutePyCode(nb) + # Raises: + # Exception: If an error occurs during execution, log the error and save the history. + # """ + # if save_dir: + # logger.info("Resuming from history trajectory") + # plan, nb = load_history(save_dir) + # role = MLEngineer(goal=requirement, auto_run=auto_run) + # role.plan = Plan(**plan) + # role.execute_code = ExecutePyCode(nb) - else: - logger.info("Run from scratch") - role = MLEngineer(goal=requirement, auto_run=auto_run) + # else: + # logger.info("Run from scratch") + # role = MLEngineer(goal=requirement, auto_run=auto_run) - try: - await role.run(requirement) - except Exception as e: + # try: + # await role.run(requirement) + # except Exception as e: - save_path = save_history(role, save_dir) + # save_path = save_history(role, save_dir) - logger.exception(f"An error occurred: {e}, save trajectory here: {save_path}") + # logger.exception(f"An error occurred: {e}, save trajectory here: {save_path}") fire.Fire(main) From 82dce58e4e3c646b3cb2190c8db9a854bc297969 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Thu, 21 Dec 2023 13:33:42 +0800 Subject: [PATCH 59/68] update DEFAULT_SYSTEM_MSG. --- metagpt/actions/write_analysis_code.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py index 02aba0e62..d457ea75b 100644 --- a/metagpt/actions/write_analysis_code.py +++ b/metagpt/actions/write_analysis_code.py @@ -29,7 +29,7 @@ from metagpt.utils.common import create_func_config, remove_comments class BaseWriteAnalysisCode(Action): - DEFAULT_SYSTEM_MSG = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt + DEFAULT_SYSTEM_MSG = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt # REUSE_CODE_INSTRUCTION = """ATTENTION: DONT include codes from previous tasks in your current code block, include new codes only, DONT repeat codes!""" def process_msg(self, prompt: Union[str, List[Dict], Message, List[Message]], system_msg: str = None): From e8f5ce0f0a64c222af06b59588707798d3444a6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Thu, 21 Dec 2023 13:34:31 +0800 Subject: [PATCH 60/68] update use_udfs. --- metagpt/roles/ml_engineer.py | 43 ++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index 7e5cc8caf..092229ec9 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -148,7 +148,16 @@ class MLEngineer(Role): ) logger.info(f"new code \n{code}") cause_by = DebugCode - elif not self.use_tools or self.plan.current_task.task_type in ("other", "udf"): + elif not self.use_tools or self.plan.current_task.task_type == 'other': + logger.info("Write code with pure generation") + # TODO: 添加基于current_task.instruction-code_path的k-v缓存 + code = await WriteCodeByGenerate().run( + context=context, plan=self.plan, temperature=0.0 + ) + debug_context = [self.get_useful_memories(task_exclude_field={'result', 'code_steps'})[0]] + cause_by = WriteCodeByGenerate + else: + logger.info("Write code with tools") if self.use_udfs: # use user-defined function tools. from metagpt.tools.functions.libs.udf import UDFS_YAML @@ -165,24 +174,14 @@ class MLEngineer(Role): debug_context = tool_context cause_by = WriteCodeWithTools else: - logger.info("Write code with pure generation") - # TODO: 添加基于current_task.instruction-code_path的k-v缓存 - code = await WriteCodeByGenerate().run( - context=context, plan=self.plan, temperature=0.0 + schema_path = PROJECT_ROOT / "metagpt/tools/functions/schemas" + tool_context, code = await WriteCodeWithTools(schema_path=schema_path).run( + context=context, + plan=self.plan, + column_info=self.data_desc.get("column_info", ""), ) - debug_context = [self.get_useful_memories(task_exclude_field={'result', 'code_steps'})[0]] - cause_by = WriteCodeByGenerate - else: - logger.info("Write code with tools") - schema_path = PROJECT_ROOT / "metagpt/tools/functions/schemas" - tool_context, code = await WriteCodeWithTools(schema_path=schema_path).run( - context=context, - plan=self.plan, - column_info=self.data_desc.get("column_info", ""), - ) - debug_context = tool_context - cause_by = WriteCodeWithTools - + debug_context = tool_context + cause_by = WriteCodeWithTools self.working_memory.add( Message(content=code, role="assistant", cause_by=cause_by) ) @@ -346,10 +345,10 @@ if __name__ == "__main__": # data_path = f"{DATA_PATH}/santander-customer-transaction-prediction" # requirement = f"This is a customers financial dataset. Your goal is to predict which customers will make a specific transaction in the future. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report F1 Score on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv' ." - data_path = f"{DATA_PATH}/house-prices-advanced-regression-techniques" - requirement = f"This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'." - save_dir = "" - # save_dir = DATA_PATH / "output" / "2023-12-14_20-40-34" + # data_path = f"{DATA_PATH}/house-prices-advanced-regression-techniques" + # requirement = f"This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'." + # save_dir = "" + # # save_dir = DATA_PATH / "output" / "2023-12-14_20-40-34" async def main(requirement: str = requirement, auto_run: bool = True, use_tools: bool = False, use_code_steps: bool = False, save_dir: str = ""): """ From 94b352cf2375296567bb1033efee85855f64e724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Thu, 21 Dec 2023 16:43:23 +0800 Subject: [PATCH 61/68] update MakeTools DEFAULT_SYSTEM_MSG. --- metagpt/actions/write_analysis_code.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py index d457ea75b..099934c5a 100644 --- a/metagpt/actions/write_analysis_code.py +++ b/metagpt/actions/write_analysis_code.py @@ -236,7 +236,7 @@ class MakeTools(WriteCodeByGenerate): 3. Must use Google style for function docstring, and your docstring must be consistent with the code,without missing anything. 4. Write example code after `if __name__ == '__main__':`by using old varibales in old code, and make sure it could be execute in the user's machine. - 5. Dont have missing package references.** + 5. Only use the imported packages** """ def __init__(self, name: str = '', context: list[Message] = None, llm: LLM = None, workspace: str = None): From 6d36511249fbbcd4fc595f9a9c11861cac94c8d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Thu, 21 Dec 2023 16:58:02 +0800 Subject: [PATCH 62/68] update make tools: code -> remove_comments(code). --- metagpt/roles/ml_engineer.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index 092229ec9..f44d42554 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -20,6 +20,7 @@ from metagpt.roles.kaggle_manager import DownloadData, SubmitResult from metagpt.schema import Message, Plan from metagpt.utils.save_code import save_code_file from metagpt.utils.recovery_util import save_history, load_history +from metagpt.utils.common import remove_comments class MLEngineer(Role): @@ -189,7 +190,7 @@ class MLEngineer(Role): result, success = await self.execute_code.run(code) print(result) # make tools for successful code and long code. - if success and self.make_udfs and len(code.split('\n')) > 4: + if success and self.make_udfs and len(remove_comments(code).split('\n')) > 4: logger.info('Execute code successfully. Now start to make tools ...') await self.make_tools(code=code) self.working_memory.add( @@ -326,12 +327,12 @@ if __name__ == "__main__": role.use_udfs = False await role.run(requirement) # use udfs - role.reset() - role.make_udfs = False - role.use_udfs = True - role.use_code_steps = False - role.use_tools = False - await role.run(requirement) + # role.reset() + # role.make_udfs = False + # role.use_udfs = True + # role.use_code_steps = False + # role.use_tools = False + # await role.run(requirement) # requirement = "Perform data analysis on the provided data. Train a model to predict the target variable Survived. Include data preprocessing, feature engineering, and modeling in your pipeline. The metric is accuracy." @@ -381,4 +382,4 @@ if __name__ == "__main__": logger.exception(f"An error occurred: {e}, save trajectory here: {save_path}") - fire.Fire(main) + fire.Fire(run_udfs) From bb7f4c33105e0a020c8249fb8477c0b3365b1fb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Thu, 21 Dec 2023 17:16:33 +0800 Subject: [PATCH 63/68] update code prompt for make tools. --- metagpt/actions/write_analysis_code.py | 13 +++++++++++-- metagpt/roles/ml_engineer.py | 7 +++---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py index 099934c5a..c9acb32b9 100644 --- a/metagpt/actions/write_analysis_code.py +++ b/metagpt/actions/write_analysis_code.py @@ -270,9 +270,18 @@ class MakeTools(WriteCodeByGenerate): saved_path.write_text(tool_code, encoding='utf-8') @retry(stop=stop_after_attempt(3), wait=wait_fixed(1)) - async def run(self, code_message: List[Message | Dict], **kwargs) -> str: - msgs = self.process_msg(code_message, self.DEFAULT_SYSTEM_MSG) + async def run(self, code: str, code_desc: str = None, **kwargs) -> str: + # 拼接code prompt + code_prompt = f"The following code is about {code_desc}, convert it to be a General Function, {code}" + msgs = self.process_msg(code_prompt, self.DEFAULT_SYSTEM_MSG) logger.info(f"\n\nAsk to Make tools:\n{'-'*60}\n {msgs[-1]}") + + # 更新kwargs + if 'code' in kwargs: + kwargs.pop('code') + if 'code_desc' in kwargs: + kwargs.pop('code_desc') + tool_code = await self.llm.aask_code(msgs, **kwargs) max_tries, current_try = 3, 1 func_name = self.parse_function_name(tool_code['code']) diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index f44d42554..db2dfeeff 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -291,15 +291,14 @@ class MLEngineer(Role): logger.warning(f"Making tools for task_id {self.plan.current_task_id}: \ `{self.plan.current_task.instruction}` \n code: \n {code}") make_tools = MakeTools() - code_prompt = f"The following code is about {self.plan.current_task.instruction},\ - convert it to be a General Function, {code}" - tool_code = await make_tools.run(code_prompt) + tool_code = await make_tools.run(code, self.plan.current_task.instruction) # check tool_code by execute_code logger.info(f"Checking task_id {self.plan.current_task_id} tool code by executor...") _, success = await self.execute_code.run(tool_code) make_tool_retries, make_tool_current_retry = 3, 1 while not success: - tool_code = await make_tools.run(code_prompt) + # tool_code = await make_tools.run(code_prompt) + tool_code = await make_tools.run(code) _, success = await self.execute_code.run(tool_code) if make_tool_current_retry > make_tool_retries: logger.error(f"We have tried the maximum number of attempts {make_tool_retries}\ From 7806013dcebf611d26581d170c4e7c2fb7ee673a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Fri, 22 Dec 2023 14:07:26 +0800 Subject: [PATCH 64/68] update: use WriteCodeByGenerate conditions. --- metagpt/roles/ml_engineer.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index db2dfeeff..c2df4bb79 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -149,7 +149,8 @@ class MLEngineer(Role): ) logger.info(f"new code \n{code}") cause_by = DebugCode - elif not self.use_tools or self.plan.current_task.task_type == 'other': + elif (not self.use_tools and not self.use_udfs) or ( + self.plan.current_task.task_type == 'other' and not self.use_udfs): logger.info("Write code with pure generation") # TODO: 添加基于current_task.instruction-code_path的k-v缓存 code = await WriteCodeByGenerate().run( @@ -326,12 +327,12 @@ if __name__ == "__main__": role.use_udfs = False await role.run(requirement) # use udfs - # role.reset() - # role.make_udfs = False - # role.use_udfs = True - # role.use_code_steps = False - # role.use_tools = False - # await role.run(requirement) + role.reset() + role.make_udfs = False + role.use_udfs = True + role.use_code_steps = False + role.use_tools = False + await role.run(requirement) # requirement = "Perform data analysis on the provided data. Train a model to predict the target variable Survived. Include data preprocessing, feature engineering, and modeling in your pipeline. The metric is accuracy." From be47f6171daa61b3a4ef7249379f68aacfd73917 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 26 Dec 2023 14:08:10 +0800 Subject: [PATCH 65/68] resolve CR in MR17. --- metagpt/roles/ml_engineer.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index c2df4bb79..cafd9b968 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -168,22 +168,16 @@ class MLEngineer(Role): \n{json.dumps(list(UDFS_YAML.keys()), indent=2, ensure_ascii=False)}") # set task_type to `udf` self.plan.current_task.task_type = 'udf' - tool_context, code = await WriteCodeWithTools(schema_path=UDFS_YAML).run( - context=context, - plan=self.plan, - column_info=self.data_desc.get("column_info", ""), - ) - debug_context = tool_context - cause_by = WriteCodeWithTools + schema_path = UDFS_YAML else: schema_path = PROJECT_ROOT / "metagpt/tools/functions/schemas" - tool_context, code = await WriteCodeWithTools(schema_path=schema_path).run( - context=context, - plan=self.plan, - column_info=self.data_desc.get("column_info", ""), - ) - debug_context = tool_context - cause_by = WriteCodeWithTools + tool_context, code = await WriteCodeWithTools(schema_path=schema_path).run( + context=context, + plan=self.plan, + column_info=self.data_desc.get("column_info", ""), + ) + debug_context = tool_context + cause_by = WriteCodeWithTools self.working_memory.add( Message(content=code, role="assistant", cause_by=cause_by) ) @@ -301,6 +295,7 @@ class MLEngineer(Role): # tool_code = await make_tools.run(code_prompt) tool_code = await make_tools.run(code) _, success = await self.execute_code.run(tool_code) + make_tool_retries += 1 if make_tool_current_retry > make_tool_retries: logger.error(f"We have tried the maximum number of attempts {make_tool_retries}\ and still have not created tools for task_id {self.plan.current_task_id} successfully,\ From b43cdb23f7921daf9ba4866746928e8d38bc55e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 26 Dec 2023 14:11:13 +0800 Subject: [PATCH 66/68] update make_tools. --- metagpt/roles/ml_engineer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index cafd9b968..b991d9329 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -295,7 +295,7 @@ class MLEngineer(Role): # tool_code = await make_tools.run(code_prompt) tool_code = await make_tools.run(code) _, success = await self.execute_code.run(tool_code) - make_tool_retries += 1 + make_tool_current_retry += 1 if make_tool_current_retry > make_tool_retries: logger.error(f"We have tried the maximum number of attempts {make_tool_retries}\ and still have not created tools for task_id {self.plan.current_task_id} successfully,\ From b49db2d62f55db6823335ecad54bf841f348245e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 26 Dec 2023 14:14:18 +0800 Subject: [PATCH 67/68] resolve cr in MR17. --- metagpt/actions/write_analysis_code.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py index c9acb32b9..3e912ace5 100644 --- a/metagpt/actions/write_analysis_code.py +++ b/metagpt/actions/write_analysis_code.py @@ -33,7 +33,7 @@ class BaseWriteAnalysisCode(Action): # REUSE_CODE_INSTRUCTION = """ATTENTION: DONT include codes from previous tasks in your current code block, include new codes only, DONT repeat codes!""" def process_msg(self, prompt: Union[str, List[Dict], Message, List[Message]], system_msg: str = None): - default_system_msg = system_msg or "" + default_system_msg = system_msg or self.DEFAULT_SYSTEM_MSG # 全部转成list if not isinstance(prompt, list): prompt = [prompt] @@ -96,7 +96,7 @@ class WriteCodeByGenerate(BaseWriteAnalysisCode): **kwargs, ) -> str: # context.append(Message(content=self.REUSE_CODE_INSTRUCTION, role="user")) - prompt = self.process_msg(context, system_msg or self.DEFAULT_SYSTEM_MSG) + prompt = self.process_msg(context, system_msg) code_content = await self.llm.aask_code(prompt, **kwargs) return code_content["code"] From a2743d2b1fe47761db9be24ca6a49e526b9289eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=A3=92=E6=A3=92?= Date: Tue, 26 Dec 2023 15:48:04 +0800 Subject: [PATCH 68/68] resolve cr in MR17. --- metagpt/actions/write_analysis_code.py | 34 ++++++----- metagpt/roles/ml_engineer.py | 80 ++++++++++---------------- tests/metagpt/roles/test_daml.py | 4 ++ 3 files changed, 55 insertions(+), 63 deletions(-) diff --git a/metagpt/actions/write_analysis_code.py b/metagpt/actions/write_analysis_code.py index 3e912ace5..9691f888f 100644 --- a/metagpt/actions/write_analysis_code.py +++ b/metagpt/actions/write_analysis_code.py @@ -249,6 +249,7 @@ class MakeTools(WriteCodeByGenerate): super().__init__(name, context, llm) self.workspace = workspace or str(Path(__file__).parents[1].joinpath("./tools/functions/libs/udf")) self.file_suffix: str = '.py' + self.context = [] def parse_function_name(self, function_code: str) -> str: # 定义正则表达式模式 @@ -270,11 +271,14 @@ class MakeTools(WriteCodeByGenerate): saved_path.write_text(tool_code, encoding='utf-8') @retry(stop=stop_after_attempt(3), wait=wait_fixed(1)) - async def run(self, code: str, code_desc: str = None, **kwargs) -> str: + async def run(self, code: str | List[dict], code_desc: str = None, **kwargs) -> str: # 拼接code prompt code_prompt = f"The following code is about {code_desc}, convert it to be a General Function, {code}" - msgs = self.process_msg(code_prompt, self.DEFAULT_SYSTEM_MSG) - logger.info(f"\n\nAsk to Make tools:\n{'-'*60}\n {msgs[-1]}") + if not self.context: + self.context = self.process_msg(code_prompt) + else: + self.context.append(self.process_msg(code_prompt)[-1]) + logger.info(f"\n\nAsk to Make tools:\n{'-'*60}\n {self.context[-1]}") # 更新kwargs if 'code' in kwargs: @@ -282,17 +286,21 @@ class MakeTools(WriteCodeByGenerate): if 'code_desc' in kwargs: kwargs.pop('code_desc') - tool_code = await self.llm.aask_code(msgs, **kwargs) - max_tries, current_try = 3, 1 - func_name = self.parse_function_name(tool_code['code']) - while current_try < max_tries and func_name is None: - logger.info(f"\n\nTools Respond\n{'-'*60}\n: {tool_code}") - logger.warning(f"No function name found in code, we will retry make tools. \n\n{tool_code['code']}\n") - msgs.append({'role': 'assistant', 'content': 'We need a general function in above code,but not found function.'}) - tool_code = await self.llm.aask_code(msgs, **kwargs) - current_try += 1 + max_tries, current_try = 3, 0 + while True: + tool_code = await self.llm.aask_code(self.context, **kwargs) func_name = self.parse_function_name(tool_code['code']) - if func_name is not None: + current_try += 1 + # make tools failed, add error message to context. + if not func_name: + logger.info(f"\n\nTools Respond\n{'-'*60}\n: {tool_code}") + logger.error(f"No function name found in code, we will retry make tools.\n{tool_code['code']}\n") + self.context.append({'role': 'user', 'content': 'We need a general function in above code,but not found function.'}) + # end make tools + if func_name is not None or current_try >= max_tries: + if current_try >= max_tries: + logger.error(f"We have tried the maximum number of attempts {max_tries}\ + and still have not created tools successfully, we will skip it.") break logger.info(f"\n\nTools Respond\n{'-'*60}\n: {tool_code}") self.save(tool_code['code']) diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index b991d9329..cec572991 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -286,65 +286,45 @@ class MLEngineer(Role): logger.warning(f"Making tools for task_id {self.plan.current_task_id}: \ `{self.plan.current_task.instruction}` \n code: \n {code}") make_tools = MakeTools() - tool_code = await make_tools.run(code, self.plan.current_task.instruction) - # check tool_code by execute_code - logger.info(f"Checking task_id {self.plan.current_task_id} tool code by executor...") - _, success = await self.execute_code.run(tool_code) - make_tool_retries, make_tool_current_retry = 3, 1 - while not success: - # tool_code = await make_tools.run(code_prompt) - tool_code = await make_tools.run(code) - _, success = await self.execute_code.run(tool_code) + make_tool_retries, make_tool_current_retry = 3, 0 + while True: + # start make tools + tool_code = await make_tools.run(code, self.plan.current_task.instruction) make_tool_current_retry += 1 - if make_tool_current_retry > make_tool_retries: - logger.error(f"We have tried the maximum number of attempts {make_tool_retries}\ - and still have not created tools for task_id {self.plan.current_task_id} successfully,\ - we will skip it.") + + # check tool_code by execute_code + logger.info(f"Checking task_id {self.plan.current_task_id} tool code by executor...") + execute_result, execute_success = await self.execute_code.run(tool_code) + if not execute_success: + logger.error(f"Tool code faild to execute, \n{execute_result}\n.We will try to fix it ...") + # end make tools + if execute_success or make_tool_current_retry >= make_tool_retries: + if make_tool_current_retry >= make_tool_retries: + logger.error(f"We have tried the maximum number of attempts {make_tool_retries}\ + and still have not created tools for task_id {self.plan.current_task_id} successfully,\ + we will skip it.") break # save successful tool code in udf - if success: + if execute_success: make_tools.save(tool_code) if __name__ == "__main__": - requirement = "Run data analysis on sklearn Iris dataset, include a plot" - # requirement = "Run data analysis on sklearn Diabetes dataset, include a plot" - # requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy" - # requirement = "Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy" - # requirement = "Run EDA and visualization on this dataset, train a model to predict survival, report metrics on validation set (20%), dataset: workspace/titanic/train.csv" - - async def run_udfs(requirement: str = requirement, auto_run: bool = True): - role = MLEngineer(goal=requirement, auto_run=auto_run) - # make udfs - role.use_tools = False - role.use_code_steps = False - role.make_udfs = True - role.use_udfs = False - await role.run(requirement) - # use udfs - role.reset() - role.make_udfs = False - role.use_udfs = True - role.use_code_steps = False - role.use_tools = False - await role.run(requirement) - + requirement = "Perform data analysis on the provided data. Train a model to predict the target variable Survived. Include data preprocessing, feature engineering, and modeling in your pipeline. The metric is accuracy." - # requirement = "Perform data analysis on the provided data. Train a model to predict the target variable Survived. Include data preprocessing, feature engineering, and modeling in your pipeline. The metric is accuracy." + data_path = f"{DATA_PATH}/titanic" + requirement = f"This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'." + requirement = f"Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy" + data_path = f"{DATA_PATH}/icr-identify-age-related-conditions" + requirement = f"This is a medical dataset with over fifty anonymized health characteristics linked to three age-related conditions. Your goal is to predict whether a subject has or has not been diagnosed with one of these conditions.The target column is Class. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report f1 score on the eval data. Train data path: {data_path}/split_train.csv, eval data path: {data_path}/split_eval.csv." - # data_path = f"{DATA_PATH}/titanic" - # requirement = f"This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'." - # requirement = f"Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy" - # data_path = f"{DATA_PATH}/icr-identify-age-related-conditions" - # requirement = f"This is a medical dataset with over fifty anonymized health characteristics linked to three age-related conditions. Your goal is to predict whether a subject has or has not been diagnosed with one of these conditions.The target column is Class. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report f1 score on the eval data. Train data path: {data_path}/split_train.csv, eval data path: {data_path}/split_eval.csv." + data_path = f"{DATA_PATH}/santander-customer-transaction-prediction" + requirement = f"This is a customers financial dataset. Your goal is to predict which customers will make a specific transaction in the future. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report F1 Score on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv' ." - # data_path = f"{DATA_PATH}/santander-customer-transaction-prediction" - # requirement = f"This is a customers financial dataset. Your goal is to predict which customers will make a specific transaction in the future. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report F1 Score on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv' ." - - # data_path = f"{DATA_PATH}/house-prices-advanced-regression-techniques" - # requirement = f"This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'." - # save_dir = "" - # # save_dir = DATA_PATH / "output" / "2023-12-14_20-40-34" + data_path = f"{DATA_PATH}/house-prices-advanced-regression-techniques" + requirement = f"This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'." + save_dir = "" + # save_dir = DATA_PATH / "output" / "2023-12-14_20-40-34" async def main(requirement: str = requirement, auto_run: bool = True, use_tools: bool = False, use_code_steps: bool = False, save_dir: str = ""): """ @@ -377,4 +357,4 @@ if __name__ == "__main__": logger.exception(f"An error occurred: {e}, save trajectory here: {save_path}") - fire.Fire(run_udfs) + fire.Fire(main) diff --git a/tests/metagpt/roles/test_daml.py b/tests/metagpt/roles/test_daml.py index 672a3daed..55b425316 100644 --- a/tests/metagpt/roles/test_daml.py +++ b/tests/metagpt/roles/test_daml.py @@ -9,6 +9,8 @@ async def make_use_tools(requirement: str, auto_run: bool = True): """make and use tools for requirement.""" role = MLEngineer(goal=requirement, auto_run=auto_run) # make udfs + role.use_tools = False + role.use_code_steps = False role.make_udfs = True role.use_udfs = False await role.run(requirement) @@ -16,6 +18,8 @@ async def make_use_tools(requirement: str, auto_run: bool = True): role.reset() role.make_udfs = False role.use_udfs = True + role.use_code_steps = False + role.use_tools = False await role.run(requirement)