From 9963f90c92a4a42b6c341f8c6fa6e7765a42340b Mon Sep 17 00:00:00 2001 From: garylin2099 Date: Wed, 14 Aug 2024 14:58:20 +0800 Subject: [PATCH 1/2] enable da for doc qa --- metagpt/configs/llm_config.py | 5 ----- metagpt/prompts/di/data_analyst.py | 3 ++- metagpt/roles/di/data_analyst.py | 4 ++-- metagpt/tools/libs/editor.py | 5 ++++- tests/metagpt/environment/mgx_env/run_mgx_env.py | 4 ++++ 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/metagpt/configs/llm_config.py b/metagpt/configs/llm_config.py index c5605be21..57913956c 100644 --- a/metagpt/configs/llm_config.py +++ b/metagpt/configs/llm_config.py @@ -101,8 +101,3 @@ class LLMConfig(YamlModel): @classmethod def check_timeout(cls, v): return v or LLM_API_TIMEOUT - - @field_validator("compress_type") - @classmethod - def check_compress_type(cls, v): - return CompressType.get_type(v) diff --git a/metagpt/prompts/di/data_analyst.py b/metagpt/prompts/di/data_analyst.py index 3e4dac6f5..9d7f3ec50 100644 --- a/metagpt/prompts/di/data_analyst.py +++ b/metagpt/prompts/di/data_analyst.py @@ -7,7 +7,8 @@ EXTRA_INSTRUCTION = """ - Make sure the command_name are certainly in Available Commands when you use the Browser tool. - For information searching requirement, you should use the Browser tool instead of web scraping. - When no link is provided, you should use the Browser tool to search for the information. -7. When you are making plan. It is highly recommend to plan and append all the tasks in first response once time. +7. When you are making plan. It is highly recommend to plan and append all the tasks in first response once time, except for 7.1. +7.1. When the requirement is given with a file, read the file first through either Editor.read (write code instead for excel) WITHOUT a plan. After reading the file content, use RoleZero.reply_to_human if the requirement can be answered straightaway, otherwise, make a plan if further calculation is needed. 8. Don't finish_current_task multiple times for the same task. 9. Finish current task timely, such as when the code is written and executed successfully. 10. When using the command 'end', add the command 'finish_current_task' before it. diff --git a/metagpt/roles/di/data_analyst.py b/metagpt/roles/di/data_analyst.py index f65042217..457a34d03 100644 --- a/metagpt/roles/di/data_analyst.py +++ b/metagpt/roles/di/data_analyst.py @@ -26,11 +26,11 @@ from metagpt.tools.tool_registry import register_tool class DataAnalyst(RoleZero): name: str = "David" profile: str = "DataAnalyst" - goal: str = "Take on any data-related tasks, such as data analysis, machine learning, deep learning, web browsing, web scraping, web searching, web deployment, terminal operation, etc." + goal: str = "Take on any data-related tasks, such as data analysis, machine learning, deep learning, web browsing, web scraping, web searching, web deployment, terminal operation, document QA & analysis, etc." instruction: str = ROLE_INSTRUCTION + EXTRA_INSTRUCTION task_type_desc: str = TASK_TYPE_DESC - tools: list[str] = ["Plan", "DataAnalyst", "RoleZero", "Browser"] + tools: list[str] = ["Plan", "DataAnalyst", "RoleZero", "Browser", "Editor:write,read"] custom_tools: list[str] = ["web scraping", "Terminal"] custom_tool_recommender: ToolRecommender = None experience_retriever: Annotated[ExpRetriever, Field(exclude=True)] = KeywordExpRetriever() diff --git a/metagpt/tools/libs/editor.py b/metagpt/tools/libs/editor.py index c2fdcb859..1153e780d 100644 --- a/metagpt/tools/libs/editor.py +++ b/metagpt/tools/libs/editor.py @@ -24,7 +24,10 @@ class FileBlock(BaseModel): @register_tool() class Editor(BaseModel): - """A tool for reading, understanding, writing, and editing files""" + """ + A tool for reading, understanding, writing, and editing files. + Support local file including text-based files (txt, md, json, py, html, js, css, etc.), pdf, docx, excluding images, excel, or online links + """ model_config = ConfigDict(arbitrary_types_allowed=True) diff --git a/tests/metagpt/environment/mgx_env/run_mgx_env.py b/tests/metagpt/environment/mgx_env/run_mgx_env.py index 69efb32da..b495d376a 100644 --- a/tests/metagpt/environment/mgx_env/run_mgx_env.py +++ b/tests/metagpt/environment/mgx_env/run_mgx_env.py @@ -143,6 +143,10 @@ clone https://github.com/garylin2099/simple_calculator, checkout a new branch na Commit your changes and push, finally, create a PR to the master branch of https://github.com/mannaandpoem/simple_calculator. """ IMAGE2CODE_REQ = "Please write a frontend web page similar to this image /Users/gary/Files/temp/workspace/temp_img.png, I want the same title and color. code only" +DOC_QA_REQ1 = "Tell me what this paper is about /Users/gary/Files/temp/workspace/2308.09687.pdf" +DOC_QA_REQ2 = "Summarize this doc /Users/gary/Files/temp/workspace/2401.14295.pdf" +DOC_QA_REQ3 = "请总结/Users/gary/Files/temp/workspace/2309.04658.pdf里的关键点" +DOC_QA_REQ4 = "这份报表/Users/gary/Files/temp/workspace/9929550.md中,营业收入TOP3产品各自的收入占比是多少" TL_CHAT1 = """Summarize the paper for me""" # expecting clarification TL_CHAT2 = """Solve the issue at this link""" # expecting clarification From 37f2a4100306ffe04801759645a62845144343fc Mon Sep 17 00:00:00 2001 From: garylin2099 Date: Wed, 14 Aug 2024 22:39:15 +0800 Subject: [PATCH 2/2] add one arg for write_and_exec_code to avoid llm hallucinate a code arg --- metagpt/roles/di/data_analyst.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/metagpt/roles/di/data_analyst.py b/metagpt/roles/di/data_analyst.py index 457a34d03..d7a42a88f 100644 --- a/metagpt/roles/di/data_analyst.py +++ b/metagpt/roles/di/data_analyst.py @@ -51,8 +51,12 @@ class DataAnalyst(RoleZero): } ) - async def write_and_exec_code(self): - """Write a code block for current task and execute it in an interactive notebook environment. No argument is needed.""" + async def write_and_exec_code(self, instruction: str = ""): + """Write a code block for current task and execute it in an interactive notebook environment. + + Args: + instruction (optional, str): Further hints or notice other than the current task instruction, must be very concise and can be empty. Defaults to "". + """ if self.planner.plan: logger.info(f"Current task {self.planner.plan.current_task}") @@ -64,6 +68,7 @@ class DataAnalyst(RoleZero): if self.planner.current_task: # clear task result from plan to save token, since it has been in memory plan_status = self.planner.get_plan_status(exclude=["task_result"]) + plan_status += f"\nFurther Task Instruction: {instruction}" else: return "No current_task found now. Please use command Plan.append_task to add a task first."