diff --git a/metagpt/actions/di/execute_nb_code.py b/metagpt/actions/di/execute_nb_code.py index 64620d9cc..f3dfd1601 100644 --- a/metagpt/actions/di/execute_nb_code.py +++ b/metagpt/actions/di/execute_nb_code.py @@ -30,6 +30,12 @@ from metagpt.logs import logger from metagpt.utils.report import NotebookReporter INSTALL_KEEPLEN = 500 +INI_CODE = """import warnings +import logging + +root_logger = logging.getLogger() +root_logger.setLevel(logging.ERROR) +warnings.filterwarnings('ignore')""" class RealtimeOutputNotebookClient(NotebookClient): @@ -79,6 +85,12 @@ class ExecuteNbCode(Action): ) self.reporter = NotebookReporter() self.set_nb_client() + self.init_called = False + + async def init_code(self): + if not self.init_called: + await self.run(INI_CODE) + self.init_called = True def set_nb_client(self): self.nb_client = RealtimeOutputNotebookClient( @@ -175,9 +187,12 @@ class ExecuteNbCode(Action): is_success = False output_text = remove_escape_and_color_codes(output_text) + if is_success: + output_text = remove_log_and_warning_lines(output_text) # The useful information of the exception is at the end, # the useful information of normal output is at the begining. 
- output_text = output_text[:keep_len] if is_success else output_text[-keep_len:] + if len(output_text) > keep_len: + output_text = output_text[:keep_len] if is_success else output_text[-keep_len:] parsed_output.append(output_text) return is_success, ",".join(parsed_output) @@ -268,6 +283,18 @@ class ExecuteNbCode(Action): return outputs, success +def remove_log_and_warning_lines(input_str: str) -> str: + delete_lines = ["[warning]", "warning:", "[cv]", "[info]"] + result = "\n".join( + [ + line + for line in input_str.split("\n") + if not any(dl in line.lower() for dl in delete_lines) + ] + ).strip() + return result + + +def remove_escape_and_color_codes(input_str: str): # 使用正则表达式去除jupyter notebook输出结果中的转义字符和颜色代码 # Use regular expressions to get rid of escape characters and color codes in jupyter notebook output. diff --git a/metagpt/actions/di/write_analysis_code.py b/metagpt/actions/di/write_analysis_code.py index 548555196..00e6d174d 100644 --- a/metagpt/actions/di/write_analysis_code.py +++ b/metagpt/actions/di/write_analysis_code.py @@ -57,7 +57,7 @@ class WriteAnalysisCode(Action): code = await self._debug_with_reflection(context=context, working_memory=working_memory) else: rsp = await self.llm.aask(context, system_msgs=[INTERPRETER_SYSTEM_MSG], **kwargs) - code = CodeParser.parse_code(text=rsp) + code = CodeParser.parse_code(text=rsp, lang="python") return code diff --git a/metagpt/prompts/di/data_analyst.py b/metagpt/prompts/di/data_analyst.py index d26f2bf09..56ae0b68b 100644 --- a/metagpt/prompts/di/data_analyst.py +++ b/metagpt/prompts/di/data_analyst.py @@ -1,44 +1,26 @@ -CMD_PROMPT = """ -# Data Structure -class Task(BaseModel): - task_id: str = "" - dependent_task_ids: list[str] = [] - instruction: str = "" - task_type: str = "" - assignee: str = "David" +from metagpt.strategy.task_type import TaskType -# Available Commands -{available_commands} - -# Current Plan -{plan_status} - -# Example -{example} - -# Instructions -Based on the context, write a plan 
or modify an existing plan to achieve the goal. A plan consists of one to 3 tasks. -If plan is created, you should track the progress and update the plan accordingly, such as finish_current_task, append_task, reset_task, replace_task, etc. -Pay close attention to new user message, review the conversation history, use reply_to_human to respond to new user requirement. -Note: -1. If you keeping encountering errors, unexpected situation, or you are not sure of proceeding, use ask_human to ask for help. -2. Carefully review your progress at the current task, if your actions so far has not fulfilled the task instruction, you should continue with current task. Otherwise, finish current task. -3. Each time you finish a task, use reply_to_human to report your progress. -Pay close attention to the Example provided, you can reuse the example for your current situation if it fits. - -You may use any of the available commands to create a plan or update the plan. You may output mutiple commands, they will be executed sequentially. -If you finish current task, you will automatically take the next task in the existing plan, use finish_task, DON'T append a new task. - -# Your commands in a json array, in the following output format, always output a json array, if there is nothing to do, use the pass command: -Some text indicating your thoughts, such as how you should update the plan status, respond to inquiry, or seek for help. Then a json array of commands. -```json -[ - {{ - "command_name": str, - "args": {{"arg_name": arg_value, ...}} - }}, - ... -] -``` -Notice: your output JSON data section must start with **```json [** +BROWSER_INSTRUCTION = """ +4. Carefully choose to use or not use the browser tool to assist you in web tasks. + - When no click action is required, no need to use the browser tool to navigate to the webpage before scraping. + - If you need detail HTML content, write code to get it but not to use the browser tool. 
+ - Make sure the command_name are certainly in Available Commands when you use the browser tool. +""" + +TASK_TYPE_DESC = "\n".join([f"- **{tt.type_name}**: {tt.value.desc}" for tt in TaskType]) + + +CODE_STATUS = """ +**Code written**: +{code} + +**Execution status**: {status} +**Execution result**: {result} +""" + + +BROWSER_INFO = """ +Here are ordered web actions in the browser environment, note that you can not use the browser tool in the current environment. +{browser_actions} +The latest url is the one you should use to view the page. If view page has been done, directly use the variable and html content in executing result. """ diff --git a/metagpt/prompts/di/role_zero.py b/metagpt/prompts/di/role_zero.py index fd89f9d44..243f33604 100644 --- a/metagpt/prompts/di/role_zero.py +++ b/metagpt/prompts/di/role_zero.py @@ -5,7 +5,7 @@ When presented a current task, tackle the task using the available commands. Pay close attention to new user message, review the conversation history, use RoleZero.reply_to_human to respond to new user requirement. Note: 1. If you keeping encountering errors, unexpected situation, or you are not sure of proceeding, use RoleZero.ask_human to ask for help. -2. Carefully review your progress at the current task, if your actions so far has not fulfilled the task instruction, you should continue with current task. Otherwise, finish current task. +2. Carefully review your progress at the current task, if your actions so far has not fulfilled the task instruction, you should continue with current task. Otherwise, finish current task by Plan.finish_current_task explicitly. 3. Each time you finish a task, use RoleZero.reply_to_human to report your progress. 
""" @@ -18,6 +18,9 @@ class Task(BaseModel): task_type: str = "" assignee: str = "" +# Available Task Types +{task_type_desc} + # Available Commands {available_commands} Special Command: Use {{"command_name": "end"}} to do nothing or indicate completion of all requirements and the end of actions. @@ -38,7 +41,7 @@ Pay close attention to the Example provided, you can reuse the example for your You may use any of the available commands to create a plan or update the plan. You may output mutiple commands, they will be executed sequentially. If you finish current task, you will automatically take the next task in the existing plan, use Plan.finish_task, DON'T append a new task. -# Your commands in a json array, in the following output format. If there is nothing to do, use the pass or end command: +# Your commands in a json array, in the following output format with correct command_name and args. If there is nothing to do, use the pass or end command: Some text indicating your thoughts, such as how you should update the plan status, respond to inquiry, or seek for help. Then a json array of commands. You must output ONE and ONLY ONE json array. DON'T output multiple json arrays with thoughts between them. ```json [ diff --git a/metagpt/prompts/di/write_analysis_code.py b/metagpt/prompts/di/write_analysis_code.py index af941808d..1d743a719 100644 --- a/metagpt/prompts/di/write_analysis_code.py +++ b/metagpt/prompts/di/write_analysis_code.py @@ -28,7 +28,10 @@ your code ``` """ -REFLECTION_SYSTEM_MSG = """You are an AI Python assistant. You will be given your previous implementation code of a task, runtime error results, and a hint to change the implementation appropriately. Write your full implementation.""" +REFLECTION_SYSTEM_MSG = """ +You are an AI Python assistant. You will be given your previous implementation code of a task, runtime error results, and a hint to change the implementation appropriately. Write your full implementation. 
+When encountering a ModuleNotFoundError, always install the required package. And use Terminal tool if available. +""" DEBUG_REFLECTION_EXAMPLE = ''' [previous impl]: diff --git a/metagpt/prompts/task_type.py b/metagpt/prompts/task_type.py index 5b1ffc744..312421c21 100644 --- a/metagpt/prompts/task_type.py +++ b/metagpt/prompts/task_type.py @@ -53,3 +53,9 @@ The current task is about converting image into webpage code. please note the fo - Single-Step Code Generation: Execute the entire code generation process in a single step, encompassing HTML, CSS, and JavaScript. Avoid fragmenting the code generation into multiple separate steps to maintain consistency and simplify the development workflow. - Save webpages: Be sure to use the save method provided. """ + +# Prompt for taking on "web_scraping" tasks +WEB_SCRAPING_PROMPT = """ +- Remember to view and print the necessary HTML content in a separate task to understand the structure first before scraping data. +- Since the data required by user may not correspond directly to the actual HTML element names, you should thoroughly analyze the HTML structure and meanings of all elements in the executing result first. Ensure the `class_` in your code should be derived from the actual HTML structure directly, not based on your knowledge. To ensure it, analyse the most suitable location of the 'class_' in the actual HTML content before code. 
+""" diff --git a/metagpt/roles/di/data_analyst.py b/metagpt/roles/di/data_analyst.py index 22afca1a6..2b1bb10b1 100644 --- a/metagpt/roles/di/data_analyst.py +++ b/metagpt/roles/di/data_analyst.py @@ -1,151 +1,109 @@ from __future__ import annotations -import json -from typing import Literal +import re +from typing import List -from pydantic import model_validator +from pydantic import Field, model_validator -from metagpt.actions import Action +from metagpt.actions.di.execute_nb_code import ExecuteNbCode from metagpt.actions.di.write_analysis_code import WriteAnalysisCode from metagpt.logs import logger -from metagpt.prompts.di.data_analyst import CMD_PROMPT -from metagpt.prompts.di.role_zero import JSON_REPAIR_PROMPT -from metagpt.roles.di.data_interpreter import DataInterpreter -from metagpt.schema import Message, TaskResult -from metagpt.strategy.experience_retriever import KeywordExpRetriever -from metagpt.strategy.planner import Planner -from metagpt.strategy.thinking_command import ( - Command, - prepare_command_prompt, - run_commands, -) -from metagpt.tools.tool_recommend import BM25ToolRecommender -from metagpt.utils.common import CodeParser -from metagpt.utils.report import ThoughtReporter -from metagpt.utils.repair_llm_raw_output import repair_llm_raw_output, RepairType +from metagpt.prompts.di.data_analyst import BROWSER_INSTRUCTION, TASK_TYPE_DESC, CODE_STATUS, BROWSER_INFO +from metagpt.prompts.di.role_zero import ROLE_INSTRUCTION +from metagpt.roles.di.role_zero import RoleZero +from metagpt.schema import TaskResult, Message +from metagpt.strategy.experience_retriever import ExpRetriever, KeywordExpRetriever +from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender +from metagpt.tools.tool_registry import register_tool -class DataAnalyst(DataInterpreter): +@register_tool(include_functions=["write_and_exec_code"]) +class DataAnalyst(RoleZero): name: str = "David" profile: str = "DataAnalyst" goal: str = "Take on any data-related 
tasks, such as data analysis, machine learning, deep learning, web browsing, web scraping, web searching, web deployment, terminal operation, git and github operation, etc." - react_mode: Literal["react"] = "react" - max_react_loop: int = 20 # used for react mode - task_result: TaskResult = None - available_commands: list[Command] = [ - Command.APPEND_TASK, - Command.RESET_TASK, - Command.REPLACE_TASK, - Command.FINISH_CURRENT_TASK, - # Command.PUBLISH_MESSAGE, - Command.ASK_HUMAN, - Command.REPLY_TO_HUMAN, - # Command.PASS, - ] - commands: list[dict] = [] # issued commands to be executed - user_requirement: str = "" + instruction: str = ROLE_INSTRUCTION + BROWSER_INSTRUCTION + task_type_desc: str = TASK_TYPE_DESC + + tools: list[str] = ["Plan", "DataAnalyst", "RoleZero", "Browser"] + custom_tools: list[str] = ["machine learning", "web scraping", "Terminal"] + custom_tool_recommender: ToolRecommender = None + experience_retriever: ExpRetriever = KeywordExpRetriever() + + use_reflection: bool = True + write_code: WriteAnalysisCode = Field(default_factory=WriteAnalysisCode, exclude=True) + execute_code: ExecuteNbCode = Field(default_factory=ExecuteNbCode, exclude=True) @model_validator(mode="after") - def set_plan_and_tool(self) -> "DataInterpreter": - # We force using this parameter for DataAnalyst - assert self.react_mode == "react" - assert self.auto_run - assert self.use_plan + def set_custom_tool(self): + if self.custom_tools and not self.custom_tool_recommender: + self.custom_tool_recommender = BM25ToolRecommender(tools=self.custom_tools) - # Roughly the same part as DataInterpreter.set_plan_and_tool - self._set_react_mode(react_mode=self.react_mode, max_react_loop=self.max_react_loop, auto_run=self.auto_run) - if self.tools and not self.tool_recommender: - self.tool_recommender = BM25ToolRecommender(tools=self.tools) - self.set_actions([WriteAnalysisCode]) + def _update_tool_execution(self): + self.tool_execution_map.update({ + 
"DataAnalyst.write_and_exec_code": self.write_and_exec_code, + }) - # HACK: Init Planner, control it through dynamic thinking; Consider formalizing as a react mode - self.planner = Planner(goal="", working_memory=self.rc.working_memory, auto_run=True) + async def parse_browser_actions(self, memory: List[Message]) -> List[Message]: + memory = await super().parse_browser_actions(memory) + browser_actions = [] + for index, msg in enumerate(memory): + if msg.cause_by == "browser": + browser_url = re.search('URL: (.*?)\\n', msg.content).group(1) + pattern = re.compile(r"Command Browser\.(\w+) executed") + browser_actions.append({ + 'command': pattern.match(memory[index - 1].content).group(1), + 'current url': browser_url + }) + if browser_actions: + browser_actions = BROWSER_INFO.format(browser_actions=browser_actions) + self.rc.working_memory.add(Message(content=browser_actions, role="user", cause_by="browser")) + return memory - return self + async def write_and_exec_code(self): + """Write a code block for current task and execute it in an interactive notebook environment.""" + counter = 0 + success = False + await self.execute_code.init_code() - async def _think(self) -> bool: - """Useful in 'react' mode. 
Use LLM to decide whether and what to do next.""" - self._set_state(0) - example = "" - if not self.planner.plan.goal: - self.user_requirement = self.get_memories()[-1].content - self.planner.plan.goal = self.user_requirement - example = KeywordExpRetriever().retrieve(self.user_requirement) + # plan info + plan_status = self.planner.get_plan_status() - plan_status = self.planner.plan.model_dump(include=["goal", "tasks"]) - # for task in plan_status["tasks"]: - # task.pop("code") - # task.pop("result") - prompt = CMD_PROMPT.format( - plan_status=plan_status, - example=example, - available_commands=prepare_command_prompt(self.available_commands), - ) - context = self.llm.format_msg(self.working_memory.get() + [Message(content=prompt, role="user")]) - # print(*context, sep="\n" + "*" * 5 + "\n") - async with ThoughtReporter(enable_llm_stream=True): - rsp = await self.llm.aask(context) + # tool info + if self.custom_tool_recommender: + plan = self.planner.plan + fixed = ["Terminal"] if "Terminal" in self.custom_tools else None + tool_info = await self.custom_tool_recommender.get_recommended_tool_info(fixed=fixed, plan=plan) + else: + tool_info = "" - # 临时方案,待role zero的版本完成可将本注释内的代码直接替换掉 - # -------------开始--------------- - try: - commands = CodeParser.parse_code(block=None, lang="json", text=rsp) - commands = json.loads(repair_llm_raw_output(output=commands, req_keys=[None], repair_type=RepairType.JSON)) - except json.JSONDecodeError as e: - commands = await self.llm.aask(msg=JSON_REPAIR_PROMPT.format(json_data=rsp)) - commands = json.loads(CodeParser.parse_code(block=None, lang="json", text=commands)) - except Exception as e: - tb = traceback.format_exc() - print(tb) + while not success and counter < 3: + ### write code ### + logger.info(f"ready to WriteAnalysisCode") + use_reflection = (counter > 0 and self.use_reflection) # only use reflection after the first trial - # 为了对LLM不按格式生成进行容错 - if isinstance(commands, dict): - commands = commands["commands"] if "commands" 
in commands else [commands] - # -------------结束--------------- + code = await self.write_code.run( + user_requirement=self.planner.plan.goal, + plan_status=plan_status, + tool_info=tool_info, + working_memory=self.rc.working_memory.get(), + use_reflection=use_reflection, + ) + self.rc.working_memory.add(Message(content=code, role="assistant", cause_by=WriteAnalysisCode)) - self.rc.working_memory.add(Message(content=rsp, role="assistant")) - await run_commands(self, commands, self.rc.working_memory) - return bool(self.rc.todo) + ### execute code ### + result, success = await self.execute_code.run(code) + print(result) - async def _act(self) -> Message: - """Useful in 'react' mode. Return a Message conforming to Role._act interface.""" - logger.info(f"ready to take on task {self.planner.plan.current_task}") + self.rc.working_memory.add(Message(content=result, role="user", cause_by=ExecuteNbCode)) - # TODO: Consider an appropriate location to insert task experience formally - experience = KeywordExpRetriever().retrieve(self.planner.plan.current_task.instruction, exp_type="task") - if experience and experience not in [msg.content for msg in self.rc.working_memory.get()]: - exp_msg = Message(content=experience, role="assistant") - self.rc.working_memory.add(exp_msg) + ### process execution result ### + counter += 1 + if success: + task_result = TaskResult(code=code, result=result, is_success=success) + self.planner.current_task.update_task_result(task_result) - code, result, is_success = await self._write_and_exec_code() - self.planner.plan.current_task.is_success = ( - is_success # mark is_success, determine is_finished later in thinking - ) - - # FIXME: task result is always overwritten by the last act, whereas it can be made of of multiple acts - self.task_result = TaskResult(code=code, result=result, is_success=is_success) - return Message(content="Task completed", role="assistant", sent_from=self._setting, cause_by=WriteAnalysisCode) - - async def _react(self) -> 
Message: - # NOTE: Diff 1: Each time landing here means observing news, set todo to allow news processing in _think - self._set_state(0) - - actions_taken = 0 - rsp = Message(content="No actions taken yet", cause_by=Action) # will be overwritten after Role _act - while actions_taken < self.rc.max_react_loop: - # NOTE: Diff 2: Keep observing within _react, news will go into memory, allowing adapting to new info - # add news from self._observe, the one called in self.run, consider removing when switching from working_memory to memory - self.working_memory.add_batch(self.rc.news) - await self._observe() - # add news from this self._observe, we need twice because _observe rewrites rc.news - self.working_memory.add_batch(self.rc.news) - - # think - has_todo = await self._think() - if not has_todo: - break - # act - logger.debug(f"{self._setting}: {self.rc.state=}, will do {self.rc.todo}") - rsp = await self._act() - actions_taken += 1 - return rsp # return output from the last action + status = 'Success' if success else 'Failed' + output = CODE_STATUS.format(code=code, status=status, result=result) + self.rc.working_memory.clear() + return output diff --git a/metagpt/roles/di/role_zero.py b/metagpt/roles/di/role_zero.py index 995e7e5d8..5f27c79ef 100644 --- a/metagpt/roles/di/role_zero.py +++ b/metagpt/roles/di/role_zero.py @@ -41,6 +41,7 @@ class RoleZero(Role): system_msg: list[str] = None # Use None to conform to the default value at llm.aask cmd_prompt: str = CMD_PROMPT instruction: str = ROLE_INSTRUCTION + task_type_desc: str = None # React Mode react_mode: Literal["react"] = "react" @@ -148,14 +149,10 @@ class RoleZero(Role): example=example, available_commands=tool_info, instruction=self.instruction.strip(), + task_type_desc=self.task_type_desc, ) memory = self.rc.memory.get(self.memory_k) - if not self.browser.is_empty_page: - pattern = re.compile(r"Command Browser\.(\w+) executed") - for index, msg in zip(range(len(memory), 0, -1), memory[::-1]): - if 
pattern.match(msg.content): - memory.insert(index, UserMessage(cause_by="browser", content=await self.browser.view())) - break + memory = await self.parse_browser_actions(memory) context = self.llm.format_msg(memory + [UserMessage(content=prompt)]) # print(*context, sep="\n" + "*" * 5 + "\n") async with ThoughtReporter(enable_llm_stream=True) as reporter: @@ -165,6 +162,15 @@ class RoleZero(Role): return True + async def parse_browser_actions(self, memory: List[Message]) -> List[Message]: + if not self.browser.is_empty_page: + pattern = re.compile(r"Command Browser\.(\w+) executed") + for index, msg in zip(range(len(memory), 0, -1), memory[::-1]): + if pattern.match(msg.content): + memory.insert(index, UserMessage(cause_by="browser", content=await self.browser.view())) + break + return memory + async def _act(self) -> Message: if self.use_fixed_sop: return await super()._act() @@ -267,13 +273,14 @@ class RoleZero(Role): async def _run_commands(self, commands) -> str: outputs = [] for cmd in commands: + output = f"Command {cmd['command_name']} executed" # handle special command first if await self._run_special_command(cmd): + outputs.append(output) continue # run command as specified by tool_execute_map if cmd["command_name"] in self.tool_execution_map: tool_obj = self.tool_execution_map[cmd["command_name"]] - output = f"Command {cmd['command_name']} executed" try: if inspect.iscoroutinefunction(tool_obj): tool_output = await tool_obj(**cmd["args"]) diff --git a/metagpt/schema.py b/metagpt/schema.py index 69c7a519b..94e64d7fa 100644 --- a/metagpt/schema.py +++ b/metagpt/schema.py @@ -464,8 +464,8 @@ class Task(BaseModel): self.is_finished = False def update_task_result(self, task_result: TaskResult): - self.code = task_result.code - self.result = task_result.result + self.code = self.code + "\n" + task_result.code + self.result = self.result + "\n" + task_result.result self.is_success = task_result.is_success @@ -669,10 +669,14 @@ class Plan(BaseModel): """ return 
[task for task in self.tasks if task.is_finished] - def append_task(self, task_id: str, dependent_task_ids: list[str], instruction: str, assignee: str): + def append_task(self, task_id: str, dependent_task_ids: list[str], instruction: str, assignee: str, task_type: str = ""): """Append a new task with task_id (number) to the end of existing task sequences. If dependent_task_ids is not empty, the task will depend on the tasks with the ids in the list.""" new_task = Task( - task_id=task_id, dependent_task_ids=dependent_task_ids, instruction=instruction, assignee=assignee + task_id=task_id, + dependent_task_ids=dependent_task_ids, + instruction=instruction, + assignee=assignee, + task_type=task_type ) return self._append_task(new_task) diff --git a/metagpt/strategy/experience_retriever.py b/metagpt/strategy/experience_retriever.py index fb0fd7903..668b44c40 100644 --- a/metagpt/strategy/experience_retriever.py +++ b/metagpt/strategy/experience_retriever.py @@ -629,6 +629,8 @@ class KeywordExpRetriever(ExpRetriever): return DEPLOY_EXAMPLE elif "issue" in context.lower(): return FIX_ISSUE_EXAMPLE + elif "https:" in context.lower() or "http:" in context.lower(): + return WEB_SCRAPING_EXAMPLE elif exp_type == "task": if "diagnose" in context.lower(): return SEARCH_SYMBOL_EXAMPLE @@ -890,3 +892,122 @@ Explanation: to review the code, call ReviewAndRewriteCode.run. ] ``` """ + + +WEB_SCRAPING_EXAMPLE = """ +## action 1 +User Requirement: Scrap and list the restaurant names of first page by searching for the keyword `beef` on the website https://www.yelp.com/. +Explanation: The requirement is to scrape data from a website and extract information about restaurants. The process involves searching for restaurants with a specific keyword, retrieving and presenting the data in a structured format. 
+ +```json +[ + { + "command_name": "Plan.append_task", + "args": { + "task_id": "1", + "dependent_task_ids": [], + "instruction": "Navigate to the yelp website.", + "assignee": "Browser" + } + }, + { + "command_name": "Plan.append_task", + "args": { + "task_id": "2", + "dependent_task_ids": ["1"], + "instruction": "Search for restaurants with the keyword 'beef'.", + "assignee": "Browser" + } + }, + { + "command_name": "Plan.append_task", + "args": { + "task_id": "3", + "dependent_task_ids": ["2"], + "instruction": "View the html content of the search result page before scrap data to understand the structure.", + "assignee": "DataAnalyst" + } + }, + { + "command_name": "Plan.append_task", + "args": { + "task_id": "4", + "dependent_task_ids": ["3"], + "instruction": "Parse the html content to scrape the restaurant names and print it.", + "assignee": "DataAnalyst" + } + } +] +``` + +## action 2 +Explanation: To search for restaurants, I will now go to the website https://www.yelp.com/ first. +Here is the command to navigate to the website: + +```json +[ + { + "command_name": "Browser.goto", + "args": { + "url": "https://www.yelp.com/" + } + } +] +``` + +## action 3 +Explanation: Since the Browser has successfully navigated to the website, and I find that the element id of the search box is 53. I will finish the current task and then use the Browser tool to type the keyword `beef` in the search box and press enter. +Here is the command to finish the current task and type the keyword: + +```json +[ + { + "command_name": "Plan.finish_current_task", + "args": {} + }, + { + "command_name": "Browser.type", + "args": { + "element_id": 53, + "content": "beef", + "press_enter_after": true + } + } +] +``` + +## action 4 +Explanation: Since the Browser has successfully search the keyword `beef`, I will finish the current task and then write code to view the html content of the page. 
+Here is the command to finish the current task and view the html content: + +```json +[ + { + "command_name": "Plan.finish_current_task", + "args": {} + }, + { + "command_name": "DataAnalyst.write_and_exec_code", + "args": {} + } +] +``` + +## action 5 +Explanation: Since the DataAnalyst has successfully viewed the html content of the page, I will finish the current task and then write code to parse the html content and extract the restaurant names. +Here is the command to finish the current task and parse the html content: + +```json +[ + { + "command_name": "Plan.finish_current_task", + "args": {} + }, + { + "command_name": "DataAnalyst.write_and_exec_code", + "args": {} + } +] + +... +""" diff --git a/metagpt/strategy/planner.py b/metagpt/strategy/planner.py index 427e41562..95ad1f5cc 100644 --- a/metagpt/strategy/planner.py +++ b/metagpt/strategy/planner.py @@ -40,8 +40,14 @@ PLAN_STATUS = """ ## Current Task {current_task} +## Finished Section of Current Task +### code +{current_task_code} +### execution result +{current_task_result} + ## Task Guidance -Write complete code for 'Current Task'. And avoid duplicating code from 'Finished Tasks', such as repeated import of packages, reading data, etc. +Write code for the incomplete sections of 'Current Task'. And avoid duplicating code from 'Finished Tasks', such as repeated import of packages, reading data, etc. 
Specifically, {guidance} """ @@ -173,6 +179,8 @@ class Planner(BaseModel): code_written=code_written, task_results=task_results, current_task=self.current_task.instruction, + current_task_code=self.current_task.code if self.current_task.code else "", + current_task_result=self.current_task.result if self.current_task.result else "", guidance=guidance, ) diff --git a/metagpt/strategy/task_type.py b/metagpt/strategy/task_type.py index 2bc53b964..b44cc3ac0 100644 --- a/metagpt/strategy/task_type.py +++ b/metagpt/strategy/task_type.py @@ -8,7 +8,7 @@ from metagpt.prompts.task_type import ( FEATURE_ENGINEERING_PROMPT, IMAGE2WEBPAGE_PROMPT, MODEL_EVALUATE_PROMPT, - MODEL_TRAIN_PROMPT, + MODEL_TRAIN_PROMPT, WEB_SCRAPING_PROMPT, ) @@ -62,6 +62,7 @@ class TaskType(Enum): WEBSCRAPING = TaskTypeDef( name="web scraping", desc="For scraping data from web pages.", + guidance=WEB_SCRAPING_PROMPT, ) EMAIL_LOGIN = TaskTypeDef( name="email login", diff --git a/metagpt/tools/libs/__init__.py b/metagpt/tools/libs/__init__.py index 725ab73c9..c9de6bd21 100644 --- a/metagpt/tools/libs/__init__.py +++ b/metagpt/tools/libs/__init__.py @@ -5,11 +5,11 @@ # @File : __init__.py # @Desc : from metagpt.tools.libs import ( - # data_preprocess, - # feature_engineering, + data_preprocess, + feature_engineering, sd_engine, gpt_v_generator, - # web_scraping, + web_scraping, # email_login, terminal, editor, @@ -20,11 +20,11 @@ from metagpt.tools.libs import ( from metagpt.tools.libs.env import get_env, set_get_env_entry, default_get_env, get_env_description _ = ( - # data_preprocess, - # feature_engineering, + data_preprocess, + feature_engineering, sd_engine, gpt_v_generator, - # web_scraping, + web_scraping, # email_login, terminal, editor, diff --git a/metagpt/tools/libs/web_scraping.py b/metagpt/tools/libs/web_scraping.py index 489c3a472..9e7a8041c 100644 --- a/metagpt/tools/libs/web_scraping.py +++ b/metagpt/tools/libs/web_scraping.py @@ -8,13 +8,15 @@ from metagpt.utils.parse_html import 
simplify_html @register_tool(tags=["web scraping"]) -async def view_page_element_to_scrape(url: str, requirement: str, keep_links: bool = False) -> None: - """view the HTML content of current page to understand the structure. When executed, the content will be printed out +async def view_page_element_to_scrape(url: str, requirement: str, keep_links: bool = False) -> str: + """view the HTML content of current page to understand the structure. Args: url (str): The URL of the web page to scrape. requirement (str): Providing a clear and detailed requirement helps in focusing the inspection on the desired elements. keep_links (bool): Whether to keep the hyperlinks in the HTML content. Set to True if links are required + Returns: + str: The HTML content of the page. """ async with Browser() as browser: await browser.goto(url) @@ -36,7 +38,7 @@ async def view_page_element_to_scrape(url: str, requirement: str, keep_links: bo html = "\n".join(i.text for i in nodes) mem_fs.rm_file(filename) - print(html) + return html # async def get_elements_outerhtml(self, element_ids: list[int]): diff --git a/metagpt/tools/tool_recommend.py b/metagpt/tools/tool_recommend.py index 9f425fffe..4bea137c3 100644 --- a/metagpt/tools/tool_recommend.py +++ b/metagpt/tools/tool_recommend.py @@ -104,11 +104,13 @@ class ToolRecommender(BaseModel): return ranked_tools - async def get_recommended_tool_info(self, **kwargs) -> str: + async def get_recommended_tool_info(self, fixed: list[str] = None, **kwargs) -> str: """ Wrap recommended tools with their info in a string, which can be used directly in a prompt. 
""" recommended_tools = await self.recommend_tools(**kwargs) + if fixed: + recommended_tools.extend([self.tools[tool_name] for tool_name in fixed if tool_name in self.tools]) if not recommended_tools: return "" tool_schemas = {tool.name: tool.schemas for tool in recommended_tools} diff --git a/metagpt/utils/parse_html.py b/metagpt/utils/parse_html.py index 1ed3a620c..985e54d96 100644 --- a/metagpt/utils/parse_html.py +++ b/metagpt/utils/parse_html.py @@ -41,7 +41,7 @@ class WebPage(BaseModel): def get_slim_soup(self, keep_links: bool = False): soup = _get_soup(self.html) - keep_attrs = ["class"] + keep_attrs = ["class", "id"] if keep_links: keep_attrs.append("href") diff --git a/tests/metagpt/roles/di/run_data_analyst.py b/tests/metagpt/roles/di/run_data_analyst.py new file mode 100644 index 000000000..b7b48e0db --- /dev/null +++ b/tests/metagpt/roles/di/run_data_analyst.py @@ -0,0 +1,54 @@ +from metagpt.roles.di.data_analyst import DataAnalyst + +HOUSE_PRICE_TRAIN_PATH = '/data/house-prices-advanced-regression-techniques/split_train.csv' +HOUSE_PRICE_EVAL_PATH = '/data/house-prices-advanced-regression-techniques/split_eval.csv' +HOUSE_PRICE_REQ = f""" +This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: '{HOUSE_PRICE_TRAIN_PATH}', eval data path: '{HOUSE_PRICE_EVAL_PATH}'. +""" + +CALIFORNIA_HOUSING_REQ = """ +Analyze the 'Canifornia-housing-dataset' using https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_california_housing.html#sklearn.datasets.fetch_california_housing to predict the median house value. you need to perfrom data preprocessing, feature engineering and finally modeling to predict the target. 
Use machine learning techniques such as linear regression (including ridge regression and lasso regression), random forest, CatBoost, LightGBM, XGBoost or other appropriate method. You also need to report the MSE on the test dataset +""" + +# For web scraping task, please provide url begin with `https://` or `http://` +PAPER_LIST_REQ = """" +Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/, +and save it to a csv file. paper title must include `multiagent` or `large language model`. +**Notice: view the page element before writing scraping code** +""" + +ECOMMERCE_REQ = """ +Get products data from website https://scrapeme.live/shop/ and save it as a csv file. +The first page product name, price, product URL, and image URL must be saved in the csv. +**Notice: view the page element before writing scraping code** +""" + +NEWS_36KR_REQ = """从36kr创投平台https://pitchhub.36kr.com/financing-flash 所有初创企业融资的信息, **注意: 这是一个中文网站**; +下面是一个大致流程, 你会根据每一步的运行结果对当前计划中的任务做出适当调整: +1. 爬取并本地保存html结构; +2. 直接打印第7个*`快讯`*关键词后2000个字符的html内容, 作为*快讯的html内容示例*; +3. 反思*快讯的html内容示例*中的规律, 设计正则匹配表达式来获取*`快讯`*的标题、链接、时间; +4. 筛选最近3天的初创企业融资*`快讯`*, 以list[dict]形式打印前5个。 +5. 将全部结果存在本地csv中 +**Notice: view the page element before writing scraping code** +""" + +WIKIPEDIA_SEARCH_REQ = """ +Search for `LLM` on https://www.wikipedia.org/ and print all the meaningful significances of the entry. +""" + +STACKOVERFLOW_CLICK_REQ = """ +Click the Questions tag in https://stackoverflow.com/ and scrap question name, votes, answers and views num to csv in the first result page. +""" + + +async def main(): + di = DataAnalyst() + await di.browser.start() + await di.run(STACKOVERFLOW_CLICK_REQ) + + +if __name__ == "__main__": + import asyncio + + asyncio.run(main())