Merge branch 'data_analyst_ldy' into 'mgx_ops'

Data analyst ldy

See merge request pub/MetaGPT!255
This commit is contained in:
林义章 2024-07-29 09:52:37 +00:00
commit c1536bc5b9
6 changed files with 29 additions and 51 deletions

View file

@ -43,6 +43,7 @@ class WriteAnalysisCode(Action):
tool_info: str = "",
working_memory: list[Message] = None,
use_reflection: bool = False,
memory: list[Message] = None,
**kwargs,
) -> str:
structual_prompt = STRUCTUAL_PROMPT.format(
@ -52,7 +53,8 @@ class WriteAnalysisCode(Action):
)
working_memory = working_memory or []
context = self.llm.format_msg([Message(content=structual_prompt, role="user")] + working_memory)
memory = memory or []
context = self.llm.format_msg(memory + [Message(content=structual_prompt, role="user")] + working_memory)
# LLM call
if use_reflection:

View file

@ -2,9 +2,9 @@ from metagpt.strategy.task_type import TaskType
EXTRA_INSTRUCTION = """
6. Carefully choose to use or not use the browser tool to assist you in web tasks.
- When no click action is required, no need to use the browser tool to navigate to the webpage before scraping.
- If you need detail HTML content, write code to get it but not to use the browser tool.
- Make sure the command_name are certainly in Available Commands when you use the browser tool.
- When no click action is required, no need to use the Browser tool to navigate to the webpage before scraping.
- Write code to view the HTML content rather than using the Browser tool.
- Make sure the command_name are certainly in Available Commands when you use the Browser tool.
7. When you are making plan. It is highly recommend to plan and append all the tasks in first response once time.
8. Don't finish_current_task multiple times for the same task.
9. Finish current task timely, such as when the code is written and executed successfully.
@ -20,10 +20,3 @@ CODE_STATUS = """
**Execution status**: {status}
**Execution result**: {result}
"""
BROWSER_INFO = """
Here are ordered web actions in the browser environment, note that you can not use the browser tool in the current environment.
{browser_actions}
The latest url is the one you should use to view the page. If view page has been done, directly use the variable and html content in executing result.
"""

View file

@ -57,5 +57,5 @@ The current task is about converting image into webpage code. please note the fo
# Prompt for taking on "web_scraping" tasks
WEB_SCRAPING_PROMPT = """
- Remember to view and print the necessary HTML content in a separate task to understand the structure first before scraping data. Such as `html_content = await view_page_element_to_scrape(...)\nprint(html_content)`.
- Since the data required by user may not correspond directly to the actual HTML element names, you should thoroughly analyze the HTML structure and meanings of all elements in the executing result first. Ensure the `class_` in your code should derived from the actual HTML structure directly, not based on your knowledge. To ensure it, analyse the most suitable location of the 'class_' in the actual HTML content before code.
- Since the data required by user may not correspond directly to the actual HTML element names, you should thoroughly analyze the HTML structure and meanings of all elements in your context first. Ensure the `class_` in your code should derived from the actual HTML structure directly, not based on your knowledge. To ensure it, analyse the most suitable location of the 'class_' in the actual HTML content before code.
"""

View file

@ -1,15 +1,11 @@
from __future__ import annotations
import re
from typing import List
from pydantic import Field, model_validator
from metagpt.actions.di.execute_nb_code import ExecuteNbCode
from metagpt.actions.di.write_analysis_code import WriteAnalysisCode
from metagpt.logs import logger
from metagpt.prompts.di.data_analyst import (
BROWSER_INFO,
CODE_STATUS,
EXTRA_INSTRUCTION,
TASK_TYPE_DESC,
@ -51,37 +47,18 @@ class DataAnalyst(RoleZero):
}
)
async def parse_browser_actions(self, memory: List[Message]) -> List[Message]:
memory = await super().parse_browser_actions(memory)
browser_actions = []
for index, msg in enumerate(memory):
if msg.cause_by == "browser":
browser_url = re.search("URL: (.*?)\\n", msg.content).group(1)
pattern = re.compile(r"Command Browser\.(\w+) executed")
browser_actions.append(
{"command": pattern.match(memory[index - 1].content).group(1), "current url": browser_url}
)
if browser_actions:
browser_actions = BROWSER_INFO.format(browser_actions=browser_actions)
self.rc.working_memory.add(Message(content=browser_actions, role="user", cause_by="browser"))
return memory
async def write_and_exec_code(self, instruction: str = ""):
"""Write a code block for current task and execute it in an interactive notebook environment.
Args:
instruction: The specific task description for which the code needs to be written.
"""
if self.planner.plan:
logger.info(f"Current task {self.planner.plan.current_task}")
async def write_and_exec_code(self):
"""Write a code block for current task step and execute it in an interactive notebook environment."""
counter = 0
success = False
await self.execute_code.init_code()
# plan info
plan_status = self.planner.get_plan_status()
plan_status = plan_status + f"\nFurther Task Instruction: {instruction}"
if self.planner.current_task:
# exclude task results from the plan status to save tokens, since they are already in memory
plan_status = self.planner.get_plan_status(exclude=["task_result"])
else:
return "No current_task found now. Please use command Plan.append_task to add a task first."
# tool info
if self.custom_tool_recommender:
@ -102,6 +79,7 @@ class DataAnalyst(RoleZero):
tool_info=tool_info,
working_memory=self.rc.working_memory.get(),
use_reflection=use_reflection,
memory=self.rc.memory.get(self.memory_k),
)
self.rc.working_memory.add(Message(content=code, role="assistant", cause_by=WriteAnalysisCode))

View file

@ -984,7 +984,7 @@ Explanation: The requirement is to scrape data from a website and extract inform
"args": {
"task_id": "3",
"dependent_task_ids": ["2"],
"instruction": "View the html content of the search result page before scrap data to understand the structure.",
"instruction": "View and print the html content of the search result page before scrap data to understand the structure.",
"assignee": "David"
}
},
@ -1035,7 +1035,7 @@ Explanation: Since the Browser has successfully navigated to the website, and I
```
## action 4
Explanation: Since the Browser has successfully search the keyword `beef`, I will finish the current task and then write code to view the html content of the page.
Explanation: Since the Browser has successfully search the keyword `beef`, I will finish the current task and then write code to view and print the html content of the page.
```json
[
@ -1081,7 +1081,7 @@ Explanation: The requirement is to scrape data from a website and extract inform
"args": {
"task_id": "1",
"dependent_task_ids": [],
"instruction": "View the html content of the page before scrap data to understand the structure.",
"instruction": "View and print the html content of the page before scrap data to understand the structure.",
"assignee": "David"
}
},
@ -1098,7 +1098,7 @@ Explanation: The requirement is to scrape data from a website and extract inform
```
## action 2
Explanation: To scrap data from the website, I will first view the html content of the page.
Explanation: To scrap data from the website, I will first view and print the html content of the page.
```json
[

View file

@ -1,6 +1,7 @@
from __future__ import annotations
import json
from typing import List
from pydantic import BaseModel, Field
@ -42,7 +43,9 @@ PLAN_STATUS = """
## Finished Section of Current Task
### code
```python
{current_task_code}
```
### execution result
{current_task_result}
@ -163,8 +166,10 @@ class Planner(BaseModel):
return context_msg + self.working_memory.get()
def get_plan_status(self) -> str:
def get_plan_status(self, exclude: List[str] = None) -> str:
# prepare components of a plan status
exclude = exclude or []
exclude_prompt = "omit here"
finished_tasks = self.plan.get_finished_tasks()
code_written = [remove_comments(task.code) for task in finished_tasks]
code_written = "\n\n".join(code_written)
@ -176,11 +181,11 @@ class Planner(BaseModel):
# combine components in a prompt
prompt = PLAN_STATUS.format(
code_written=code_written,
task_results=task_results,
code_written=code_written if "code" not in exclude else exclude_prompt,
task_results=task_results if "task_result" not in exclude else exclude_prompt,
current_task=self.current_task.instruction,
current_task_code=self.current_task.code if self.current_task.code else "",
current_task_result=self.current_task.result if self.current_task.result else "",
current_task_code=self.current_task.code if "code" not in exclude else exclude_prompt,
current_task_result=self.current_task.result if "task_result" not in exclude else exclude_prompt,
guidance=guidance,
)