mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-08 15:05:17 +02:00
Merge branch 'data_analyst_ldy' into 'mgx_ops'
Data analyst ldy See merge request pub/MetaGPT!255
This commit is contained in:
commit
c1536bc5b9
6 changed files with 29 additions and 51 deletions
|
|
@ -43,6 +43,7 @@ class WriteAnalysisCode(Action):
|
|||
tool_info: str = "",
|
||||
working_memory: list[Message] = None,
|
||||
use_reflection: bool = False,
|
||||
memory: list[Message] = None,
|
||||
**kwargs,
|
||||
) -> str:
|
||||
structual_prompt = STRUCTUAL_PROMPT.format(
|
||||
|
|
@ -52,7 +53,8 @@ class WriteAnalysisCode(Action):
|
|||
)
|
||||
|
||||
working_memory = working_memory or []
|
||||
context = self.llm.format_msg([Message(content=structual_prompt, role="user")] + working_memory)
|
||||
memory = memory or []
|
||||
context = self.llm.format_msg(memory + [Message(content=structual_prompt, role="user")] + working_memory)
|
||||
|
||||
# LLM call
|
||||
if use_reflection:
|
||||
|
|
|
|||
|
|
@ -2,9 +2,9 @@ from metagpt.strategy.task_type import TaskType
|
|||
|
||||
EXTRA_INSTRUCTION = """
|
||||
6. Carefully choose to use or not use the browser tool to assist you in web tasks.
|
||||
- When no click action is required, no need to use the browser tool to navigate to the webpage before scraping.
|
||||
- If you need detail HTML content, write code to get it but not to use the browser tool.
|
||||
- Make sure the command_name are certainly in Available Commands when you use the browser tool.
|
||||
- When no click action is required, no need to use the Browser tool to navigate to the webpage before scraping.
|
||||
- Write code to view the HTML content rather than using the Browser tool.
|
||||
- Make sure the command_name are certainly in Available Commands when you use the Browser tool.
|
||||
7. When you are making plan. It is highly recommend to plan and append all the tasks in first response once time.
|
||||
8. Don't finish_current_task multiple times for the same task.
|
||||
9. Finish current task timely, such as when the code is written and executed successfully.
|
||||
|
|
@ -20,10 +20,3 @@ CODE_STATUS = """
|
|||
**Execution status**: {status}
|
||||
**Execution result**: {result}
|
||||
"""
|
||||
|
||||
|
||||
BROWSER_INFO = """
|
||||
Here are ordered web actions in the browser environment, note that you can not use the browser tool in the current environment.
|
||||
{browser_actions}
|
||||
The latest url is the one you should use to view the page. If view page has been done, directly use the variable and html content in executing result.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -57,5 +57,5 @@ The current task is about converting image into webpage code. please note the fo
|
|||
# Prompt for taking on "web_scraping" tasks
|
||||
WEB_SCRAPING_PROMPT = """
|
||||
- Remember to view and print the necessary HTML content in a separate task to understand the structure first before scraping data. Such as `html_content = await view_page_element_to_scrape(...)\nprint(html_content)`.
|
||||
- Since the data required by user may not correspond directly to the actual HTML element names, you should thoroughly analyze the HTML structure and meanings of all elements in the executing result first. Ensure the `class_` in your code should derived from the actual HTML structure directly, not based on your knowledge. To ensure it, analyse the most suitable location of the 'class_' in the actual HTML content before code.
|
||||
- Since the data required by user may not correspond directly to the actual HTML element names, you should thoroughly analyze the HTML structure and meanings of all elements in your context first. Ensure the `class_` in your code should derived from the actual HTML structure directly, not based on your knowledge. To ensure it, analyse the most suitable location of the 'class_' in the actual HTML content before code.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,15 +1,11 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import List
|
||||
|
||||
from pydantic import Field, model_validator
|
||||
|
||||
from metagpt.actions.di.execute_nb_code import ExecuteNbCode
|
||||
from metagpt.actions.di.write_analysis_code import WriteAnalysisCode
|
||||
from metagpt.logs import logger
|
||||
from metagpt.prompts.di.data_analyst import (
|
||||
BROWSER_INFO,
|
||||
CODE_STATUS,
|
||||
EXTRA_INSTRUCTION,
|
||||
TASK_TYPE_DESC,
|
||||
|
|
@ -51,37 +47,18 @@ class DataAnalyst(RoleZero):
|
|||
}
|
||||
)
|
||||
|
||||
async def parse_browser_actions(self, memory: List[Message]) -> List[Message]:
|
||||
memory = await super().parse_browser_actions(memory)
|
||||
browser_actions = []
|
||||
for index, msg in enumerate(memory):
|
||||
if msg.cause_by == "browser":
|
||||
browser_url = re.search("URL: (.*?)\\n", msg.content).group(1)
|
||||
pattern = re.compile(r"Command Browser\.(\w+) executed")
|
||||
browser_actions.append(
|
||||
{"command": pattern.match(memory[index - 1].content).group(1), "current url": browser_url}
|
||||
)
|
||||
if browser_actions:
|
||||
browser_actions = BROWSER_INFO.format(browser_actions=browser_actions)
|
||||
self.rc.working_memory.add(Message(content=browser_actions, role="user", cause_by="browser"))
|
||||
return memory
|
||||
|
||||
async def write_and_exec_code(self, instruction: str = ""):
|
||||
"""Write a code block for current task and execute it in an interactive notebook environment.
|
||||
|
||||
Args:
|
||||
instruction: The specific task description for which the code needs to be written.
|
||||
"""
|
||||
if self.planner.plan:
|
||||
logger.info(f"Current task {self.planner.plan.current_task}")
|
||||
|
||||
async def write_and_exec_code(self):
|
||||
"""Write a code block for current task step and execute it in an interactive notebook environment."""
|
||||
counter = 0
|
||||
success = False
|
||||
await self.execute_code.init_code()
|
||||
|
||||
# plan info
|
||||
plan_status = self.planner.get_plan_status()
|
||||
plan_status = plan_status + f"\nFurther Task Instruction: {instruction}"
|
||||
if self.planner.current_task:
|
||||
# clear task result from plan to save token, since it has been in memory
|
||||
plan_status = self.planner.get_plan_status(exclude=["task_result"])
|
||||
else:
|
||||
return "No current_task found now. Please use command Plan.append_task to add a task first."
|
||||
|
||||
# tool info
|
||||
if self.custom_tool_recommender:
|
||||
|
|
@ -102,6 +79,7 @@ class DataAnalyst(RoleZero):
|
|||
tool_info=tool_info,
|
||||
working_memory=self.rc.working_memory.get(),
|
||||
use_reflection=use_reflection,
|
||||
memory=self.rc.memory.get(self.memory_k),
|
||||
)
|
||||
self.rc.working_memory.add(Message(content=code, role="assistant", cause_by=WriteAnalysisCode))
|
||||
|
||||
|
|
|
|||
|
|
@ -984,7 +984,7 @@ Explanation: The requirement is to scrape data from a website and extract inform
|
|||
"args": {
|
||||
"task_id": "3",
|
||||
"dependent_task_ids": ["2"],
|
||||
"instruction": "View the html content of the search result page before scrap data to understand the structure.",
|
||||
"instruction": "View and print the html content of the search result page before scrap data to understand the structure.",
|
||||
"assignee": "David"
|
||||
}
|
||||
},
|
||||
|
|
@ -1035,7 +1035,7 @@ Explanation: Since the Browser has successfully navigated to the website, and I
|
|||
```
|
||||
|
||||
## action 4
|
||||
Explanation: Since the Browser has successfully search the keyword `beef`, I will finish the current task and then write code to view the html content of the page.
|
||||
Explanation: Since the Browser has successfully search the keyword `beef`, I will finish the current task and then write code to view and print the html content of the page.
|
||||
|
||||
```json
|
||||
[
|
||||
|
|
@ -1081,7 +1081,7 @@ Explanation: The requirement is to scrape data from a website and extract inform
|
|||
"args": {
|
||||
"task_id": "1",
|
||||
"dependent_task_ids": [],
|
||||
"instruction": "View the html content of the page before scrap data to understand the structure.",
|
||||
"instruction": "View and print the html content of the page before scrap data to understand the structure.",
|
||||
"assignee": "David"
|
||||
}
|
||||
},
|
||||
|
|
@ -1098,7 +1098,7 @@ Explanation: The requirement is to scrape data from a website and extract inform
|
|||
```
|
||||
|
||||
## action 2
|
||||
Explanation: To scrap data from the website, I will first view the html content of the page.
|
||||
Explanation: To scrap data from the website, I will first view and print the html content of the page.
|
||||
|
||||
```json
|
||||
[
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import List
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
|
@ -42,7 +43,9 @@ PLAN_STATUS = """
|
|||
|
||||
## Finished Section of Current Task
|
||||
### code
|
||||
```python
|
||||
{current_task_code}
|
||||
```
|
||||
### execution result
|
||||
{current_task_result}
|
||||
|
||||
|
|
@ -163,8 +166,10 @@ class Planner(BaseModel):
|
|||
|
||||
return context_msg + self.working_memory.get()
|
||||
|
||||
def get_plan_status(self) -> str:
|
||||
def get_plan_status(self, exclude: List[str] = None) -> str:
|
||||
# prepare components of a plan status
|
||||
exclude = exclude or []
|
||||
exclude_prompt = "omit here"
|
||||
finished_tasks = self.plan.get_finished_tasks()
|
||||
code_written = [remove_comments(task.code) for task in finished_tasks]
|
||||
code_written = "\n\n".join(code_written)
|
||||
|
|
@ -176,11 +181,11 @@ class Planner(BaseModel):
|
|||
|
||||
# combine components in a prompt
|
||||
prompt = PLAN_STATUS.format(
|
||||
code_written=code_written,
|
||||
task_results=task_results,
|
||||
code_written=code_written if "code" not in exclude else exclude_prompt,
|
||||
task_results=task_results if "task_result" not in exclude else exclude_prompt,
|
||||
current_task=self.current_task.instruction,
|
||||
current_task_code=self.current_task.code if self.current_task.code else "",
|
||||
current_task_result=self.current_task.result if self.current_task.result else "",
|
||||
current_task_code=self.current_task.code if "code" not in exclude else exclude_prompt,
|
||||
current_task_result=self.current_task.result if "task_result" not in exclude else exclude_prompt,
|
||||
guidance=guidance,
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue