Merge dev to dev_tool_selection

This commit is contained in:
lidanyang 2023-12-06 17:08:09 +08:00
commit 56dd0ee882
8 changed files with 534 additions and 23 deletions

View file

@ -10,12 +10,12 @@ from metagpt.actions import Action
from metagpt.actions.execute_code import ExecutePyCode
from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools
from metagpt.actions.write_plan import WritePlan
from metagpt.actions.write_task_guide import WriteTaskGuide
from metagpt.logs import logger
from metagpt.prompts.ml_engineer import GEN_DATA_DESC_PROMPT
from metagpt.roles import Role
from metagpt.schema import Message, Plan
from metagpt.utils.common import CodeParser
from metagpt.actions.write_code_steps import WriteCodeSteps
STRUCTURAL_CONTEXT = """
## User Requirement
@ -123,11 +123,6 @@ class AskReview(Action):
return rsp, confirmed
# class WriteTaskGuide(Action):
# async def run(self, task_instruction: str, data_desc: dict = None) -> str:
# return ""
class GenerateDataDesc(Action):
async def run(self, files: list) -> dict:
data_desc = {}
@ -154,7 +149,7 @@ class MLEngineer(Role):
self._set_react_mode(react_mode="plan_and_act")
self.plan = Plan(goal=goal)
self.use_tools = True
self.use_task_guide = True
self.use_code_steps = True
self.execute_code = ExecutePyCode()
self.auto_run = auto_run
self.data_path = data_path
@ -172,7 +167,7 @@ class MLEngineer(Role):
logger.info(f"ready to take on task {task}")
# take on current task
code, result, success = await self._write_and_exec_code()
code, result, success, code_steps = await self._write_and_exec_code()
# ask for acceptance; users can also refuse and change tasks in the plan
task_result_confirmed = await self._ask_review()
@ -181,6 +176,7 @@ class MLEngineer(Role):
# tick off this task and record progress
task.code = code
task.result = result
task.code_steps = code_steps
self.plan.finish_current_task()
self.working_memory.clear()
@ -194,9 +190,9 @@ class MLEngineer(Role):
return data_desc
async def _write_and_exec_code(self, max_retry: int = 3):
task_guide = (
await WriteTaskGuide().run(self.plan)
if self.use_task_guide
code_steps = (
await WriteCodeSteps().run(self.plan)
if self.use_code_steps
else ""
)
@ -214,13 +210,13 @@ class MLEngineer(Role):
logger.info("Write code with pure generation")
# code = "print('abc')"
code = await WriteCodeByGenerate().run(
context=context, plan=self.plan, task_guide=task_guide, temperature=0.0
context=context, plan=self.plan, code_steps=code_steps, temperature=0.0
)
cause_by = WriteCodeByGenerate
else:
logger.info("Write code with tools")
code = await WriteCodeWithTools().run(
context=context, plan=self.plan, task_guide=task_guide
context=context, plan=self.plan, code_steps=code_steps,
)
cause_by = WriteCodeWithTools
@ -243,7 +239,7 @@ class MLEngineer(Role):
counter += 1
return code, result, success
return code, result, success, code_steps
async def _ask_review(self):
if not self.auto_run:
@ -272,7 +268,7 @@ class MLEngineer(Role):
def get_useful_memories(self) -> List[Message]:
"""find useful memories only to reduce context length and improve performance"""
# TODO: dataset description, code steps
user_requirement = self.plan.goal
tasks = json.dumps(
[task.dict() for task in self.plan.tasks], indent=4, ensure_ascii=False
@ -294,11 +290,11 @@ class MLEngineer(Role):
if __name__ == "__main__":
# requirement = "Run data analysis on sklearn Iris dataset, include a plot.."
# requirement = "Run data analysis on sklearn Iris dataset, include a plot"
# requirement = "Run data analysis on sklearn Diabetes dataset, include a plot"
# requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy"
# requirement = "Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy"
# requirement = "Run EDA and visualization on this dataset, train a model to predict survival, report metrics on validation set (20%), dataset: workspace/titanic/train.csv"
requirement = "Run EDA and visualization on this dataset, train a model to predict survival, report metrics on validation set (20%), dataset: workspace/titanic/train.csv"
requirement = "Perform data analysis on the provided data. Train a model to predict the target variable Survived. Include data preprocessing, feature engineering, and modeling in your pipeline. The metric is accuracy."
data_path = "/data/lidanyang/tabular_data/titanic"