more plan operation, review update, add kaggle team

2026-05-15 11:02:36 +02:00 · 2023-12-02 01:34:22 +08:00 · 2023-12-02 01:34:22 +08:00 · d3d08fe5f3
commit d3d08fe5f3
parent 8b3d640dd6
10 changed files with 330 additions and 88 deletions
--- a/metagpt/roles/kaggle_manager.py
+++ b/metagpt/roles/kaggle_manager.py
@ -5,16 +5,18 @@ import subprocess
 import fire
 import pandas as pd

+from metagpt.config import CONFIG
 from metagpt.const import WORKSPACE_ROOT
 from metagpt.roles import Role
 from metagpt.actions import Action, BossRequirement
-from metagpt.actions.write_analysis_code import AskReview, SummarizeAnalysis
+from metagpt.actions.ml_da_action import AskReview, SummarizeAnalysis
 from metagpt.schema import Message, Task, Plan
 from metagpt.logs import logger
+from metagpt.utils.common import CodeParser

 import os
-os.environ["KAGGLE_USERNAME"] = "xxx"
-os.environ["KAGGLE_KEY"] = "xxx"
+os.environ["KAGGLE_USERNAME"] = CONFIG.kaggle_username
+os.environ["KAGGLE_KEY"] = CONFIG.kaggle_key

 def run_command(cmd):
    print(cmd)
@ -38,6 +40,7 @@ class DownloadData(Action):
        
        # if not os.path.exists(data_path):
        if True:
+            # run_command(f"rm -r {data_path / '*'}")
            run_command(f"unzip -o {WORKSPACE_ROOT / '*.zip'} -d {data_path}")  # FIXME: not safe
        
        file_list = run_command(f"ls {data_path}")
@ -52,24 +55,30 @@ class DownloadData(Action):

 class SubmitResult(Action):
    PROMPT_TEMPLATE = """
-    # Context
-    {context}
+    # Summary
+    __summary__
    # Your task
-    Extract the prediction file for test set, return only the path string, e.g., xxx.csv, xxx.xlsx
+    Extract the file path for test set prediction from the summary above, output a json following the format:
+    ```json
+    {"file_path": str = "the file path, for example, /path/to/the/prediction/file/xxx.csv, /path/to/the/prediction/file/xxx.xlsx"}
+    ```
    """

    def __init__(self, name: str = "", context=None, llm=None) -> str:
        super().__init__(name, context, llm)

    async def _parse_submit_file_path(self, context) -> str:
-        prompt = self.PROMPT_TEMPLATE.format(context=context)
+        prompt = self.PROMPT_TEMPLATE.replace("__summary__", context)
        rsp = await self._aask(prompt)
-        return rsp
+        rsp = CodeParser.parse_code(block=None, text=rsp)
+        file_path = json.loads(rsp)["file_path"]
+        return file_path

    async def run(self, competition, submit_message="") -> str:
-        submit_file_path = self._parse_submit_file_path(submit_message)
+        submit_file_path = await self._parse_submit_file_path(submit_message)

        data_path = WORKSPACE_ROOT / competition
+        submit_message = submit_message.replace("'", "")

        run_command(f"kaggle competitions submit {competition} -f {submit_file_path} -m '{submit_message}'")
        run_command(f"kaggle competitions leaderboard --show --csv {competition} > {data_path / 'leaderboard.csv'}")
@ -77,20 +86,20 @@ class SubmitResult(Action):
        
        leaderboard = pd.read_csv(data_path / 'leaderboard.csv')
        submission = pd.read_csv(data_path / 'submission.csv')
-        submission_score = submission.loc[0, "publicScore"]
-        submission_rank = leaderboard.loc[leaderboard["score"] == submission_score].index[0]
-        submission_rank_pct = round(submission_rank / len(leaderboard), 4) * 100
+        print(submission)  # submission.to_json(orient="records")

-        # best_score = max(submission["publicScore"])
-        # best_rank = leaderboard.loc[leaderboard["score"] == best_score].index[0]
+        submission_score = submission.loc[0, "publicScore"]
+        best_score = max(submission["publicScore"])  # might be min
+        rank = leaderboard.loc[leaderboard["score"] == best_score].index[0]
+        rank_pct = round(rank / len(leaderboard), 4) * 100

        submission_summary = f"""
-        ## All History
-        {submission.to_json(orient="records")}
-        ## Current
-        Current submission score: {submission_score}, rank: {submission_rank} (top {submission_rank_pct}%);
+        # All histories:
+        {submission.head(5).to_string()}
+        # Current
+        Current submission score: {submission_score}, best score: {best_score}, best rank: {rank} (top {rank_pct}%)
        """
-        print(submission_summary)
+        logger.info(submission_summary)
        return submission_summary


@ -110,8 +119,6 @@ class KaggleManager(Role):
            self._set_state(0)  # DownloadData, get competition of interest from human, download datasets
        elif observed == SummarizeAnalysis:
            self._set_state(1)  # SubmitResult, get prediction from MLEngineer and submit it to Kaggle
-        elif observed == SubmitResult:
-            self._set_state(2)  # AskReview, ask human for improvement

    async def _act(self):
        todo = self._rc.todo
@ -127,3 +134,19 @@ class KaggleManager(Role):
        msg = Message(content=rsp, role="user", cause_by=type(todo))

        return msg
+
+if __name__ == "__main__":
+    competition, data_desc, requirement = (
+        "titanic",
+        "Training set is train.csv.\nTest set is test.csv. We also include gender_submission.csv, a set of predictions that assume all and only female passengers survive, as an example of what a submission file should look like.",
+        "Run EDA on the train dataset, train a model to predict survival (20% as validation) and save it, predict the test set using saved model, save the test result according to format",
+    )
+
+    summary = "I used Python with pandas for data preprocessing, sklearn's RandomForestClassifier for modeling, and achieved 82.12% accuracy on validation. Predictions saved at '/Users/gary/Desktop/data_agents_opt/workspace/titanic/gender_submission.csv'."
+
+    async def main(requirement: str = requirement):
+        role = KaggleManager(competition=competition, data_desc=data_desc)
+        # await role.run(Message(content="", cause_by=BossRequirement))
+        await role.run(Message(content=summary, cause_by=SummarizeAnalysis))
+
+    fire.Fire(main)
--- a/metagpt/roles/ml_engineer.py
+++ b/metagpt/roles/ml_engineer.py
@ -7,55 +7,14 @@ import fire
 from metagpt.roles import Role
 from metagpt.actions import Action
 from metagpt.schema import Message, Task, Plan
+from metagpt.memory import Memory
 from metagpt.logs import logger
 from metagpt.actions.write_plan import WritePlan
 from metagpt.actions.write_analysis_code import WriteCodeByGenerate, WriteCodeWithTools
+from metagpt.actions.ml_da_action import AskReview, SummarizeAnalysis, Reflect, ReviewConst, truncate
 from metagpt.actions.execute_code import ExecutePyCode
-
-STRUCTURAL_CONTEXT = """
-## User Requirement
-{user_requirement}
-## Current Plan
-{tasks}
-## Current Task
-{current_task}
-"""
-
-
-def truncate(result: str, keep_len: int = 1000) -> str:
-    desc = "Truncated to show only the last 1000 characters\n"
-    if result.startswith(desc):
-        result = result[-len(desc) :]
-
-    if len(result) > keep_len:
-        result = result[-keep_len:]
-
-    if not result.startswith(desc):
-        return desc + result
-    return desc
-
-
-class AskReview(Action):
-    async def run(self, context: List[Message], plan: Plan = None):
-        logger.info("Current overall plan:")
-        logger.info(
-            "\n".join([f"{task.task_id}: {task.instruction}, is_finished: {task.is_finished}" for task in plan.tasks])
-        )
-
-        logger.info("most recent context:")
-        latest_action = context[-1].cause_by.__name__ if context[-1].cause_by else ""
-        prompt = f"\nPlease review output from {latest_action}:\n" \
-            "If you want to change a task in the plan, say 'change task task_id, ... (things to change)'\n" \
-            "If you confirm the output and wish to continue with the current process, type CONFIRM\n" \
-            "If you want to terminate the process, type exit:\n"
-        rsp = input(prompt)
-
-        if rsp.lower() in ("exit"):
-            exit()
-
-        confirmed = rsp.lower() in ("confirm", "yes", "y")
-
-        return rsp, confirmed
+from metagpt.roles.kaggle_manager import DownloadData, SubmitResult
+from metagpt.prompts.ml_engineer import STRUCTURAL_CONTEXT


 class WriteTaskGuide(Action):
@ -69,13 +28,35 @@ class MLEngineer(Role):
    ):
        super().__init__(name=name, profile=profile, goal=goal)
        self._set_react_mode(react_mode="plan_and_act")
+        self._watch([DownloadData, SubmitResult])
+
        self.plan = Plan(goal=goal)
        self.use_tools = False
        self.use_task_guide = False
        self.execute_code = ExecutePyCode()
        self.auto_run = auto_run

+        # memory for working on each task, discarded each time a task is done
+        self.working_memory = Memory()
+
    async def _plan_and_act(self):
+
+        ### Actions in a multi-agent multi-turn setting ###
+        memories = self.get_memories()
+        if memories:
+            latest_event = memories[-1].cause_by
+            if latest_event == DownloadData:
+                self.plan.context = memories[-1].content
+            elif latest_event == SubmitResult:
+                # get feedback for improvement from human, add to working memory
+                await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)
+                # self-reflect on previous plan outcomes and think about how to improve the plan, add to working memory
+                prev_plan_outcomes = memories[-1].content
+                reflection = await Reflect().run(context=prev_plan_outcomes)
+                self.working_memory.add(Message(content=reflection, role="assistant"))
+
+
+        ### Common Procedure in both single- and multi-agent setting ###
        # create initial plan and update until confirmation
        await self._update_plan()

@ -87,7 +68,7 @@ class MLEngineer(Role):
            code, result, success = await self._write_and_exec_code()

            # ask for acceptance, users can other refuse and change tasks in the plan
-            task_result_confirmed = await self._ask_review()
+            review, task_result_confirmed = await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)

            if success and task_result_confirmed:
                # tick off this task and record progress
@ -98,7 +79,16 @@ class MLEngineer(Role):

            else:
                # update plan according to user's feedback and to take on changed tasks
-                await self._update_plan()
+                await self._update_plan(review)
+
+        completed_plan_memory = self.get_useful_memories()  # completed plan as an outcome
+        self._rc.memory.add(completed_plan_memory[0])  # add to persistent memory
+
+        summary = await SummarizeAnalysis().run(self.plan)
+        rsp = Message(content=summary, cause_by=SummarizeAnalysis)
+        self._rc.memory.add(rsp)
+
+        return rsp

    async def _write_and_exec_code(self, max_retry: int = 3):
        task_guide = (
@ -143,23 +133,28 @@ class MLEngineer(Role):

            if "!pip" in code:
                success = False
-            # if not success:
-            #     await self._ask_review()

            counter += 1

+            if not success and counter >= max_retry:
+                logger.info("coding failed!")
+                review, _ = await self._ask_review(auto_run=False, trigger=ReviewConst.CODE_REVIEW_TRIGGER)
+                if ReviewConst.CHANGE_WORD in review:
+                    counter = 0  # redo the task again with help of human suggestions
+
        return code, result, success

-    async def _ask_review(self):
-        if not self.auto_run:
+    async def _ask_review(self, auto_run: bool = None, trigger: str = ReviewConst.TASK_REVIEW_TRIGGER):
+        auto_run = self.auto_run if auto_run is None else auto_run  # an explicit False must override self.auto_run and force a review
+        if not auto_run:
            context = self.get_useful_memories()
-            review, confirmed = await AskReview().run(context=context[-5:], plan=self.plan)
+            review, confirmed = await AskReview().run(context=context[-5:], plan=self.plan, trigger=trigger)
            if not confirmed:
                self.working_memory.add(Message(content=review, role="user", cause_by=AskReview))
-            return confirmed
-        return True
+            return review, confirmed
+        return "", True

-    async def _update_plan(self, max_tasks: int = 3):
+    async def _update_plan(self, review: str = "", max_tasks: int = 3):
        plan_confirmed = False
        while not plan_confirmed:
            context = self.get_useful_memories()
@ -167,30 +162,36 @@ class MLEngineer(Role):
            self.working_memory.add(
                Message(content=rsp, role="assistant", cause_by=WritePlan)
            )
-            plan_confirmed = await self._ask_review()
+
+            # TODO: precheck plan before asking reviews
+
+            _, plan_confirmed = await self._ask_review(trigger=ReviewConst.TASK_REVIEW_TRIGGER)

        tasks = WritePlan.rsp_to_tasks(rsp)
-        self.plan.add_tasks(tasks)
-        self.working_memory.clear()
+        if len(tasks) == 1 and self.plan.has_task_id(tasks[0].task_id):
+            self.plan.replace_task(tasks[0])
+        else:
+            self.plan.add_tasks(tasks)
+        self.working_memory.clear()        

    def get_useful_memories(self) -> List[Message]:
        """find useful memories only to reduce context length and improve performance"""

        user_requirement = self.plan.goal
+        data_desc = self.plan.context
        tasks = json.dumps(
            [task.dict() for task in self.plan.tasks], indent=4, ensure_ascii=False
        )
        current_task = self.plan.current_task.json() if self.plan.current_task else {}
        context = STRUCTURAL_CONTEXT.format(
-            user_requirement=user_requirement, tasks=tasks, current_task=current_task
+            user_requirement=user_requirement, data_desc=data_desc, tasks=tasks, current_task=current_task
        )
        context_msg = [Message(content=context, role="user")]

-        return context_msg + self.working_memory.get()
-
-    @property
-    def working_memory(self):
-        return self._rc.memory
+        return context_msg + self.get_working_memories()
+    
+    def get_working_memories(self) -> List[Message]:
+        return self.working_memory.get()


 if __name__ == "__main__":