code adapted to v0.6

This commit is contained in:
yzlin 2024-01-10 17:20:01 +08:00
parent 4ec6151691
commit cd990fd5c9
15 changed files with 80 additions and 77 deletions

View file

@@ -30,7 +30,7 @@ class AskReview(Action):
)
logger.info("most recent context:")
latest_action = context[-1].cause_by.__name__ if context[-1].cause_by else ""
latest_action = context[-1].cause_by if context[-1].cause_by else ""
review_instruction = (
ReviewConst.TASK_REVIEW_INSTRUCTION
if trigger == ReviewConst.TASK_REVIEW_TRIGGER

View file

@@ -1,4 +1,4 @@
from typing import Any, List, Optional
from typing import List
from metagpt.actions.write_analysis_code import BaseWriteAnalysisCode
from metagpt.logs import logger
@@ -82,11 +82,6 @@ def messages_to_str(messages: List[Message]) -> str:
class DebugCode(BaseWriteAnalysisCode):
name: str = "debugcode"
context: Optional[str] = None
llm: None
def __init__(self, **kwargs: Any):
super().__init__(**kwargs)
async def run_reflection(
self,

View file

@@ -8,7 +8,7 @@ import re
import traceback
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict, List, Tuple, Union
from typing import Any, Dict, List, Tuple, Union
import nbformat
from nbclient import NotebookClient
@@ -48,23 +48,25 @@ class ExecuteCode(ABC):
class ExecutePyCode(ExecuteCode, Action):
"""execute code, return result to llm, and display it."""
nb: Any
nb_client: Any
console: Console
interaction: str
timeout: int = 600
def __init__(
self,
name: str = "python_executor",
context=None,
llm=None,
nb=None,
timeout: int = 600,
timeout=600,
):
super().__init__(name, context, llm)
if nb is None:
self.nb = nbformat.v4.new_notebook()
else:
self.nb = nb
self.timeout = timeout
self.nb_client = NotebookClient(self.nb, timeout=self.timeout)
self.console = Console()
self.interaction = "ipython" if self.is_ipython() else "terminal"
nb = nb or nbformat.v4.new_notebook()
super().__init__(
nb=nb,
nb_client=NotebookClient(nb, timeout=timeout),
timeout=timeout,
console=Console(),
interaction=("ipython" if self.is_ipython() else "terminal"),
)
async def build(self):
if self.nb_client.kc is None or not await self.nb_client.kc.is_alive():

View file

@@ -7,16 +7,13 @@ from metagpt.utils.common import CodeParser, create_func_config, remove_comments
class SummarizeAnalysis(Action):
PROMPT_TEMPLATE = """
PROMPT_TEMPLATE: str = """
# Context
{context}
# Summary
Output a 30-word summary on analysis tool and modeling algorithms you have used, and the corresponding result. Make sure to announce the complete path to your test prediction file. Your summary:
"""
def __init__(self, name: str = "", context=None, llm=None) -> str:
super().__init__(name, context, llm)
async def run(self, conmpleted_plan: Plan) -> str:
tasks = json.dumps(
[task.dict() for task in conmpleted_plan.tasks],
@@ -29,7 +26,7 @@ class SummarizeAnalysis(Action):
class Reflect(Action):
PROMPT_TEMPLATE = """
PROMPT_TEMPLATE: str = """
# Context
__context__
# Latest User Requirement
@@ -45,7 +42,7 @@ class Reflect(Action):
}
```
"""
REWRITE_PLAN_INSTRUCTION = """Take this reflection for rewriting plan, modify the current plan in place, make reference to your specific instruction, think about you should
REWRITE_PLAN_INSTRUCTION: str = """Take this reflection for rewriting plan, modify the current plan in place, make reference to your specific instruction, think about you should
change which task, add or delete what tasks in the plan. Only make necessary changes, keep reusable tasks unchanged, output the COMPLETE new plan starting from the first task. Your plan should have no more than 5 tasks."""
async def run(self, context: str, user_requirement: str = "") -> str:

View file

@@ -28,7 +28,7 @@ from metagpt.utils.common import create_func_config, remove_comments
class BaseWriteAnalysisCode(Action):
DEFAULT_SYSTEM_MSG = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt
DEFAULT_SYSTEM_MSG: str = """You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**""" # prompt reference: https://github.com/KillianLucas/open-interpreter/blob/v0.1.4/interpreter/system_message.txt
# REUSE_CODE_INSTRUCTION = """ATTENTION: DONT include codes from previous tasks in your current code block, include new codes only, DONT repeat codes!"""
def process_msg(self, prompt: Union[str, List[Dict], Message, List[Message]], system_msg: str = None):
@@ -76,9 +76,6 @@ class BaseWriteAnalysisCode(Action):
class WriteCodeByGenerate(BaseWriteAnalysisCode):
"""Write code fully by generation"""
def __init__(self, name: str = "", context=None, llm=None) -> str:
super().__init__(name, context, llm)
async def run(
self,
context: [List[Message]],
@@ -95,12 +92,14 @@ class WriteCodeByGenerate(BaseWriteAnalysisCode):
class WriteCodeWithTools(BaseWriteAnalysisCode):
"""Write code with help of local available tools. Choose tools first, then generate code to use the tools"""
def __init__(self, name: str = "", context=None, llm=None, schema_path=None):
super().__init__(name, context, llm)
self.schema_path = schema_path
self.available_tools = {}
schema_path: str = ""
available_tools: dict = {}
if self.schema_path is not None:
def __init__(self, schema_path="", **kwargs):
super().__init__(**kwargs)
self.schema_path = schema_path
if schema_path:
self._load_tools(schema_path)
def _load_tools(self, schema_path, schema_module=None):
@@ -223,7 +222,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
class MakeTools(WriteCodeByGenerate):
DEFAULT_SYSTEM_MSG = """Convert any codes provied for you to a very General Function Code startswith `def`.\n
DEFAULT_SYSTEM_MSG: str = """Convert any codes provied for you to a very General Function Code startswith `def`.\n
**Notice:
1. Your code must contain a general function start with `def`.
2. Refactor your code to get the most efficient implementation for large input data in the shortest amount of time.

View file

@@ -16,7 +16,7 @@ from metagpt.utils.common import CodeParser, create_func_config
class WritePlan(Action):
PROMPT_TEMPLATE = """
PROMPT_TEMPLATE: str = """
# Context:
__context__
# Task:

View file

@@ -1,5 +1,7 @@
import json
from pydantic import BaseModel, Field
from metagpt.actions.ask_review import AskReview, ReviewConst
from metagpt.actions.write_plan import (
WritePlan,
@@ -22,14 +24,17 @@ STRUCTURAL_CONTEXT = """
"""
class Planner:
def __init__(self, goal: str, working_memory: Memory, auto_run: bool = False, use_tools: bool = False):
self.plan = Plan(goal=goal)
self.auto_run = auto_run
self.use_tools = use_tools
class Planner(BaseModel):
plan: Plan
working_memory: Memory = Field(
default_factory=Memory
) # memory for working on each task, discarded each time a task is done
auto_run: bool = False
use_tools: bool = False
# memory for working on each task, discarded each time a task is done
self.working_memory = working_memory
def __init__(self, goal: str, **kwargs):
plan = Plan(goal=goal)
super().__init__(plan=plan, **kwargs)
@property
def current_task(self):

View file

@@ -1,5 +1,7 @@
from datetime import datetime
from pydantic import Field
from metagpt.actions.ask_review import ReviewConst
from metagpt.actions.execute_code import ExecutePyCode
from metagpt.actions.write_analysis_code import WriteCodeByGenerate
@@ -10,6 +12,8 @@ from metagpt.utils.save_code import save_code_file
class CodeInterpreter(Role):
execute_code: ExecutePyCode = Field(default_factory=ExecutePyCode, exclude=True)
def __init__(
self,
name="Charlie",
@@ -20,11 +24,10 @@ class CodeInterpreter(Role):
):
super().__init__(name=name, profile=profile, goal=goal)
self._set_react_mode(react_mode="plan_and_act", auto_run=auto_run, use_tools=use_tools)
self.execute_code = ExecutePyCode()
@property
def working_memory(self):
return self._rc.working_memory
return self.rc.working_memory
async def _plan_and_act(self):
rsp = await super()._plan_and_act()

View file

@@ -5,10 +5,9 @@ import subprocess
import fire
import pandas as pd
from metagpt.actions import Action, BossRequirement
from metagpt.actions import Action, UserRequirement
from metagpt.actions.ml_da_action import SummarizeAnalysis
from metagpt.config import CONFIG
from metagpt.const import WORKSPACE_ROOT
from metagpt.logs import logger
from metagpt.roles import Role
from metagpt.schema import Message
@@ -31,7 +30,7 @@ def run_command(cmd):
class DownloadData(Action):
async def run(self, competition, data_desc="") -> str:
data_path = WORKSPACE_ROOT / competition
data_path = CONFIG.workspace_path / competition
output = run_command(f"kaggle competitions list --search {competition}")
assert output != "No competitions found", "You must provide the correct competition name"
@@ -41,7 +40,7 @@ class DownloadData(Action):
if not os.path.exists(data_path):
# if True:
# run_command(f"rm -r {data_path / '*'}")
run_command(f"unzip -o {WORKSPACE_ROOT / '*.zip'} -d {data_path}") # FIXME: not safe
run_command(f"unzip -o {CONFIG.workspace_path / '*.zip'} -d {data_path}") # FIXME: not safe
file_list = run_command(f"ls {data_path}")
@@ -55,7 +54,7 @@
class SubmitResult(Action):
PROMPT_TEMPLATE = """
PROMPT_TEMPLATE: str = """
# Summary
__summary__
# Your task
@@ -78,7 +77,7 @@
async def run(self, competition, submit_message="") -> str:
submit_file_path = await self._parse_submit_file_path(submit_message)
data_path = WORKSPACE_ROOT / competition
data_path = CONFIG.workspace_path / competition
submit_message = submit_message.replace("'", "")
run_command(f"kaggle competitions submit {competition} -f {submit_file_path} -m '{submit_message}'")
@@ -108,20 +107,20 @@ class KaggleManager(Role):
def __init__(self, name="ABC", profile="KaggleManager", goal="", competition="titanic", data_desc=""):
super().__init__(name=name, profile=profile, goal=goal)
self._init_actions([DownloadData, SubmitResult])
self._watch([BossRequirement, SummarizeAnalysis])
self._watch([UserRequirement, SummarizeAnalysis])
self.competition = competition
self.data_desc = data_desc # currently passed in, later can be scrapped down from web by another Role
async def _think(self):
observed = self.get_memories()[-1].cause_by
if observed == BossRequirement:
if observed == UserRequirement:
self._set_state(0) # DownloadData, get competition of interest from human, download datasets
elif observed == SummarizeAnalysis:
self._set_state(1) # SubmitResult, get prediction from MLEngineer and submit it to Kaggle
async def _act(self):
todo = self._rc.todo
logger.info(f"{self._setting}: ready to {self._rc.todo}")
todo = self.rc.todo
logger.info(f"{self._setting}: ready to {self.rc.todo}")
if isinstance(todo, DownloadData):
rsp = await todo.run(self.competition, self.data_desc)
@@ -148,7 +147,7 @@ if __name__ == "__main__":
async def main(requirement: str = requirement):
role = KaggleManager(competition=competition, data_desc=data_desc)
# await role.run(Message(content="", cause_by=BossRequirement))
# await role.run(Message(content="", cause_by=UserRequirement))
await role.run(Message(content=summary, cause_by=SummarizeAnalysis))
fire.Fire(main)

View file

@@ -10,7 +10,7 @@ from metagpt.actions.write_analysis_code import (
WriteCodeWithTools,
)
from metagpt.actions.write_code_steps import WriteCodeSteps
from metagpt.const import PROJECT_ROOT
from metagpt.const import METAGPT_ROOT
from metagpt.logs import logger
from metagpt.roles.code_interpreter import CodeInterpreter
from metagpt.roles.kaggle_manager import DownloadData, SubmitResult
@@ -20,6 +20,13 @@ from metagpt.utils.common import remove_comments
class MLEngineer(CodeInterpreter):
auto_run: bool = False
use_tools: bool = False
use_code_steps: bool = False
make_udfs: bool = False # whether to save user-defined functions
use_udfs: bool = False
data_desc: dict = {}
def __init__(
self,
name="Mark",
@@ -32,13 +39,12 @@ class MLEngineer(CodeInterpreter):
use_udfs=False,
):
super().__init__(name=name, profile=profile, goal=goal, auto_run=auto_run, use_tools=use_tools)
self._watch([DownloadData, SubmitResult])
self.auto_run = auto_run
self.use_tools = use_tools
self.use_code_steps = use_code_steps
self.make_udfs = make_udfs # user-defined functions
self.make_udfs = make_udfs
self.use_udfs = use_udfs
self.data_desc = {}
# self._watch([DownloadData, SubmitResult]) # in multi-agent settings
async def _plan_and_act(self):
### Actions in a multi-agent multi-turn setting, a new attempt on the data ###
@@ -60,7 +66,7 @@ class MLEngineer(CodeInterpreter):
### summarize analysis ###
summary = await SummarizeAnalysis().run(self.planner.plan)
rsp = Message(content=summary, cause_by=SummarizeAnalysis)
self._rc.memory.add(rsp)
self.rc.memory.add(rsp)
return rsp
@@ -108,7 +114,7 @@ class MLEngineer(CodeInterpreter):
self.planner.current_task.task_type = "udf"
schema_path = UDFS_YAML
else:
schema_path = PROJECT_ROOT / "metagpt/tools/functions/schemas"
schema_path = METAGPT_ROOT / "metagpt/tools/functions/schemas"
tool_context, code = await WriteCodeWithTools(schema_path=schema_path).run(
context=context,
plan=self.planner.plan,

View file

@@ -95,7 +95,7 @@ class MLEngineerSimple(Role):
counter = 0 # redo the task again with help of human suggestions
completed_plan_memory = self.get_useful_memories() # completed plan as a outcome
self._rc.memory.add(completed_plan_memory[0]) # add to persistent memory
self.rc.memory.add(completed_plan_memory[0]) # add to persistent memory
prompt = JUDGE_PROMPT_TEMPLATE.format(user_requirement=self.goal, context=completed_plan_memory)
rsp = await self._llm.aask(prompt)
self.working_memory.add(Message(content=rsp, role="system"))

View file

@@ -146,6 +146,7 @@ class Role(SerializationMixin, is_polymorphic_base=True):
actions: list[SerializeAsAny[Action]] = Field(default=[], validate_default=True)
rc: RoleContext = Field(default_factory=RoleContext)
subscription: set[str] = set()
planner: Planner = None
# builtin variables
recovered: bool = False # to tag if a recovered role
@@ -173,7 +174,6 @@
self.llm.system_prompt = self._get_prefix()
self._watch(data.get("watch") or [UserRequirement])
self.planner = None
def _reset(self):
self.states = []
@@ -270,7 +270,7 @@
self.rc.max_react_loop = max_react_loop
elif react_mode == RoleReactMode.PLAN_AND_ACT:
self.planner = Planner(
goal=self._setting.goal, working_memory=self.rc.working_memory, auto_run=auto_run, use_tools=use_tools
goal=self.goal, working_memory=self.rc.working_memory, auto_run=auto_run, use_tools=use_tools
)
def _watch(self, actions: Iterable[Type[Action]] | Iterable[Action]):

View file

@@ -337,7 +337,7 @@ class Plan(BaseModel):
context: str = ""
tasks: list[Task] = []
task_map: dict[str, Task] = {}
current_task_id = ""
current_task_id: str = ""
def _topological_sort(self, tasks: list[Task]):
task_map = {task.task_id: task for task in tasks}

View file

@@ -1,8 +1,4 @@
from metagpt.actions.write_plan import (
Plan,
Task,
precheck_update_plan_from_rsp,
)
from metagpt.actions.write_plan import Plan, Task, precheck_update_plan_from_rsp
def test_precheck_update_plan_from_rsp():

View file

@@ -1,6 +1,7 @@
import fire
from metagpt.actions.execute_code import ExecutePyCode
from metagpt.const import DATA_PATH
from metagpt.logs import logger
from metagpt.roles.code_interpreter import CodeInterpreter
from metagpt.roles.ml_engineer import MLEngineer
@@ -53,10 +54,10 @@
if __name__ == "__main__":
requirement = "Run data analysis on sklearn Iris dataset, include a plot"
# requirement = "Run data analysis on sklearn Iris dataset, include a plot"
# requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy"
# data_path = f"{DATA_PATH}/titanic"
# requirement = f"This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'."
data_path = f"{DATA_PATH}/titanic"
requirement = f"This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: '{data_path}/split_train.csv', eval data path: '{data_path}/split_eval.csv'."
# data_path = f"{DATA_PATH}/icr-identify-age-related-conditions"
# requirement = f"This is a medical dataset with over fifty anonymized health characteristics linked to three age-related conditions. Your goal is to predict whether a subject has or has not been diagnosed with one of these conditions.The target column is Class. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report f1 score on the eval data. Train data path: {data_path}/split_train.csv, eval data path: {data_path}/split_eval.csv."
# data_path = f"{DATA_PATH}/santander-customer-transaction-prediction"