Remove code_steps; move planner to metagpt.strategy; move CodeInterpreter and MLEngineer roles under metagpt.roles.ci

This commit is contained in:
yzlin 2024-02-04 23:23:18 +08:00
parent 55dac10146
commit 4cfee3ba27
18 changed files with 44 additions and 82 deletions

View file

@ -5,7 +5,7 @@
@File : crawl_webpage.py
"""
from metagpt.roles.code_interpreter import CodeInterpreter
from metagpt.roles.ci.code_interpreter import CodeInterpreter
async def main():

View file

@ -5,7 +5,7 @@
@Author : mannaandpoem
@File : imitate_webpage.py
"""
from metagpt.roles.code_interpreter import CodeInterpreter
from metagpt.roles.ci.code_interpreter import CodeInterpreter
async def main():

View file

@ -4,7 +4,7 @@
# @Desc :
import asyncio
from metagpt.roles.code_interpreter import CodeInterpreter
from metagpt.roles.ci.code_interpreter import CodeInterpreter
async def main(requirement: str = ""):

View file

@ -25,7 +25,6 @@ class WriteCodeWithToolsML(WriteCodeWithTools):
tool_schemas, tool_type_usage_prompt = await self._prepare_tools(plan=plan)
# ML-specific variables to be used in prompt
code_steps = plan.current_task.code_steps
finished_tasks = plan.get_finished_tasks()
code_context = [remove_comments(task.code) for task in finished_tasks]
code_context = "\n\n".join(code_context)
@ -38,7 +37,6 @@ class WriteCodeWithToolsML(WriteCodeWithTools):
current_task=plan.current_task.instruction,
column_info=column_info,
tool_type_usage_prompt=tool_type_usage_prompt,
code_steps=code_steps,
tool_schemas=tool_schemas,
)
@ -49,7 +47,6 @@ class WriteCodeWithToolsML(WriteCodeWithTools):
current_task=plan.current_task.instruction,
column_info=column_info,
tool_type_usage_prompt=tool_type_usage_prompt,
code_steps=code_steps,
)
tool_config = create_func_call_config(CODE_GENERATOR_WITH_TOOLS)
rsp = await self.llm.aask_code(prompt, **tool_config)

View file

@ -79,7 +79,6 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
async def _recommend_tool(
self,
task: str,
code_steps: str,
available_tools: dict,
) -> list:
"""
@ -87,7 +86,6 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
Args:
task (str): the task to recommend tools for
code_steps (str): the code steps to generate the full code for the task
available_tools (dict): the available tools description
Returns:
@ -95,7 +93,6 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
"""
prompt = TOOL_RECOMMENDATION_PROMPT.format(
current_task=task,
code_steps=code_steps,
available_tools=available_tools,
)
tool_config = create_func_call_config(SELECT_FUNCTION_TOOLS)
@ -132,8 +129,7 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
available_tools = self._get_tools_by_type(tool_type)
if available_tools:
available_tools = {tool_name: tool.schemas["description"] for tool_name, tool in available_tools.items()}
code_steps = plan.current_task.code_steps
tool_schemas = await self._recommend_tool(plan.current_task.instruction, code_steps, available_tools)
tool_schemas = await self._recommend_tool(plan.current_task.instruction, available_tools)
return tool_schemas, tool_type_usage_prompt

View file

@ -84,15 +84,11 @@ Latest data info after previous tasks:
Write complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc.
Specifically, {tool_type_usage_prompt}
# Code Steps:
Strictly follow steps below when you writing code if it's convenient.
{code_steps}
# Output Example:
when current task is "train a lightgbm model on training data", and their are two steps in 'Code Steps', the code be like:
when current task is "train a lightgbm model on training data", the code can be like:
```python
# Step 1: check data type and convert to numeric
ojb_cols = train.select_dtypes(include='object').columns.tolist()
obj_cols = train.select_dtypes(include='object').columns.tolist()
for col in obj_cols:
encoder = LabelEncoder()
@ -107,7 +103,6 @@ model.fit(train, y_train)
# Constraints:
- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.
- The output code should contain all steps implemented in 'Code Steps'.
"""
ML_TOOL_USAGE_PROMPT = """
@ -130,10 +125,6 @@ Latest data info after previous tasks:
Write complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc.
Specifically, {tool_type_usage_prompt}
# Code Steps:
Strictly follow steps below when you writing code if it's convenient.
{code_steps}
# Capabilities
- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.
- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..
@ -143,7 +134,7 @@ Each Class tool is described in JSON format. When you call a tool, import the to
{tool_schemas}
# Output Example:
when current task is "do data preprocess, like fill missing value, handle outliers, etc.", and their are two steps in 'Code Steps', the code be like:
when current task is "do data preprocess, like fill missing value, handle outliers, etc.", the code can be like:
```python
# Step 1: fill missing value
# Tools used: ['FillMissingValue']
@ -170,6 +161,4 @@ for col in num_cols:
- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.
- Always prioritize using pre-defined tools for the same functionality.
- Always copy the DataFrame before processing it and use the copy to process.
- The output code should contain all steps implemented correctly in 'Code Steps'.
"""
# - If 'Code Steps' contains step done in 'Done Tasks', such as reading data, don't repeat it.

View file

@ -30,8 +30,6 @@ TOOL_RECOMMENDATION_PROMPT = """
## Task
Recommend up to five tools from 'Available Tools' that can help solve the 'User Requirement'.
This is a detailed code steps for current task. You can refer to it when recommending tools.
{code_steps}
## Available Tools:
{available_tools}

View file

@ -14,10 +14,10 @@ The current task is about data preprocessing, please note the following:
FEATURE_ENGINEERING_PROMPT = """
The current task is about feature engineering. when performing it, please adhere to the following principles:
- Generate as diverse features as possible to improve the model's performance step-by-step.
- If potential impactful features are not included in 'Code Steps', add new steps to generate them.
- Use available feature engineering tools if they are potentially impactful.
- Avoid creating redundant or excessively numerous features in one step.
- Exclude ID columns from feature generation and remove them.
- Each step do feature engineering to train, must do same for test separately at the same time.
- Each feature engineering operation performed on the train set must also be applied to the test set separately at the same time.
- Avoid using the label column to create features, except for cat encoding.
- Use the data from previous task result if exist, do not mock or reload data yourself.
"""

View file

@ -2,7 +2,7 @@ from metagpt.actions.ci.debug_code import DebugCode
from metagpt.actions.ci.execute_nb_code import ExecuteNbCode
from metagpt.actions.ci.ml_action import UpdateDataColumns, WriteCodeWithToolsML
from metagpt.logs import logger
from metagpt.roles.code_interpreter import CodeInterpreter
from metagpt.roles.ci.code_interpreter import CodeInterpreter
from metagpt.tools.tool_types import ToolTypes
from metagpt.utils.common import any_to_str

View file

@ -33,9 +33,9 @@ from metagpt.actions.add_requirement import UserRequirement
from metagpt.context_mixin import ContextMixin
from metagpt.logs import logger
from metagpt.memory import Memory
from metagpt.plan.planner import Planner
from metagpt.provider import HumanProvider
from metagpt.schema import Message, MessageQueue, SerializationMixin
from metagpt.strategy.planner import Planner
from metagpt.utils.common import any_to_name, any_to_str, role_raise_decorator
from metagpt.utils.project_repo import ProjectRepo
from metagpt.utils.repair_llm_raw_output import extract_state_value_from_output

View file

@ -335,7 +335,6 @@ class Task(BaseModel):
dependent_task_ids: list[str] = [] # Tasks prerequisite to this Task
instruction: str = ""
task_type: str = ""
code_steps: str = ""
code: str = ""
result: str = ""
is_success: bool = False
@ -348,7 +347,6 @@ class Task(BaseModel):
self.is_finished = False
def update_task_result(self, task_result: TaskResult):
self.code_steps = task_result.code_steps
self.code = task_result.code
self.result = task_result.result
self.is_success = task_result.is_success
@ -357,7 +355,6 @@ class Task(BaseModel):
class TaskResult(BaseModel):
"""Result of taking a task, with result and is_success required to be filled"""
code_steps: str = ""
code: str = ""
result: str
is_success: bool

View file

@ -124,11 +124,6 @@ class Planner(BaseModel):
def get_useful_memories(self, task_exclude_field=None) -> list[Message]:
"""find useful memories only to reduce context length and improve performance"""
# TODO dataset description , code steps
if task_exclude_field is None:
# Shorten the context as we don't need code steps after we get the codes.
# This doesn't affect current_task below, which should hold the code steps
task_exclude_field = {"code_steps"}
user_requirement = self.plan.goal
context = self.plan.context
tasks = [task.dict(exclude=task_exclude_field) for task in self.plan.tasks]

File diff suppressed because one or more lines are too long

View file

@ -8,8 +8,8 @@ from metagpt.actions.ci.write_analysis_code import (
WriteCodeWithTools,
)
from metagpt.logs import logger
from metagpt.plan.planner import STRUCTURAL_CONTEXT
from metagpt.schema import Message, Plan, Task
from metagpt.strategy.planner import STRUCTURAL_CONTEXT
@pytest.mark.skip
@ -37,13 +37,12 @@ async def test_write_code_by_list_plan():
@pytest.mark.asyncio
async def test_tool_recommendation():
task = "clean and preprocess the data"
code_steps = ""
available_tools = {
"FillMissingValue": "Filling missing values",
"SplitBins": "Bin continuous data into intervals and return the bin identifier encoded as an integer value",
}
write_code = WriteCodeWithTools()
tools = await write_code._recommend_tool(task, code_steps, available_tools)
tools = await write_code._recommend_tool(task, available_tools)
assert len(tools) == 1
assert "FillMissingValue" in tools

View file

@ -1,7 +1,7 @@
import pytest
from metagpt.logs import logger
from metagpt.roles.code_interpreter import CodeInterpreter
from metagpt.roles.ci.code_interpreter import CodeInterpreter
@pytest.mark.asyncio

View file

@ -2,7 +2,7 @@ import pytest
from metagpt.actions.ci.execute_nb_code import ExecuteNbCode
from metagpt.logs import logger
from metagpt.roles.ml_engineer import MLEngineer
from metagpt.roles.ci.ml_engineer import MLEngineer
from metagpt.schema import Message, Plan, Task
from metagpt.tools.tool_types import ToolTypes
from tests.metagpt.actions.ci.test_debug_code import CODE, DebugContext, ErrorStr
@ -22,7 +22,6 @@ MockPlan = Plan(
dependent_task_ids=[],
instruction="Perform exploratory data analysis on the train dataset to understand the features and target variable.",
task_type="eda",
code_steps="",
code="",
result="",
is_success=False,
@ -35,7 +34,6 @@ MockPlan = Plan(
dependent_task_ids=[],
instruction="Perform exploratory data analysis on the train dataset to understand the features and target variable.",
task_type="eda",
code_steps="",
code="",
result="",
is_success=False,