diff --git a/.gitignore b/.gitignore index e03eab3d3..9b679d48a 100644 --- a/.gitignore +++ b/.gitignore @@ -164,3 +164,4 @@ tmp output.wav metagpt/roles/idea_agent.py .aider* +/tests/metagpt/actions/check_data.py diff --git a/config/config.yaml b/config/config.yaml index bf998def7..bed67083c 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -94,7 +94,4 @@ MODEL_FOR_RESEARCHER_REPORT: gpt-3.5-turbo-16k ### browser path for pyppeteer engine, support Chrome, Chromium,MS Edge #PYPPETEER_EXECUTABLE_PATH: "/usr/bin/google-chrome-stable" -PROMPT_FORMAT: json #json or markdown - -# KAGGLE_USERNAME: "" -# KAGGLE_KEY: "" \ No newline at end of file +PROMPT_FORMAT: json #json or markdown \ No newline at end of file diff --git a/metagpt/roles/ml_engineer.py b/metagpt/roles/ml_engineer.py index de649e857..e7fe38ff4 100644 --- a/metagpt/roles/ml_engineer.py +++ b/metagpt/roles/ml_engineer.py @@ -1,13 +1,11 @@ -from typing import Dict, List, Union +from typing import List import json -import subprocess +from datetime import datetime import fire -import re from metagpt.roles import Role -from metagpt.actions import Action -from metagpt.schema import Message, Task, Plan +from metagpt.schema import Message, Plan from metagpt.memory import Memory from metagpt.logs import logger from metagpt.actions.write_plan import WritePlan, update_plan_from_rsp, precheck_update_plan_from_rsp @@ -17,6 +15,7 @@ from metagpt.actions.execute_code import ExecutePyCode from metagpt.roles.kaggle_manager import DownloadData, SubmitResult from metagpt.prompts.ml_engineer import STRUCTURAL_CONTEXT from metagpt.actions.write_code_steps import WriteCodeSteps +from metagpt.utils.save_code import save_code_file class MLEngineer(Role): def __init__( @@ -99,6 +98,9 @@ class MLEngineer(Role): rsp = Message(content=summary, cause_by=SummarizeAnalysis) self._rc.memory.add(rsp) + # save code using datetime.now or keywords related to the goal of your project (plan.goal). + project_record = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + save_code_file(name=project_record, code_context=self.execute_code.nb, file_format="ipynb") return rsp async def _write_and_exec_code(self, max_retry: int = 3): @@ -223,14 +225,13 @@ class MLEngineer(Role): if __name__ == "__main__": - # requirement = "Run data analysis on sklearn Iris dataset, include a plot" + requirement = "Run data analysis on sklearn Iris dataset, include a plot" # requirement = "Run data analysis on sklearn Diabetes dataset, include a plot" # requirement = "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy" # requirement = "Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy" # requirement = "Run EDA and visualization on this dataset, train a model to predict survival, report metrics on validation set (20%), dataset: workspace/titanic/train.csv" - requirement = "This is a house price dataset, your goal is to predict the sale price of a property based on its features. The target column is SalePrice. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSE between the logarithm of the predicted value and the logarithm of the observed sales price on the eval data. Train data path: 'workspace/house-prices-advanced-regression-techniques/split_train.csv', eval data path: 'workspace/house-prices-advanced-regression-techniques/split_eval.csv'." - async def main(requirement: str = requirement, auto_run: bool = False): + async def main(requirement: str = requirement, auto_run: bool = True): role = MLEngineer(goal=requirement, auto_run=auto_run) await role.run(requirement) diff --git a/metagpt/utils/save_code.py b/metagpt/utils/save_code.py new file mode 100644 index 000000000..96c310336 --- /dev/null +++ b/metagpt/utils/save_code.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +# @Date : 12/12/2023 4:14 PM +# @Author : stellahong (stellahong@fuzhi.ai) +# @Desc : +import os +import json + +import nbformat + +from metagpt.const import DATA_PATH + +def save_code_file(name: str, code_context: str, file_format: str = "py") -> None: + """ + Save code files to a specified path. + + Args: + - name (str): The name of the folder to save the files. + - code_context (str): The code content. + - file_format (str, optional): The file format. Supports 'py' (Python file), 'json' (JSON file), and 'ipynb' (Jupyter Notebook file). Default is 'py'. + + + Returns: + - None + """ + # Create the folder path if it doesn't exist + os.makedirs(name=DATA_PATH / "output" / f"{name}", exist_ok=True) + + # Choose to save as a Python file or a JSON file based on the file format + file_path = DATA_PATH / "output" / f"{name}/code.{file_format}" + if file_format == "py": + with open(file_path, "w", encoding="utf-8") as fp: + fp.write(code_context + "\n\n") + elif file_format == "json": + # Parse the code content as JSON and save + data = {"code": code_context} + with open(file_path, "w", encoding="utf-8") as fp: + json.dump(data, fp, indent=2) + elif file_format == "ipynb": + nbformat.write(code_context, file_path) + else: + raise ValueError("Unsupported file format. Please choose 'py', 'json', or 'ipynb'.") + + + + diff --git a/tests/metagpt/utils/test_save_code.py b/tests/metagpt/utils/test_save_code.py new file mode 100644 index 000000000..60a9e1ff4 --- /dev/null +++ b/tests/metagpt/utils/test_save_code.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- +# @Date : 12/12/2023 4:17 PM +# @Author : stellahong (stellahong@fuzhi.ai) +# @Desc : +import pytest +import os +import json +import nbformat + +from metagpt.actions.write_analysis_code import WriteCodeByGenerate +from metagpt.actions.execute_code import ExecutePyCode + +from metagpt.utils.save_code import save_code_file, DATA_PATH + + +def test_save_code_file_python(): + save_code_file("example", "print('Hello, World!')") + file_path = DATA_PATH / "output" / "example" / "code.py" + assert os.path.exists(file_path), f"File does not exist: {file_path}" + + +def test_save_code_file_python(): + save_code_file("example", "print('Hello, World!')") + file_path = DATA_PATH / "output" / "example" / "code.py" + with open(file_path, "r", encoding="utf-8") as fp: + content = fp.read() + assert "print('Hello, World!')" in content, "File content does not match" + + +def test_save_code_file_json(): + save_code_file("example_json", "print('Hello, JSON!')", file_format="json") + file_path = DATA_PATH / "output" / "example_json" / "code.json" + with open(file_path, "r", encoding="utf-8") as fp: + data = json.load(fp) + assert "code" in data, "JSON key 'code' is missing" + assert data["code"] == "print('Hello, JSON!')", "JSON content does not match" + + + +@pytest.mark.asyncio +async def test_save_code_file_notebook(): + code = await WriteCodeByGenerate().run( + context="basic python, hello world", plan="", code_steps="", temperature=0.0 + ) + executor = ExecutePyCode() + await executor.run(code) + # Save as a Notebook file + save_code_file("example_nb", executor.nb, file_format="ipynb") + file_path = DATA_PATH / "output" / "example_nb" / "code.ipynb" + assert os.path.exists(file_path), f"Notebook file does not exist: {file_path}" + + # Additional checks specific to notebook format + notebook = nbformat.read(file_path, as_version=4) + assert len(notebook.cells) > 0, "Notebook should have at least one cell" + first_cell_source = notebook.cells[0].source + assert "print('Hello, World!')" in first_cell_source, "Notebook cell content does not match"