mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-11 15:15:18 +02:00
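1. Change the hard-coded, machine-specific work_dir path in data.yaml to a more general relative path
2. Correct the imports (use the `expo` package instead of `examples.MCTS_test`)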
This commit is contained in:
parent
211f758b53
commit
d14f07f9b1
10 changed files with 76 additions and 56 deletions
|
|
@ -4,9 +4,9 @@ import os
|
|||
import pandas as pd
|
||||
from expo.research_assistant import ResearchAssistant
|
||||
from expo.insights.InsightGenerate import InsightGenerator
|
||||
from expo.dataset import get_split_dataset_path
|
||||
from expo.dataset import get_split_dataset_path, generate_task_requirement
|
||||
from expo.evaluation.evaluation import evaluate_score
|
||||
from expo.utils import mcts_logger, load_execute_notebook, generate_task_requirement, get_exp_pool_path
|
||||
from expo.utils import mcts_logger, load_execute_notebook, get_exp_pool_path
|
||||
|
||||
from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender
|
||||
from metagpt.utils.common import write_json_file, read_json_file, format_trackback_info
|
||||
|
|
|
|||
|
|
@ -152,6 +152,6 @@ datasets:
|
|||
\ eval data. Do not plot or make any visualizations.\n"
|
||||
|
||||
|
||||
work_dir: D:/work/MG-open/MetaGPT/workspace # path to the workspace directory
|
||||
work_dir: ../workspace # path to the workspace directory
|
||||
role_dir: storage/team/environment/roles/ResearchAssistant_David
|
||||
# analysis_pool_dir: D:/work/MG-open/MetaGPT/examples/MCTS_test/analysis_pool_sample.json
|
||||
|
|
@ -5,7 +5,7 @@ import os
|
|||
import json
|
||||
import yaml
|
||||
import pandas as pd
|
||||
from examples.MCTS_test.insights.solution_designer import SolutionDesigner
|
||||
from expo.insights.solution_designer import SolutionDesigner
|
||||
import asyncio
|
||||
|
||||
BASE_USER_REQUIREMENT = """\
|
||||
|
|
@ -14,6 +14,35 @@ Perform data analysis, data preprocessing, feature engineering, and modeling to
|
|||
Report {metric} on the eval data. Do not plot or make any visualizations.
|
||||
"""
|
||||
|
||||
# Prompt template describing one modelling task to the agent.
# It is filled with ``str.format`` and expects exactly these placeholders:
#   user_requirement - the base requirement text for the task
#   train_path / dev_path / test_path - locations of the data splits
#   output_dir - directory where the prediction files must be written
TASK_PROMPT = """\
# User requirement
{user_requirement}
**Attention** Please do not leak the target label in any form during training.

## Saving Dev and Test Predictions
Save the prediction results of the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory BEFORE printing out the results.
The file should contain a single `target` column with the predicted values.
Make sure the prediction results are in the same format as the target column in the training set. The labels should be transformed back to the original format if any transformation was applied during training.

## Output Training Set Performance
Make sure the performance of the model is printed in python in the last step even if it has been printed in the previous steps. The value should be a float number.
Print the training set performance in the last step. Write in this format:
```python
...
print("Train score:", train_score)
```

# Data dir
training: {train_path}
dev: {dev_path}
testing: {test_path}

# Output dir
{output_dir}

"""
|
||||
|
||||
|
||||
SEED = 100
|
||||
TRAIN_TEST_SPLIT = 0.8
|
||||
TRAIN_DEV_SPLIT = 0.75
|
||||
|
|
@ -89,6 +118,20 @@ def create_dataset_dict(dataset):
|
|||
}
|
||||
return dataset_dict
|
||||
|
||||
def generate_task_requirement(task_name, data_config):
    """Render ``TASK_PROMPT`` for the given task.

    Args:
        task_name: Name of the task; passed to the lookup helpers and used
            as the last component of the output directory.
        data_config: Configuration mapping; must provide ``"work_dir"`` and
            is forwarded to ``get_user_requirement`` and
            ``get_split_dataset_path``.

    Returns:
        The formatted prompt string. The dev and test paths come from the
        ``"dev_wo_target"`` and ``"test_wo_target"`` entries of the split
        path mapping; the output directory is ``<work_dir>/<task_name>``.
    """
    base_requirement = get_user_requirement(task_name, data_config)
    split_paths = get_split_dataset_path(task_name, data_config)
    prompt_fields = {
        "user_requirement": base_requirement,
        "train_path": split_paths["train"],
        "dev_path": split_paths["dev_wo_target"],
        "test_path": split_paths["test_wo_target"],
        "output_dir": f"{data_config['work_dir']}/{task_name}",
    }
    return TASK_PROMPT.format(**prompt_fields)
|
||||
|
||||
|
||||
class ExpDataset:
|
||||
description : str = None
|
||||
metadata : dict = None
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ import random
|
|||
import json
|
||||
from metagpt.llm import LLM
|
||||
from metagpt.schema import Message
|
||||
from examples.MCTS_test.utils import load_data_config, mcts_logger
|
||||
from expo.utils import load_data_config, mcts_logger
|
||||
DATA_CONFIG = load_data_config()
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ import random
|
|||
import json
|
||||
from metagpt.llm import LLM
|
||||
from metagpt.schema import Message
|
||||
from examples.MCTS_test.utils import clean_json_from_rsp, load_data_config
|
||||
from expo.utils import clean_json_from_rsp, load_data_config
|
||||
|
||||
|
||||
DATA_CONFIG = load_data_config()
|
||||
|
|
|
|||
|
|
@ -10,6 +10,9 @@ from metagpt.utils.common import write_json_file, read_json_file, format_trackba
|
|||
from metagpt.const import MESSAGE_ROUTE_TO_ALL, SERDESER_PATH
|
||||
from metagpt.utils.recovery_util import save_history
|
||||
from expo.utils import mcts_logger, save_notebook
|
||||
from pydantic import Field, model_validator
|
||||
from metagpt.actions.di.write_analysis_code import CheckData, WriteAnalysisCode
|
||||
|
||||
import re
|
||||
import os
|
||||
|
||||
|
|
@ -84,6 +87,17 @@ class ResearchAssistant(DataInterpreter):
|
|||
json_block = CodeParser.parse_code(block=None, text=rsp)
|
||||
score_dict = json.loads(json_block)
|
||||
return score_dict
|
||||
|
||||
|
||||
@model_validator(mode="after")
def set_plan_and_tool(self) -> "Interpreter":
    """Initialize plan and tools unless the planner already has a goal.

    When the planner's plan goal is the empty string, the parent
    implementation of ``set_plan_and_tool`` runs. Otherwise the role is
    set up with ``WriteAnalysisCode`` as its only action, its state is
    set to 0, and ``self`` is returned without re-initializing.
    """
    has_no_goal = self.planner.plan.goal == ''
    if has_no_goal:
        print("Initializing plan and tool...")
        return super().set_plan_and_tool()

    # A plan is already present: only wire up the action and state.
    self.set_actions([WriteAnalysisCode])
    self._set_state(0)
    print("Plan already exists, skipping initialization.")
    return self
|
||||
|
||||
async def _act_on_task(self, current_task: Task) -> TaskResult:
|
||||
"""Useful in 'plan_and_act' mode. Wrap the output in a TaskResult for review and confirmation."""
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
import os
|
||||
from metagpt.roles.di.research_assistant import ResearchAssistant
|
||||
from expo.research_assistant import ResearchAssistant
|
||||
import asyncio
|
||||
from examples.MCTS_test.utils import DATA_CONFIG, generate_task_requirement, get_exp_pool_path
|
||||
from examples.MCTS_test.insights.InsightGenerate import InsightGenerator
|
||||
from examples.MCTS_test.MCTS import create_initial_state
|
||||
from examples.MCTS_test.evaluation.evaluation import evaluate_score
|
||||
from expo.utils import DATA_CONFIG, get_exp_pool_path
|
||||
from expo.dataset import generate_task_requirement
|
||||
from expo.insights.InsightGenerate import InsightGenerator
|
||||
from expo.MCTS import create_initial_state
|
||||
from expo.evaluation.evaluation import evaluate_score
|
||||
import json
|
||||
import argparse
|
||||
import pandas as pd
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
from examples.MCTS_test.MCTS import MCTS, Node, initialize_di_root_node
|
||||
from examples.MCTS_test.utils import load_data_config, generate_task_requirement
|
||||
from examples.MCTS_test.visualize_mcts import get_tree_text
|
||||
from expo.MCTS import MCTS, Node, initialize_di_root_node
|
||||
from expo.utils import load_data_config
|
||||
from expo.dataset import generate_task_requirement
|
||||
from expo.evaluation.visualize_mcts import get_tree_text
|
||||
import asyncio
|
||||
import argparse
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
from expo.MCTS import MCTS, Node, initialize_di_root_node
|
||||
from expo.utils import load_data_config, generate_task_requirement
|
||||
from expo.utils import load_data_config
|
||||
from expo.dataset import generate_task_requirement
|
||||
|
||||
from expo.evaluation.visualize_mcts import get_tree_text
|
||||
import asyncio
|
||||
import argparse
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
import yaml
|
||||
from examples.MCTS_test.dataset import get_user_requirement, get_split_dataset_path
|
||||
from metagpt.roles.role import Role
|
||||
from metagpt.actions.di.execute_nb_code import ExecuteNbCode
|
||||
from metagpt.utils.save_code import save_code_file
|
||||
|
|
@ -13,34 +12,6 @@ import sys
|
|||
import os
|
||||
import re
|
||||
|
||||
# Prompt template describing one modelling task to the agent.
# It is filled with ``str.format`` and expects exactly these placeholders:
#   user_requirement - the base requirement text for the task
#   train_path / dev_path / test_path - locations of the data splits
#   output_dir - directory where the prediction files must be written
TASK_PROMPT = """\
# User requirement
{user_requirement}
**Attention** Please do not leak the target label in any form during training.

## Saving Dev and Test Predictions
Save the prediction results of the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory BEFORE printing out the results.
The file should contain a single `target` column with the predicted values.
Make sure the prediction results are in the same format as the target column in the training set. The labels should be transformed back to the original format if any transformation was applied during training.

## Output Training Set Performance
Make sure the performance of the model is printed in python in the last step even if it has been printed in the previous steps. The value should be a float number.
Print the training set performance in the last step. Write in this format:
```python
...
print("Train score:", train_score)
```

# Data dir
training: {train_path}
dev: {dev_path}
testing: {test_path}

# Output dir
{output_dir}

"""
|
||||
|
||||
def load_data_config(file_path="data.yaml"):
|
||||
with open(file_path, 'r') as stream:
|
||||
data_config = yaml.safe_load(stream)
|
||||
|
|
@ -78,18 +49,6 @@ def get_exp_pool_path(task_name, data_config, pool_name="analysis_pool"):
|
|||
exp_pool_path = os.path.join(data_path, f"{pool_name}.json")
|
||||
return exp_pool_path
|
||||
|
||||
def generate_task_requirement(task_name, data_config):
    """Render ``TASK_PROMPT`` for the given task.

    Args:
        task_name: Name of the task; passed to the lookup helpers and used
            as the last component of the output directory.
        data_config: Configuration mapping; must provide ``"work_dir"`` and
            is forwarded to ``get_user_requirement`` and
            ``get_split_dataset_path``.

    Returns:
        The formatted prompt string. The dev and test paths come from the
        ``"dev_wo_target"`` and ``"test_wo_target"`` entries of the split
        path mapping; the output directory is ``<work_dir>/<task_name>``.
    """
    base_requirement = get_user_requirement(task_name, data_config)
    split_paths = get_split_dataset_path(task_name, data_config)
    prompt_fields = {
        "user_requirement": base_requirement,
        "train_path": split_paths["train"],
        "dev_path": split_paths["dev_wo_target"],
        "test_path": split_paths["test_wo_target"],
        "output_dir": f"{data_config['work_dir']}/{task_name}",
    }
    return TASK_PROMPT.format(**prompt_fields)
|
||||
|
||||
def change_plan(role, plan):
|
||||
print(f"Change next plan to: {plan}")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue