1. add ml module

2. fix bug
This commit is contained in:
Yizhou Chi 2024-09-02 14:07:14 +08:00
parent 0b30866d10
commit 0e5db1c364
7 changed files with 21 additions and 14 deletions

View file

@ -269,7 +269,7 @@ class MCTS():
if node not in self.children:
return best_score, best_child
for child in self.children[node]:
score = child.normalized[split]
score = child.normalized_reward[split]
print(child.id, score)
if score > best_score:
best_score = score

View file

@ -25,6 +25,7 @@ ### Run DI RandExp
### Run DI MCTS
`python run_experiment.py --exp_mode mcts --task titanic --rollout 5`
If the dataset uses a regression metric (where a lower score is better), remember to pass `--low_is_better`:
`python run_experiment.py --exp_mode mcts --task house_prices --rollout 5 --low_is_better`

View file

@ -24,7 +24,7 @@ TASK_PROMPT = """\
## Saving Dev and Test Predictions
Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory.
Both files should contain a single `target` column with the predicted values.
Both files should contain a single column named `target` with the predicted values.
Make sure the prediction results are in the same format as the target column in the training set. The labels should be transformed back to the original format if any transformation was applied during training.
## Output Training Set Performance

View file

@ -34,12 +34,12 @@ class AugExperimenter(Experimenter):
results = []
for i in range(self.args.num_experiments):
di = ResearchAssistant(node_id=str(i), use_reflection=self.args.use_reflection)
di = ResearchAssistant(node_id=str(i), use_reflection=self.args.reflection)
di.role_dir = f"{di.role_dir}_{self.args.task}"
requirement = user_requirement + EXPS_PROMPT.format(experience=exps[i])
print(requirement)
await di.run(requirement)
score_dict = await di.get_score(low_is_better=False)
score_dict = await di.get_score()
score_dict = self.evaluate(score_dict, state)
results.append({
"idx": i,
@ -47,14 +47,14 @@ class AugExperimenter(Experimenter):
"aug_mode": self.args.aug_mode,
"insights" : exps[i],
"user_requirement": user_requirement,
"args": self.args
"args": vars(self.args)
})
scores = [score_dict["test_score"] for score_dict in scores]
scores = [result["score_dict"]["test_score"] for result in results]
avg_score = sum(scores) / len(scores)
best_score = max(scores) if not self.args.low_is_better else min(scores)
best_score_idx = scores.index(best_score)
results.insert(0, {"avg_score": avg_score, "best_score": best_score, "best_score_idx": best_score_idx})
self.save_results(results)
self.save_result(results)

View file

@ -9,7 +9,7 @@ from expo.research_assistant import ResearchAssistant
class Experimenter:
result_path : str = "results"
result_path : str = "results/base"
data_config = DATA_CONFIG
@ -20,21 +20,21 @@ class Experimenter:
async def run_experiment(self):
state = create_initial_state(self.args.task, start_task_id=1, data_config=self.data_config, low_is_better=self.args.low_is_better, name="")
user_requirement = state["requirement"]
di = ResearchAssistant(node_id="0", use_reflection=self.args.use_reflection)
di = ResearchAssistant(node_id="0", use_reflection=self.args.reflection)
await di.run(user_requirement)
score_dict = await di.get_score(low_is_better=False)
score_dict = await di.get_score()
score_dict = self.evaluate(score_dict, state)
results = {
"score_dict": score_dict,
"aug_mode": self.args.aug_mode,
"user_requirement": user_requirement,
"args": self.args
"args": vars(self.args)
}
self.save_result(results)
def evaluate_prediction(self, split, state):
pred_path = os.path.join(state["work_dir"], state["task"], f"{split}_predictions.csv")
os.makedirs(state["node_dir"], exist_ok=True)
pred_node_path = os.path.join(state["node_dir"], f"{self.start_time}-{split}_predictions.csv")
gt_path = os.path.join(state["datasets_dir"][f"{split}_target"])
preds = pd.read_csv(pred_path)["target"]
@ -53,5 +53,6 @@ class Experimenter:
def save_result(self, result):
with open(f"{self.result_path}/{self.args.task}_{self.start_time}.json", "w") as f:
os.makedirs(self.result_path, exist_ok=True)
with open(f"{self.result_path}/{self.args.exp_mode}-{self.args.task}_{self.start_time}.json", "w") as f:
json.dump(result, f, indent=4)

View file

@ -31,7 +31,7 @@ class MCTSExperimenter(Experimenter):
"dev_best_node_score": dev_best_node.raw_reward,
"num_generated_codes": num_generated_codes,
"user_requirement": best_node.state["requirement"],
"args": self.args
"args": vars(self.args)
}
self.save_result(results)

View file

@ -80,3 +80,8 @@ boto3~=1.34.69
spark_ai_python~=0.3.30
agentops
openml==0.14.2
# ML modules needed to run in DI
xgboost
catboost
lightgbm