From a91003a7fe2235609d99e74d6d4a93402fb61fc4 Mon Sep 17 00:00:00 2001
From: Yizhou Chi
Date: Mon, 14 Oct 2024 09:56:55 +0800
Subject: [PATCH] disable submission

---
 expo/data/custom_task.py                    | 22 +++++++++++++---
 expo/experimenter/mle_bench/instructions.py | 29 +++++++++++----------
 expo/insights/instruction_generator.py      |  3 ++-
 expo/insights/solution_designer.py          | 26 +++++++++++++-----
 4 files changed, 55 insertions(+), 25 deletions(-)

diff --git a/expo/data/custom_task.py b/expo/data/custom_task.py
index 2bd88abde..14eb6aac2 100644
--- a/expo/data/custom_task.py
+++ b/expo/data/custom_task.py
@@ -19,20 +19,34 @@ COMPETITION INSTRUCTIONS
 {task_description}
 
+## More Instructions
+- output_dir: {output_dir}
+- Besides `submission.csv`, you should also save your output in the output directory.
+- Save the prediction results of BOTH the dev set and test set in `dev_predictions.csv` and `test_predictions.csv` respectively in the output directory.
+
+Do not make visualizations.
 """
 
 
 def get_mle_bench_requirements(dataset_dir, data_config, obfuscated=False):
+    work_dir = data_config["work_dir"]
+    output_dir = f"{work_dir}/output"
+    os.makedirs(output_dir, exist_ok=True)
+
     if obfuscated:
-        instructions = INSTRUCTIONS_OBFUSCATED
+        instructions = INSTRUCTIONS_OBFUSCATED.format(dataset_dir=dataset_dir, output_dir=output_dir)
         task_file = "description_obfuscated.md"
     else:
-        instructions = INSTRUCTIONS
+        instructions = INSTRUCTIONS.format(dataset_dir=dataset_dir, output_dir=output_dir)
         task_file = "description.md"
 
-    with open(os.path.join(dataset_dir, task_file)) as f:
+    with open(os.path.join(dataset_dir, task_file), encoding="utf-8") as f:
         task_description = f.read()
 
     mle_requirement = MLE_REQUIREMENTS.format(
-        instructions=instructions, additonal_notes=ADDITIONAL_NOTES, task_description=task_description
+        instructions=instructions,
+        additonal_notes=ADDITIONAL_NOTES,
+        task_description=task_description,
+        output_dir=output_dir,
     )
+    print(mle_requirement)
     return mle_requirement
diff --git a/expo/experimenter/mle_bench/instructions.py b/expo/experimenter/mle_bench/instructions.py
index 301b465c1..631f410dc 100644
--- a/expo/experimenter/mle_bench/instructions.py
+++ b/expo/experimenter/mle_bench/instructions.py
@@ -1,3 +1,6 @@
+SUBMISSION_INSTURCTION = """
+- **Submission checker**: We provide a tool for checking the validity of your submission file, in the form of a server endpoint at `http://localhost:5000/validate`. If you post a submission file to the server (e.g. `curl -X POST -F "file=@${SUBMISSION_FILE}" ${SERVER_URL}`), you will receive a response message telling you if your submission is valid or not (note that this does NOT give you a score). For your convenience, we provide a script `/home/validate_submission.sh` which can help you make this submission to the validation server.
+"""
 INSTRUCTIONS = """
 You are participating in *MLE-bench*, an offline version of Kaggle competitions adapted to test the coding ability of agents like yourself.
 
@@ -5,18 +8,17 @@ You will be given a machine learning task. You must solve the task by training a
 
 BENCHMARK INSTRUCTIONS
 ------
-First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `/home/instructions.txt`.
+First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `expo.experimenter.mle_bench.instructions.py`.
-- **Competition-specific instructions**: The instructions for the competition are available in `/home/data/description.md`. Read this file to understand the problem setup for the competition you are currently solving.
-- **Dataset**: The dataset files for the competition are available in `/home/data/`.
-- **Submission**: We have simplified all competition types to use a simple CSV submission format. Regardless of what the competition instructions say, you MUST produce a submission file at `/home/submission/submission.csv`. We will only evaluate submissions found at this path; failure to submit a `submission.csv` at this path will result in a failed attempt.
-- **Test set**: In general, since we do not have access to the original test labels, we have constructed our own train/test split for each competition. As such, the dataset you see in `/home/data/` may differ slightly from what is described in the competition instructions. Nevertheless the files available to you should otherwise be similar to the original and you should proceed with the competition task on the train/test splits available to you.
-- **Submission checker**: We provide a tool for checking the validity of your submission file, in the form of a server endpoint at `http://localhost:5000/validate`. If you post a submission file to the server (e.g. `curl -X POST -F "file=@${SUBMISSION_FILE}" ${SERVER_URL}`), you will receive a response message telling you if your submission is valid or not (note that this does NOT give you a score). For your convenience, we provide a script `/home/validate_submission.sh` which can help you make this submission to the validation server.
+- **Competition-specific instructions**: The instructions for the competition are available in `{dataset_dir}/description.md`. Read this file to understand the problem setup for the competition you are currently solving.
+- **Dataset**: The dataset files for the competition are available in `{dataset_dir}`.
+- **Submission**: We have simplified all competition types to use a simple CSV submission format. Regardless of what the competition instructions say, you MUST produce a submission file at `{output_dir}/submission.csv`. We will only evaluate submissions found at this path; failure to submit a `submission.csv` at this path will result in a failed attempt.
+- **Test set**: In general, since we do not have access to the original test labels, we have constructed our own train/test split for each competition. As such, the dataset you see in `{dataset_dir}` may differ slightly from what is described in the competition instructions. Nevertheless the files available to you should otherwise be similar to the original and you should proceed with the competition task on the train/test splits available to you.
 - **Cheating**: You are not allowed to view others' solutions to this competition. If we detect that your submission is plagiarized from others, the submission will be disqualified.
 
 Your submission will be graded based on the metric in the competition-specific instructions. Your goal is to achieve the best score possible on the competition task.
 
-Wherever there is a conflict between the points above and the competition-specific instructions in `/home/data/description.md`, the above points should always take priority since these are always true for our offline benchmark setup.
+Wherever there is a conflict between the points above and the competition-specific instructions in `{dataset_dir}/description.md`, the above points should always take priority since these are always true for our offline benchmark setup.
 """
 
 INSTRUCTIONS_OBFUSCATED = """
@@ -26,18 +28,17 @@ You will be given a machine learning task. You must solve the task by training a
 
 BENCHMARK INSTRUCTIONS
 ------
-First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `/home/instructions.txt`.
+First, please read the instructions below which are specific to our benchmark setup. Note, these instructions can be found again in `expo.experimenter.mle_bench.instructions.py`.
 
-- **Task-specific instructions**: The instructions for the task are available in `/home/data/description.md`. Read this file to understand the problem setup for the task you are currently solving.
-- **Dataset**: The dataset files for the task are available in `/home/data/`.
-- **Submission**: Regardless of what the task instructions say, you MUST produce a submission file at `/home/submission/submission.csv`. We will only evaluate submissions found at this path; failure to submit a `submission.csv` at this path will result in a failed attempt.
-- **Test set**: In general, since we do not have access to the original test labels, we have constructed our own train/test split for each task. As such, the dataset you see in `/home/data/` may differ slightly from what is described in the task instructions. Nevertheless the files available to you should otherwise be similar to the original and you should proceed with the task on the train/test splits available to you.
-- **Submission checker**: We provide a tool for checking the validity of your submission file, in the form of a server endpoint at `http://localhost:5000/validate`. If you post a submission file to the server (e.g. `curl -X POST -F "file=@${SUBMISSION_FILE}" ${SERVER_URL}`), you will receive a response message telling you if your submission is valid or not (note that this does NOT give you a score). For your convenience, we provide a script `/home/validate_submission.sh` which can help you make this submission to the validation server.
+- **Task-specific instructions**: The instructions for the task are available in `{dataset_dir}/description.md`. Read this file to understand the problem setup for the task you are currently solving.
+- **Dataset**: The dataset files for the task are available in `{dataset_dir}/`.
+- **Submission**: Regardless of what the task instructions say, you MUST produce a submission file at `{output_dir}/submission.csv`. We will only evaluate submissions found at this path; failure to submit a `submission.csv` at this path will result in a failed attempt.
+- **Test set**: In general, since we do not have access to the original test labels, we have constructed our own train/test split for each task. As such, the dataset you see in `{dataset_dir}` may differ slightly from what is described in the task instructions. Nevertheless the files available to you should otherwise be similar to the original and you should proceed with the task on the train/test splits available to you.
 - **Cheating**: You are not allowed to view others' solutions to this task. If we detect that your submission is plagiarized from others, the submission will be disqualified.
 
 Your submission will be graded based on the metric in the task-specific instructions.
Your goal is to achieve the best score possible on the task.
 
-Wherever there is a conflict between the points above and the task-specific instructions in `/home/data/description.md`, the above points should always take priority since these are always true for our offline benchmark setup.
+Wherever there is a conflict between the points above and the task-specific instructions in `{dataset_dir}/description.md`, the above points should always take priority since these are always true for our offline benchmark setup.
 """
 
 ADDITIONAL_NOTES = """
diff --git a/expo/insights/instruction_generator.py b/expo/insights/instruction_generator.py
index 7fe5ceece..835c1ff9d 100644
--- a/expo/insights/instruction_generator.py
+++ b/expo/insights/instruction_generator.py
@@ -38,7 +38,8 @@ class InstructionGenerator:
         self.state = state
         self.file_path = state["exp_pool_path"]
         if state["custom_dataset_dir"]:
-            self.dataset_info = "xxx"
+            with open(f"{state['custom_dataset_dir']}/description.md", "r", encoding="utf-8") as file:
+                self.dataset_info = file.read()
         else:
             dataset_info_path = f"{self.data_config['datasets_dir']}/{state['task']}/dataset_info.json"
             with open(dataset_info_path, "r") as file:
diff --git a/expo/insights/solution_designer.py b/expo/insights/solution_designer.py
index 2336911db..262caa0f6 100644
--- a/expo/insights/solution_designer.py
+++ b/expo/insights/solution_designer.py
@@ -5,7 +5,8 @@ from metagpt.llm import LLM
 
 DATA_CONFIG = load_data_config()
 
-DATASET_INSIGHT_PROMPT = """
+
+DATASET_DESCRIPTION_SELA_PROMPT = """
 # Dataset Description
 {dataset}
 
@@ -14,6 +15,15 @@ DATASET_INSIGHT_PROMPT = """
 
 # Dataset Head
 {head}
+"""
+
+DATASET_DESCRIPTION_CUSTOM_PROMPT = """
+# Dataset Description
+{dataset_description}
+"""
+
+DATASET_INSIGHT_PROMPT = """
+{description}
 
 # Instruction
 Propose insights to help improve the performance of the model on this dataset.
@@ -127,11 +137,15 @@ class SolutionDesigner:
 
     async def generate_solutions(self, dataset_info, dataset_name, save_analysis_pool=True):
         llm = LLM()
-        context = DATASET_INSIGHT_PROMPT.format(
-            dataset=dataset_info["description"],
-            metadata=self.metadata_builder(dataset_info["metadata"]),
-            head=dataset_info["df_head"],
-        )
+        if type(dataset_info) == dict:
+            description_prompt = DATASET_DESCRIPTION_SELA_PROMPT.format(
+                dataset=dataset_info["description"],
+                metadata=self.metadata_builder(dataset_info["metadata"]),
+                head=dataset_info["df_head"],
+            )
+        else:
+            description_prompt = DATASET_DESCRIPTION_CUSTOM_PROMPT.format(dataset_description=dataset_info)
+        context = DATASET_INSIGHT_PROMPT.format(description=description_prompt)
         rsp = await llm.aask(context)
         rsp = clean_json_from_rsp(rsp)
         analysis_pool = self.process_analysis_pool(json.loads(rsp))
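
For reference, a minimal usage sketch of the patched `get_mle_bench_requirements`. The dataset path and `data_config` values below are invented for illustration; only the `work_dir` key and the presence of `description.md` / `description_obfuscated.md` in the dataset directory are implied by the diff.

```python
# Hypothetical driver for the patched helper; paths and config values are illustrative only.
from expo.data.custom_task import get_mle_bench_requirements

data_config = {"work_dir": "workspace/mle_bench_run"}  # "work_dir" is the only key the patch reads
dataset_dir = "datasets/spaceship-titanic"  # assumed to contain description.md (or description_obfuscated.md)

# The patched helper creates {work_dir}/output, injects it into the instruction
# template, and tells the agent to write submission.csv, dev_predictions.csv and
# test_predictions.csv into that directory.
requirement = get_mle_bench_requirements(dataset_dir, data_config, obfuscated=False)
assert "submission.csv" in requirement and "dev_predictions.csv" in requirement
```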
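The `solution_designer.py` change makes `generate_solutions` dispatch on the type of `dataset_info`: SELA-style datasets still pass a dict with `description`, `metadata` and `df_head`, while custom tasks now pass the raw `description.md` text loaded by `InstructionGenerator`. A stripped-down, standalone sketch of that dispatch follows — the templates are inlined, `metadata_builder` is omitted, `isinstance` stands in for the patch's `type(...) == dict` check, and the sample values are invented.

```python
DATASET_DESCRIPTION_SELA_PROMPT = """# Dataset Description
{dataset}

# Dataset Metadata
{metadata}

# Dataset Head
{head}
"""

DATASET_DESCRIPTION_CUSTOM_PROMPT = """# Dataset Description
{dataset_description}
"""


def build_description_prompt(dataset_info) -> str:
    """Dict -> structured SELA dataset info; str -> raw description.md text from a custom task."""
    if isinstance(dataset_info, dict):
        return DATASET_DESCRIPTION_SELA_PROMPT.format(
            dataset=dataset_info["description"],
            metadata=dataset_info["metadata"],
            head=dataset_info["df_head"],
        )
    return DATASET_DESCRIPTION_CUSTOM_PROMPT.format(dataset_description=dataset_info)


# A custom task simply passes the markdown description straight through.
print(build_description_prompt("Predict passenger survival from tabular features."))

# A SELA dataset provides the structured fields used by the original prompt.
print(
    build_description_prompt(
        {"description": "Titanic survival", "metadata": "891 rows, 12 columns", "df_head": "PassengerId  Survived ..."}
    )
)
```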