From 1e44d7020295574b82e199ac379d7704bad51a01 Mon Sep 17 00:00:00 2001 From: stellahsr Date: Tue, 19 Mar 2024 17:28:46 +0800 Subject: [PATCH 1/8] update run_api for di --- metagpt/roles/di/data_interpreter.py | 80 +++++++++++++++------------- 1 file changed, 42 insertions(+), 38 deletions(-) diff --git a/metagpt/roles/di/data_interpreter.py b/metagpt/roles/di/data_interpreter.py index a8534b710..16875a42e 100644 --- a/metagpt/roles/di/data_interpreter.py +++ b/metagpt/roles/di/data_interpreter.py @@ -43,66 +43,66 @@ class DataInterpreter(Role): tool_recommender: ToolRecommender = None react_mode: Literal["plan_and_act", "react"] = "plan_and_act" max_react_loop: int = 10 # used for react mode - + @model_validator(mode="after") def set_plan_and_tool(self) -> "Interpreter": self._set_react_mode(react_mode=self.react_mode, max_react_loop=self.max_react_loop, auto_run=self.auto_run) self.use_plan = ( - self.react_mode == "plan_and_act" + self.react_mode == "plan_and_act" ) # create a flag for convenience, overwrite any passed-in value if self.tools: self.tool_recommender = BM25ToolRecommender(tools=self.tools) self.set_actions([WriteAnalysisCode]) self._set_state(0) return self - + @property def working_memory(self): return self.rc.working_memory - + async def _think(self) -> bool: """Useful in 'react' mode. Use LLM to decide whether and what to do next.""" user_requirement = self.get_memories()[0].content context = self.working_memory.get() - + if not context: # just started the run, we need action certainly self.working_memory.add(self.get_memories()[0]) # add user requirement to working memory self._set_state(0) return True - + prompt = REACT_THINK_PROMPT.format(user_requirement=user_requirement, context=context) rsp = await self.llm.aask(prompt) rsp_dict = json.loads(CodeParser.parse_code(block=None, text=rsp)) self.working_memory.add(Message(content=rsp_dict["thoughts"], role="assistant")) need_action = rsp_dict["state"] self._set_state(0) if need_action else self._set_state(-1) - + return need_action - + async def _act(self) -> Message: """Useful in 'react' mode. Return a Message conforming to Role._act interface.""" code, _, _ = await self._write_and_exec_code() return Message(content=code, role="assistant", cause_by=WriteAnalysisCode) - + async def _plan_and_act(self) -> Message: rsp = await super()._plan_and_act() await self.execute_code.terminate() return rsp - + async def _act_on_task(self, current_task: Task) -> TaskResult: """Useful in 'plan_and_act' mode. Wrap the output in a TaskResult for review and confirmation.""" code, result, is_success = await self._write_and_exec_code() task_result = TaskResult(code=code, result=result, is_success=is_success) return task_result - + async def _write_and_exec_code(self, max_retry: int = 3): counter = 0 success = False - + # plan info plan_status = self.planner.get_plan_status() if self.use_plan else "" - + # tool info if self.tools: context = ( @@ -112,45 +112,46 @@ class DataInterpreter(Role): tool_info = await self.tool_recommender.get_recommended_tool_info(context=context, plan=plan) else: tool_info = "" - + # data info await self._check_data() - + while not success and counter < max_retry: ### write code ### code, cause_by = await self._write_code(counter, plan_status, tool_info) - + self.working_memory.add(Message(content=code, role="assistant", cause_by=cause_by)) - + ### execute code ### + import pdb;pdb.set_trace() result, success = await self.execute_code.run(code) print(result) - + self.working_memory.add(Message(content=result, role="user", cause_by=ExecuteNbCode)) - + ### process execution result ### counter += 1 - + if not success and counter >= max_retry: logger.info("coding failed!") review, _ = await self.planner.ask_review(auto_run=False, trigger=ReviewConst.CODE_REVIEW_TRIGGER) if ReviewConst.CHANGE_WORDS[0] in review: counter = 0 # redo the task again with help of human suggestions - + return code, result, success - + async def _write_code( - self, - counter: int, - plan_status: str = "", - tool_info: str = "", + self, + counter: int, + plan_status: str = "", + tool_info: str = "", ): todo = self.rc.todo # todo is WriteAnalysisCode logger.info(f"ready to {todo.name}") use_reflection = counter > 0 and self.use_reflection # only use reflection after the first trial - + user_requirement = self.get_memories()[0].content - + code = await todo.run( user_requirement=user_requirement, plan_status=plan_status, @@ -158,19 +159,19 @@ class DataInterpreter(Role): working_memory=self.working_memory.get(), use_reflection=use_reflection, ) - + return code, todo - + async def _check_data(self): if ( - not self.use_plan - or not self.planner.plan.get_finished_tasks() - or self.planner.plan.current_task.task_type - not in [ - TaskType.DATA_PREPROCESS.type_name, - TaskType.FEATURE_ENGINEERING.type_name, - TaskType.MODEL_TRAIN.type_name, - ] + not self.use_plan + or not self.planner.plan.get_finished_tasks() + or self.planner.plan.current_task.task_type + not in [ + TaskType.DATA_PREPROCESS.type_name, + TaskType.FEATURE_ENGINEERING.type_name, + TaskType.MODEL_TRAIN.type_name, + ] ): return logger.info("Check updated data") @@ -182,3 +183,6 @@ class DataInterpreter(Role): print(result) data_info = DATA_INFO.format(info=result) self.working_memory.add(Message(content=data_info, role="user", cause_by=CheckData)) + + def get_last_cell_source(self): + return self.execute_code.nb.cells[-1].source From 45eb35205408ce2d81dba8e0b77afbe141e17d6c Mon Sep 17 00:00:00 2001 From: stellahsr Date: Tue, 19 Mar 2024 17:50:10 +0800 Subject: [PATCH 2/8] add repo utils --- data/inference/make_datasets/repo_utils.py | 83 ++++++++++++++++++++++ data/inference/make_datasets/utils.py | 28 ++++++++ data/inference/run.py | 22 ++++++ 3 files changed, 133 insertions(+) create mode 100644 data/inference/make_datasets/repo_utils.py create mode 100644 data/inference/make_datasets/utils.py create mode 100644 data/inference/run.py diff --git a/data/inference/make_datasets/repo_utils.py b/data/inference/make_datasets/repo_utils.py new file mode 100644 index 000000000..18eda3134 --- /dev/null +++ b/data/inference/make_datasets/repo_utils.py @@ -0,0 +1,83 @@ +import os +import subprocess +from typing import Dict +from traceback import format_exc +from pathlib import Path +import git +from git.exc import GitError + +from metagpt.logs import logger + +KEY_INSTANCE_ID = "instance_id" +RESET_FAILED = ">>>>> Reset Failed" + + +class ExecWrapper: + def __init__(self, subprocess_args: Dict = None): + self.subprocess_args = subprocess_args or {} + + def __call__(self, cmd, raise_error=True, **kwargs): + try: + combined_args = {**self.subprocess_args, **kwargs} + output = subprocess.run(cmd, **combined_args) + return output + except subprocess.CalledProcessError as e: + if raise_error: + error_message = f"Error: {e}\nError stdout: {e.stdout}\nError stderr: {e.stderr}\nError traceback: {format_exc()}" + logger.error(error_message) + raise e + + +class EnvManager: + def __init__(self, testbed): + shellenv = os.environ.copy() + self.testbed = testbed + + self.exec = ExecWrapper( + subprocess_args={ + "check": True, + "shell": False, + "capture_output": True, + "text": True, + "env": shellenv, + } + ) + + def clone_repo(self, repo_name: str, path: str, token: str = None): + if token is None: + token = os.environ.get("GITHUB_TOKEN", "git") + if not token: + raise ValueError("GitHub token is required for cloning repositories.") + + repo_url = f"https://{token}@github.com/swe-bench/{repo_name.replace('/', '__')}.git" + + try: + # Ensure the destination directory exists + os.makedirs(path, exist_ok=True) + + # Clone the repository + git.Repo.clone_from(repo_url, path) + print(f"Repository '{repo_name}' cloned successfully.") + except GitError as e: + print(f"Failed to clone repository '{repo_name}': {e}") + + def reset_task_env(self, instance: Dict): + """ + Reset task environment + testbed and checkout base commit of given task instance + """ + try: + gitignore_path = Path(".gitignore") + if gitignore_path.exists(): + self.exec(["git", "ls-files", "--ignored", "--exclude-standard", "-o", "-z"], raise_error=False) + self.exec(["xargs", "-0", "-r", "rm", "-rf"], input=gitignore_path.read_text()) + + self.exec(["git", "restore", "."]) + self.exec(["git", "reset", "HEAD", "."]) + self.exec(["git", "clean", "-fdx"]) + self.exec(["git", "-c", "advice.detachedHead=false", "checkout", instance['base_commit']]) + logger.info(f"[{instance['instance_id']}] Reset task environment to {instance['base_commit']}") + return True + except Exception as e: + err_msg = f"{RESET_FAILED}; Failed to reset task environment to {instance['base_commit']}: {e}" + logger.error(err_msg) + return False \ No newline at end of file diff --git a/data/inference/make_datasets/utils.py b/data/inference/make_datasets/utils.py new file mode 100644 index 000000000..6ecbd5832 --- /dev/null +++ b/data/inference/make_datasets/utils.py @@ -0,0 +1,28 @@ +import re + + +def extract_diff(response): + """ + Extracts the diff from a response formatted in different ways + """ + if response is None: + return None + diff_matches = [] + other_matches = [] + pattern = re.compile(r"\<([\w-]+)\>(.*?)\<\/\1\>", re.DOTALL) + for code, match in pattern.findall(response): + if code in {"diff", "patch"}: + diff_matches.append(match) + else: + other_matches.append(match) + pattern = re.compile(r"```(\w+)?\n(.*?)```", re.DOTALL) + for code, match in pattern.findall(response): + if code in {"diff", "patch"}: + diff_matches.append(match) + else: + other_matches.append(match) + if diff_matches: + return diff_matches[0] + if other_matches: + return other_matches[0] + return response.split("")[0] \ No newline at end of file diff --git a/data/inference/run.py b/data/inference/run.py new file mode 100644 index 000000000..d71ff580a --- /dev/null +++ b/data/inference/run.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +# @Author : stellahong (stellahong@fuzhi.ai) +# @Desc : +import runpy +import sys + +# 备份原始的sys.argv +original_argv = sys.argv.copy() + +try: + # 设置你想要传递给脚本的命令行参数 + sys.argv = ['run_api.py', '--dataset_name_or_path', 'princeton-nlp/SWE-bench_oracle', '--output_dir', + './outputs'] + # 添加其他可选参数到sys.argv中,例如: + # sys.argv.extend(['--some_option', 'some_value']) + + # 执行脚本 + runpy.run_path(path_name='run_api.py', run_name='__main__') +finally: + # 恢复原始的sys.argv以避免对后续代码的潜在影响 + sys.argv = original_argv + From fc23e8f27ec7757d7cd1837dcf0ee6578585c4b6 Mon Sep 17 00:00:00 2001 From: stellahsr Date: Tue, 19 Mar 2024 20:07:18 +0800 Subject: [PATCH 3/8] add instance filter --- data/inference/run_api.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/data/inference/run_api.py b/data/inference/run_api.py index 66f229f85..9202d6a42 100644 --- a/data/inference/run_api.py +++ b/data/inference/run_api.py @@ -15,6 +15,7 @@ from metagpt.logs import logger from metagpt.roles.di.data_interpreter import DataInterpreter from metagpt.utils import count_string_tokens from metagpt.utils.recovery_util import save_history +from data.inference.const import SCIKIT_LEARN_IDS # Replace with your own MAX_TOKEN = 128000 @@ -70,6 +71,7 @@ async def openai_inference( for datum in tqdm(test_dataset, desc=f"Inference for {model_name_or_path}"): di = DataInterpreter(use_reflection=use_reflection) instance_id = datum["instance_id"] + if instance_id in existing_ids: continue output_dict = {"instance_id": instance_id} @@ -124,12 +126,19 @@ async def main( dataset = dataset[split] lens = np.array(list(map(len, dataset["text"]))) dataset = dataset.select(np.argsort(lens)) + if len(existing_ids) > 0: dataset = dataset.filter( lambda x: x["instance_id"] not in existing_ids, desc="Filtering out existing ids", load_from_cache_file=False, ) + if len(SCIKIT_LEARN_IDS) > 0: + dataset = dataset.filter( + lambda x: x["instance_id"] in SCIKIT_LEARN_IDS, + desc="Filtering out subset_instance_ids", + load_from_cache_file=False, + ) inference_args = { "test_dataset": dataset, "model_name_or_path": model_name_or_path, From 12e90095294f7118974ad3be65407ef39ef6745b Mon Sep 17 00:00:00 2001 From: stellahsr Date: Tue, 19 Mar 2024 20:12:54 +0800 Subject: [PATCH 4/8] rm useless comments --- data/inference/run.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/data/inference/run.py b/data/inference/run.py index d71ff580a..96d9cc082 100644 --- a/data/inference/run.py +++ b/data/inference/run.py @@ -4,16 +4,12 @@ import runpy import sys -# 备份原始的sys.argv original_argv = sys.argv.copy() try: # 设置你想要传递给脚本的命令行参数 sys.argv = ['run_api.py', '--dataset_name_or_path', 'princeton-nlp/SWE-bench_oracle', '--output_dir', './outputs'] - # 添加其他可选参数到sys.argv中,例如: - # sys.argv.extend(['--some_option', 'some_value']) - # 执行脚本 runpy.run_path(path_name='run_api.py', run_name='__main__') finally: From 0eec61188c6188207d5c42b0447d14220c0a37bf Mon Sep 17 00:00:00 2001 From: stellahsr Date: Tue, 19 Mar 2024 20:13:58 +0800 Subject: [PATCH 5/8] rm repo utils, contains git cmd --- data/inference/make_datasets/repo_utils.py | 83 ---------------------- 1 file changed, 83 deletions(-) delete mode 100644 data/inference/make_datasets/repo_utils.py diff --git a/data/inference/make_datasets/repo_utils.py b/data/inference/make_datasets/repo_utils.py deleted file mode 100644 index 18eda3134..000000000 --- a/data/inference/make_datasets/repo_utils.py +++ /dev/null @@ -1,83 +0,0 @@ -import os -import subprocess -from typing import Dict -from traceback import format_exc -from pathlib import Path -import git -from git.exc import GitError - -from metagpt.logs import logger - -KEY_INSTANCE_ID = "instance_id" -RESET_FAILED = ">>>>> Reset Failed" - - -class ExecWrapper: - def __init__(self, subprocess_args: Dict = None): - self.subprocess_args = subprocess_args or {} - - def __call__(self, cmd, raise_error=True, **kwargs): - try: - combined_args = {**self.subprocess_args, **kwargs} - output = subprocess.run(cmd, **combined_args) - return output - except subprocess.CalledProcessError as e: - if raise_error: - error_message = f"Error: {e}\nError stdout: {e.stdout}\nError stderr: {e.stderr}\nError traceback: {format_exc()}" - logger.error(error_message) - raise e - - -class EnvManager: - def __init__(self, testbed): - shellenv = os.environ.copy() - self.testbed = testbed - - self.exec = ExecWrapper( - subprocess_args={ - "check": True, - "shell": False, - "capture_output": True, - "text": True, - "env": shellenv, - } - ) - - def clone_repo(self, repo_name: str, path: str, token: str = None): - if token is None: - token = os.environ.get("GITHUB_TOKEN", "git") - if not token: - raise ValueError("GitHub token is required for cloning repositories.") - - repo_url = f"https://{token}@github.com/swe-bench/{repo_name.replace('/', '__')}.git" - - try: - # Ensure the destination directory exists - os.makedirs(path, exist_ok=True) - - # Clone the repository - git.Repo.clone_from(repo_url, path) - print(f"Repository '{repo_name}' cloned successfully.") - except GitError as e: - print(f"Failed to clone repository '{repo_name}': {e}") - - def reset_task_env(self, instance: Dict): - """ - Reset task environment + testbed and checkout base commit of given task instance - """ - try: - gitignore_path = Path(".gitignore") - if gitignore_path.exists(): - self.exec(["git", "ls-files", "--ignored", "--exclude-standard", "-o", "-z"], raise_error=False) - self.exec(["xargs", "-0", "-r", "rm", "-rf"], input=gitignore_path.read_text()) - - self.exec(["git", "restore", "."]) - self.exec(["git", "reset", "HEAD", "."]) - self.exec(["git", "clean", "-fdx"]) - self.exec(["git", "-c", "advice.detachedHead=false", "checkout", instance['base_commit']]) - logger.info(f"[{instance['instance_id']}] Reset task environment to {instance['base_commit']}") - return True - except Exception as e: - err_msg = f"{RESET_FAILED}; Failed to reset task environment to {instance['base_commit']}: {e}" - logger.error(err_msg) - return False \ No newline at end of file From 7eb379bf9f4a87534812137d45ec493475711cc6 Mon Sep 17 00:00:00 2001 From: stellahsr Date: Tue, 19 Mar 2024 20:19:53 +0800 Subject: [PATCH 6/8] pre-commit format --- data/inference/make_datasets/utils.py | 1 - data/inference/run.py | 6 +- data/inference/run_api.py | 4 +- metagpt/roles/di/data_interpreter.py | 80 ++++++++++++++------------- 4 files changed, 45 insertions(+), 46 deletions(-) diff --git a/data/inference/make_datasets/utils.py b/data/inference/make_datasets/utils.py index 081c1bc1f..284f8d976 100644 --- a/data/inference/make_datasets/utils.py +++ b/data/inference/make_datasets/utils.py @@ -1,5 +1,4 @@ import re -import re def extract_diff(response): diff --git a/data/inference/run.py b/data/inference/run.py index 96d9cc082..a3f3c54aa 100644 --- a/data/inference/run.py +++ b/data/inference/run.py @@ -8,11 +8,9 @@ original_argv = sys.argv.copy() try: # 设置你想要传递给脚本的命令行参数 - sys.argv = ['run_api.py', '--dataset_name_or_path', 'princeton-nlp/SWE-bench_oracle', '--output_dir', - './outputs'] + sys.argv = ["run_api.py", "--dataset_name_or_path", "princeton-nlp/SWE-bench_oracle", "--output_dir", "./outputs"] # 执行脚本 - runpy.run_path(path_name='run_api.py', run_name='__main__') + runpy.run_path(path_name="run_api.py", run_name="__main__") finally: # 恢复原始的sys.argv以避免对后续代码的潜在影响 sys.argv = original_argv - diff --git a/data/inference/run_api.py b/data/inference/run_api.py index 9202d6a42..7882f13e7 100644 --- a/data/inference/run_api.py +++ b/data/inference/run_api.py @@ -10,12 +10,12 @@ from make_datasets.utils import extract_diff from tenacity import retry, stop_after_attempt, wait_random_exponential from tqdm.auto import tqdm +from data.inference.const import SCIKIT_LEARN_IDS from metagpt.config2 import config from metagpt.logs import logger from metagpt.roles.di.data_interpreter import DataInterpreter from metagpt.utils import count_string_tokens from metagpt.utils.recovery_util import save_history -from data.inference.const import SCIKIT_LEARN_IDS # Replace with your own MAX_TOKEN = 128000 @@ -71,7 +71,7 @@ async def openai_inference( for datum in tqdm(test_dataset, desc=f"Inference for {model_name_or_path}"): di = DataInterpreter(use_reflection=use_reflection) instance_id = datum["instance_id"] - + if instance_id in existing_ids: continue output_dict = {"instance_id": instance_id} diff --git a/metagpt/roles/di/data_interpreter.py b/metagpt/roles/di/data_interpreter.py index 11be96dcd..0e2cce309 100644 --- a/metagpt/roles/di/data_interpreter.py +++ b/metagpt/roles/di/data_interpreter.py @@ -43,66 +43,66 @@ class DataInterpreter(Role): tool_recommender: ToolRecommender = None react_mode: Literal["plan_and_act", "react"] = "plan_and_act" max_react_loop: int = 10 # used for react mode - + @model_validator(mode="after") def set_plan_and_tool(self) -> "Interpreter": self._set_react_mode(react_mode=self.react_mode, max_react_loop=self.max_react_loop, auto_run=self.auto_run) self.use_plan = ( - self.react_mode == "plan_and_act" + self.react_mode == "plan_and_act" ) # create a flag for convenience, overwrite any passed-in value if self.tools: self.tool_recommender = BM25ToolRecommender(tools=self.tools) self.set_actions([WriteAnalysisCode]) self._set_state(0) return self - + @property def working_memory(self): return self.rc.working_memory - + async def _think(self) -> bool: """Useful in 'react' mode. Use LLM to decide whether and what to do next.""" user_requirement = self.get_memories()[0].content context = self.working_memory.get() - + if not context: # just started the run, we need action certainly self.working_memory.add(self.get_memories()[0]) # add user requirement to working memory self._set_state(0) return True - + prompt = REACT_THINK_PROMPT.format(user_requirement=user_requirement, context=context) rsp = await self.llm.aask(prompt) rsp_dict = json.loads(CodeParser.parse_code(block=None, text=rsp)) self.working_memory.add(Message(content=rsp_dict["thoughts"], role="assistant")) need_action = rsp_dict["state"] self._set_state(0) if need_action else self._set_state(-1) - + return need_action - + async def _act(self) -> Message: """Useful in 'react' mode. Return a Message conforming to Role._act interface.""" code, _, _ = await self._write_and_exec_code() return Message(content=code, role="assistant", cause_by=WriteAnalysisCode) - + async def _plan_and_act(self) -> Message: rsp = await super()._plan_and_act() await self.execute_code.terminate() return rsp - + async def _act_on_task(self, current_task: Task) -> TaskResult: """Useful in 'plan_and_act' mode. Wrap the output in a TaskResult for review and confirmation.""" code, result, is_success = await self._write_and_exec_code() task_result = TaskResult(code=code, result=result, is_success=is_success) return task_result - + async def _write_and_exec_code(self, max_retry: int = 3): counter = 0 success = False - + # plan info plan_status = self.planner.get_plan_status() if self.use_plan else "" - + # tool info if self.tools: context = ( @@ -112,46 +112,48 @@ class DataInterpreter(Role): tool_info = await self.tool_recommender.get_recommended_tool_info(context=context, plan=plan) else: tool_info = "" - + # data info await self._check_data() - + while not success and counter < max_retry: ### write code ### code, cause_by = await self._write_code(counter, plan_status, tool_info) - + self.working_memory.add(Message(content=code, role="assistant", cause_by=cause_by)) - + ### execute code ### - import pdb;pdb.set_trace() + import pdb + + pdb.set_trace() result, success = await self.execute_code.run(code) print(result) - + self.working_memory.add(Message(content=result, role="user", cause_by=ExecuteNbCode)) - + ### process execution result ### counter += 1 - + if not success and counter >= max_retry: logger.info("coding failed!") review, _ = await self.planner.ask_review(auto_run=False, trigger=ReviewConst.CODE_REVIEW_TRIGGER) if ReviewConst.CHANGE_WORDS[0] in review: counter = 0 # redo the task again with help of human suggestions - + return code, result, success - + async def _write_code( - self, - counter: int, - plan_status: str = "", - tool_info: str = "", + self, + counter: int, + plan_status: str = "", + tool_info: str = "", ): todo = self.rc.todo # todo is WriteAnalysisCode logger.info(f"ready to {todo.name}") use_reflection = counter > 0 and self.use_reflection # only use reflection after the first trial - + user_requirement = self.get_memories()[0].content - + code = await todo.run( user_requirement=user_requirement, plan_status=plan_status, @@ -159,19 +161,19 @@ class DataInterpreter(Role): working_memory=self.working_memory.get(), use_reflection=use_reflection, ) - + return code, todo - + async def _check_data(self): if ( - not self.use_plan - or not self.planner.plan.get_finished_tasks() - or self.planner.plan.current_task.task_type - not in [ - TaskType.DATA_PREPROCESS.type_name, - TaskType.FEATURE_ENGINEERING.type_name, - TaskType.MODEL_TRAIN.type_name, - ] + not self.use_plan + or not self.planner.plan.get_finished_tasks() + or self.planner.plan.current_task.task_type + not in [ + TaskType.DATA_PREPROCESS.type_name, + TaskType.FEATURE_ENGINEERING.type_name, + TaskType.MODEL_TRAIN.type_name, + ] ): return logger.info("Check updated data") From fe2ae365fee152d976052c3f3b113cd3a2203bc4 Mon Sep 17 00:00:00 2001 From: stellahsr Date: Tue, 19 Mar 2024 20:20:49 +0800 Subject: [PATCH 7/8] rm debug msg --- metagpt/roles/di/data_interpreter.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/metagpt/roles/di/data_interpreter.py b/metagpt/roles/di/data_interpreter.py index 0e2cce309..c30d998e9 100644 --- a/metagpt/roles/di/data_interpreter.py +++ b/metagpt/roles/di/data_interpreter.py @@ -123,9 +123,6 @@ class DataInterpreter(Role): self.working_memory.add(Message(content=code, role="assistant", cause_by=cause_by)) ### execute code ### - import pdb - - pdb.set_trace() result, success = await self.execute_code.run(code) print(result) From 9a7279bf91366543957800abf7835ee429bdb6b5 Mon Sep 17 00:00:00 2001 From: stellahsr Date: Tue, 19 Mar 2024 20:24:43 +0800 Subject: [PATCH 8/8] add const for swe-bench --- data/inference/const.py | 53 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 data/inference/const.py diff --git a/data/inference/const.py b/data/inference/const.py new file mode 100644 index 000000000..42e63ec0e --- /dev/null +++ b/data/inference/const.py @@ -0,0 +1,53 @@ +# -*- coding: utf-8 -*- +# @Author : stellahong (stellahong@fuzhi.ai) +# @Desc : +import pandas as pd + +from metagpt.const import METAGPT_ROOT + +SUBSET_DATASET = METAGPT_ROOT / "sub_swebench_dataset" / "sub_swebench.csv" +SUBSET_DATASET_SKLERARN = METAGPT_ROOT / "sub_swebench_dataset" / "scikit-learn-68.csv" + +# SCIKIT_LEARN_IDS: A list of instance identifiers from 'sub_swebench.csv' within SUBSET_DATASET. +# This collection represents a subset specifically related to scikit-learn content. +SCIKIT_LEARN_IDS = [ + "scikit-learn__scikit-learn-11578", + "scikit-learn__scikit-learn-10297", + "scikit-learn__scikit-learn-25747", + "scikit-learn__scikit-learn-15512", + "scikit-learn__scikit-learn-15119", + "scikit-learn__scikit-learn-10870", + "scikit-learn__scikit-learn-15100", + "scikit-learn__scikit-learn-14496", + "scikit-learn__scikit-learn-14890", + "scikit-learn__scikit-learn-10428", + "scikit-learn__scikit-learn-25744", + "scikit-learn__scikit-learn-11542", + "scikit-learn__scikit-learn-10198", + "scikit-learn__scikit-learn-10459", +] + + +def read_sub_set_instance(path=SUBSET_DATASET, tag="scikit-learn"): + try: + df = pd.read_excel(path) + # Filter for instances containing the tag in either column + pass_filter = df["instance_id_pass"].str.contains(tag, na=False) + fail_filter = df["instance_id_fail"].str.contains(tag, na=False) + + # Combine the filters using | (OR operator) for efficiency + combined_filter = pass_filter | fail_filter + + # Apply combined filter and select the specific columns + filtered_df = df[combined_filter][["instance_id_pass", "instance_id_fail"]] + + # Flatten the DataFrame into a list and remove NaN values + subset_instance = filtered_df.stack().dropna().tolist() + + return subset_instance + except FileNotFoundError: + print(f"File not found: {path}") + return [] + except Exception as e: + print(f"An error occurred: {e}") + return []