diff --git a/metagpt/ext/cr/actions/code_review.py b/metagpt/ext/cr/actions/code_review.py index 2cbeeb3ee..99fef3bc4 100644 --- a/metagpt/ext/cr/actions/code_review.py +++ b/metagpt/ext/cr/actions/code_review.py @@ -5,7 +5,7 @@ import json import re -from unidiff import PatchedFile, PatchSet +from unidiff import PatchSet from metagpt.actions.action import Action from metagpt.ext.cr.utils.cleaner import ( @@ -19,6 +19,7 @@ from metagpt.utils.common import parse_json_code_block CODE_REVIEW_PROMPT_TEMPLATE = """ NOTICE +Let's think and work step by step. With the given pull-request(PR) Patch, and referenced Points(Code Standards), you should compare each point with the code one-by-one. The Patch code has added line number at the first character each line for reading, but the review should focus on new added code inside the `Patch` (lines starting with line number and '+'). @@ -52,12 +53,13 @@ CodeReview guidelines: - Don't suggest to add docstring unless it's necessary indeed. - If the same code error occurs multiple times, it cannot be omitted, and all places need to be identified.But Don't duplicate at the same place with the same comment! - Every line of code in the patch needs to be carefully checked, and laziness cannot be omitted. It is necessary to find out all the places. +- The `comment` and `point_id` in the Output must correspond to and belong to the same one `Point`. Just print the PR Patch comments in json format like **Output Format**. """ CODE_REVIEW_COMFIRM_SYSTEM_PROMPT = """ -You are a professional engineer with {code_language} stack, and good at code review comment result judgement. +You are a professional engineer with {code_language} stack, and good at code review comment result judgement.Let's think and work step by step. """ CODE_REVIEW_COMFIRM_TEMPLATE = """ @@ -89,25 +91,35 @@ class CodeReview(Action): new_comments = [] logger.debug(f"original comments: {comments}") for cmt in comments: - for p in points: - if int(cmt.get("point_id", -1)) == p.id: - code_start_line = cmt.get("code_start_line") - code_end_line = cmt.get("code_end_line") - code = get_code_block_from_patch(patch, code_start_line, code_end_line) + try: + if cmt.get("commented_file").endswith(".py"): + points = [p for p in points if p.language == "Python"] + elif cmt.get("commented_file").endswith(".java"): + points = [p for p in points if p.language == "Java"] + else: + continue + for p in points: + point_id = int(cmt.get("point_id", -1)) + if point_id == p.id: + code_start_line = cmt.get("code_start_line") + code_end_line = cmt.get("code_end_line") + code = get_code_block_from_patch(patch, code_start_line, code_end_line) - new_comments.append( - { - "commented_file": cmt.get("commented_file"), - "code": code, - "code_start_line": code_start_line, - "code_end_line": code_end_line, - "comment": cmt.get("comment"), - "point_id": p.id, - "point": p.text, - "point_detail": p.detail, - } - ) - break + new_comments.append( + { + "commented_file": cmt.get("commented_file"), + "code": code, + "code_start_line": code_start_line, + "code_end_line": code_end_line, + "comment": cmt.get("comment"), + "point_id": p.id, + "point": p.text, + "point_detail": p.detail, + } + ) + break + except Exception: + pass logger.debug(f"new_comments: {new_comments}") return new_comments @@ -151,43 +163,27 @@ class CodeReview(Action): logger.info(f"original comments num: {len(comments)}, confirmed comments num: {len(new_comments)}") return new_comments - async def cr_by_full_points(self, patch: PatchSet, points: list[Point]): + async def cr_by_points(self, patch: PatchSet, points: list[Point]): comments = [] - points_str = "\n".join([f"{p.id} {p.text}" for p in points]) for patched_file in patch: if patched_file.path.endswith(".py"): - points_str = "\n".join([f"{p.id} {p.text}" for p in points if p.language == "Python"]) + points = [p for p in points if p.language == "Python"] elif patched_file.path.endswith(".java"): - points_str = "\n".join([f"{p.id} {p.text}" for p in points if p.language == "Java"]) + points = [p for p in points if p.language == "Java"] else: continue - if len(str(patched_file).splitlines()) >= 50: - cr_by_segment_points_comments = await self.cr_by_segment_points( - patched_file=patched_file, points=points - ) - comments += cr_by_segment_points_comments - continue - prompt = CODE_REVIEW_PROMPT_TEMPLATE.format(patch=str(patched_file), points=points_str) - resp = await self.llm.aask(prompt) - json_str = parse_json_code_block(resp)[0] - comment = json.loads(json_str) - patched_file_path = patched_file.path - for c in comment: - c["commented_file"] = patched_file_path - comments += comment - - return comments - - async def cr_by_segment_points(self, patched_file: PatchedFile, points: list[Point]): - comments = [] - group_points = [points[i : i + 3] for i in range(0, len(points), 3)] - for group_point in group_points: - points_str = "\n".join([f"{p.id} {p.text}" for p in group_point]) - prompt = CODE_REVIEW_PROMPT_TEMPLATE.format(patch=str(patched_file), points=points_str) - resp = await self.llm.aask(prompt) - json_str = parse_json_code_block(resp)[0] - comments_batch = json.loads(json_str) - comments += comments_batch + group_points = [points[i : i + 3] for i in range(0, len(points), 3)] + for group_point in group_points: + points_str = "id description\n" + points_str += "\n".join([f"{p.id} {p.text}" for p in group_point]) + prompt = CODE_REVIEW_PROMPT_TEMPLATE.format(patch=str(patched_file), points=points_str) + resp = await self.llm.aask(prompt) + json_str = parse_json_code_block(resp)[0] + comments_batch = json.loads(json_str) + patched_file_path = patched_file.path + for c in comments_batch: + c["commented_file"] = patched_file_path + comments += comments_batch return comments @@ -196,7 +192,7 @@ class CodeReview(Action): patch: PatchSet = add_line_num_on_patch(patch) result = [] - comments = await self.cr_by_full_points(patch=patch, points=points) + comments = await self.cr_by_points(patch=patch, points=points) if len(comments) != 0: comments = self.format_comments(comments, points, patch) comments = await self.confirm_comments(patch=patch, comments=comments, points=points) diff --git a/metagpt/ext/cr/points.json b/metagpt/ext/cr/points.json index 5455d3865..6cdcd275d 100644 --- a/metagpt/ext/cr/points.json +++ b/metagpt/ext/cr/points.json @@ -21,7 +21,7 @@ "language": "Java", "detail": "缺陷类型:避免函数中未使用的形参;修复方案:忽略", "yes_example": "### 被判定为\"避免函数中未使用的形参\"的例子\n<例子1>\npublic void setTransientVariablesLocal(Map transientVariables) {\n throw new UnsupportedOperationException(\"No execution active, no variables can be set\");\n}这段代码中的形参\"transientVariables\"未在函数体内出现,所以这个被判定为\"避免函数中未使用的形参\"\n\n\n<例子2>\nprotected void modifyFetchPersistencePackageRequest(PersistencePackageRequest ppr, Map pathVars) {}\n这段代码中的形参\"ppr\"和\"pathVars\"未在函数体内出现,所以这个被判定为\"避免函数中未使用的形参\"\n", - "no_example": "### 不能被判定为\"避免函数中未使用的形参\"的例子\n<例子1>\npublic String processFindForm(@RequestParam(value = \"pageNo\", defaultValue = \"1\") int pageNo) {\n\tlastName = owner.getLastName();\n\treturn addPaginationModel(pageNo, paginationModel, lastName, ownersResults);\n}上述代码中的形参\"pageNo\"在当前函数'processFindForm'内被'return addPaginationModel(pageNo, paginationModel, lastName, ownersResults);'这一句被使用,虽然pageNo没有被用于逻辑计算,但作为了函数调用其他函数的参数使用了,所以这个不能被判定为\"避免函数中未使用的形参\"\n" + "no_example": "### 不能被判定为\"避免函数中未使用的形参\"的例子\n<例子1>\npublic String processFindForm(@RequestParam(value = \"pageNo\", defaultValue = \"1\") int pageNo) {\n\tlastName = owner.getLastName();\n\treturn addPaginationModel(pageNo, paginationModel, lastName, ownersResults);\n}这段代码中的形参\"pageNo\"在当前函数'processFindForm'内被'return addPaginationModel(pageNo, paginationModel, lastName, ownersResults);'这一句被使用,虽然pageNo没有被用于逻辑计算,但作为了函数调用其他函数的参数使用了,所以这个不能被判定为\"避免函数中未使用的形参\"\n\n<例子2>\npublic void formatDate(Date date) {\n\tSimpleDateFormat sdf = new SimpleDateFormat(\"yyyy-MM-dd\");\n\tSystem.out.println(\"Formatted date: \" + sdf.format(date));\n}这段代码中的形参date在System.out.println(\"Formatted date: \" + sdf.format(date))这一句中被引用到,所以这个不能被判定为\"避免函数中未使用的形参\"\n" }, { "id": 4,