update:cr

This commit is contained in:
zhanglei 2024-07-11 20:02:06 +08:00
parent af03b0c6db
commit 54ac38c093
2 changed files with 49 additions and 53 deletions

View file

@ -5,7 +5,7 @@
import json
import re
from unidiff import PatchedFile, PatchSet
from unidiff import PatchSet
from metagpt.actions.action import Action
from metagpt.ext.cr.utils.cleaner import (
@ -19,6 +19,7 @@ from metagpt.utils.common import parse_json_code_block
CODE_REVIEW_PROMPT_TEMPLATE = """
NOTICE
Let's think and work step by step.
With the given pull-request(PR) Patch, and referenced Points(Code Standards), you should compare each point with the code one-by-one.
The Patch code has added line number at the first character each line for reading, but the review should focus on new added code inside the `Patch` (lines starting with line number and '+').
@ -52,12 +53,13 @@ CodeReview guidelines:
- Don't suggest to add docstring unless it's necessary indeed.
- If the same code error occurs multiple times, it cannot be omitted, and all places need to be identified.But Don't duplicate at the same place with the same comment!
- Every line of code in the patch needs to be carefully checked, and laziness cannot be omitted. It is necessary to find out all the places.
- The `comment` and `point_id` in the Output must correspond to and belong to the same one `Point`.
Just print the PR Patch comments in json format like **Output Format**.
"""
CODE_REVIEW_COMFIRM_SYSTEM_PROMPT = """
You are a professional engineer with {code_language} stack, and good at code review comment result judgement.
You are a professional engineer with {code_language} stack, and good at code review comment result judgement.Let's think and work step by step.
"""
CODE_REVIEW_COMFIRM_TEMPLATE = """
@ -89,25 +91,35 @@ class CodeReview(Action):
new_comments = []
logger.debug(f"original comments: {comments}")
for cmt in comments:
for p in points:
if int(cmt.get("point_id", -1)) == p.id:
code_start_line = cmt.get("code_start_line")
code_end_line = cmt.get("code_end_line")
code = get_code_block_from_patch(patch, code_start_line, code_end_line)
try:
if cmt.get("commented_file").endswith(".py"):
points = [p for p in points if p.language == "Python"]
elif cmt.get("commented_file").endswith(".java"):
points = [p for p in points if p.language == "Java"]
else:
continue
for p in points:
point_id = int(cmt.get("point_id", -1))
if point_id == p.id:
code_start_line = cmt.get("code_start_line")
code_end_line = cmt.get("code_end_line")
code = get_code_block_from_patch(patch, code_start_line, code_end_line)
new_comments.append(
{
"commented_file": cmt.get("commented_file"),
"code": code,
"code_start_line": code_start_line,
"code_end_line": code_end_line,
"comment": cmt.get("comment"),
"point_id": p.id,
"point": p.text,
"point_detail": p.detail,
}
)
break
new_comments.append(
{
"commented_file": cmt.get("commented_file"),
"code": code,
"code_start_line": code_start_line,
"code_end_line": code_end_line,
"comment": cmt.get("comment"),
"point_id": p.id,
"point": p.text,
"point_detail": p.detail,
}
)
break
except Exception:
pass
logger.debug(f"new_comments: {new_comments}")
return new_comments
@ -151,43 +163,27 @@ class CodeReview(Action):
logger.info(f"original comments num: {len(comments)}, confirmed comments num: {len(new_comments)}")
return new_comments
async def cr_by_full_points(self, patch: PatchSet, points: list[Point]):
async def cr_by_points(self, patch: PatchSet, points: list[Point]):
comments = []
points_str = "\n".join([f"{p.id} {p.text}" for p in points])
for patched_file in patch:
if patched_file.path.endswith(".py"):
points_str = "\n".join([f"{p.id} {p.text}" for p in points if p.language == "Python"])
points = [p for p in points if p.language == "Python"]
elif patched_file.path.endswith(".java"):
points_str = "\n".join([f"{p.id} {p.text}" for p in points if p.language == "Java"])
points = [p for p in points if p.language == "Java"]
else:
continue
if len(str(patched_file).splitlines()) >= 50:
cr_by_segment_points_comments = await self.cr_by_segment_points(
patched_file=patched_file, points=points
)
comments += cr_by_segment_points_comments
continue
prompt = CODE_REVIEW_PROMPT_TEMPLATE.format(patch=str(patched_file), points=points_str)
resp = await self.llm.aask(prompt)
json_str = parse_json_code_block(resp)[0]
comment = json.loads(json_str)
patched_file_path = patched_file.path
for c in comment:
c["commented_file"] = patched_file_path
comments += comment
return comments
async def cr_by_segment_points(self, patched_file: PatchedFile, points: list[Point]):
comments = []
group_points = [points[i : i + 3] for i in range(0, len(points), 3)]
for group_point in group_points:
points_str = "\n".join([f"{p.id} {p.text}" for p in group_point])
prompt = CODE_REVIEW_PROMPT_TEMPLATE.format(patch=str(patched_file), points=points_str)
resp = await self.llm.aask(prompt)
json_str = parse_json_code_block(resp)[0]
comments_batch = json.loads(json_str)
comments += comments_batch
group_points = [points[i : i + 3] for i in range(0, len(points), 3)]
for group_point in group_points:
points_str = "id description\n"
points_str += "\n".join([f"{p.id} {p.text}" for p in group_point])
prompt = CODE_REVIEW_PROMPT_TEMPLATE.format(patch=str(patched_file), points=points_str)
resp = await self.llm.aask(prompt)
json_str = parse_json_code_block(resp)[0]
comments_batch = json.loads(json_str)
patched_file_path = patched_file.path
for c in comments_batch:
c["commented_file"] = patched_file_path
comments += comments_batch
return comments
@ -196,7 +192,7 @@ class CodeReview(Action):
patch: PatchSet = add_line_num_on_patch(patch)
result = []
comments = await self.cr_by_full_points(patch=patch, points=points)
comments = await self.cr_by_points(patch=patch, points=points)
if len(comments) != 0:
comments = self.format_comments(comments, points, patch)
comments = await self.confirm_comments(patch=patch, comments=comments, points=points)

View file

@ -21,7 +21,7 @@
"language": "Java",
"detail": "缺陷类型:避免函数中未使用的形参;修复方案:忽略",
"yes_example": "### 被判定为\"避免函数中未使用的形参\"的例子\n<例子1>\npublic void setTransientVariablesLocal(Map<String, Object> transientVariables) {\n throw new UnsupportedOperationException(\"No execution active, no variables can be set\");\n}这段代码中的形参\"transientVariables\"未在函数体内出现,所以这个被判定为\"避免函数中未使用的形参\"\n</例子1>\n\n<例子2>\nprotected void modifyFetchPersistencePackageRequest(PersistencePackageRequest ppr, Map<String, String> pathVars) {}\n这段代码中的形参\"ppr\"和\"pathVars\"未在函数体内出现,所以这个被判定为\"避免函数中未使用的形参\"\n</例子2>",
"no_example": "### 不能被判定为\"避免函数中未使用的形参\"的例子\n<例子1>\npublic String processFindForm(@RequestParam(value = \"pageNo\", defaultValue = \"1\") int pageNo) {\n\tlastName = owner.getLastName();\n\treturn addPaginationModel(pageNo, paginationModel, lastName, ownersResults);\n}上述代码中的形参\"pageNo\"在当前函数'processFindForm'内被'return addPaginationModel(pageNo, paginationModel, lastName, ownersResults);'这一句被使用虽然pageNo没有被用于逻辑计算但作为了函数调用其他函数的参数使用了所以这个不能被判定为\"避免函数中未使用的形参\"\n</例子1>"
"no_example": "### 不能被判定为\"避免函数中未使用的形参\"的例子\n<例子1>\npublic String processFindForm(@RequestParam(value = \"pageNo\", defaultValue = \"1\") int pageNo) {\n\tlastName = owner.getLastName();\n\treturn addPaginationModel(pageNo, paginationModel, lastName, ownersResults);\n}这段代码中的形参\"pageNo\"在当前函数'processFindForm'内被'return addPaginationModel(pageNo, paginationModel, lastName, ownersResults);'这一句被使用虽然pageNo没有被用于逻辑计算但作为了函数调用其他函数的参数使用了所以这个不能被判定为\"避免函数中未使用的形参\"\n</例子1>\n<例子2>\npublic void formatDate(Date date) {\n\tSimpleDateFormat sdf = new SimpleDateFormat(\"yyyy-MM-dd\");\n\tSystem.out.println(\"Formatted date: \" + sdf.format(date));\n}这段代码中的形参date在System.out.println(\"Formatted date: \" + sdf.format(date))这一句中被引用到,所以这个不能被判定为\"避免函数中未使用的形参\"\n</例子2>"
},
{
"id": 4,