1. RunCode -> DebugError loop done; 2. modify default k for memory search

This commit is contained in:
yzlin 2023-08-01 12:16:38 +08:00
parent 6bf527d31e
commit 8abdca3057
10 changed files with 140 additions and 70 deletions

View file

@ -5,7 +5,9 @@
@Author : alexanderwu
@File : debug_error.py
"""
import re
from metagpt.actions.action import Action
from metagpt.utils.common import CodeParser
PROMPT_TEMPLATE = """
NOTICE
@ -21,7 +23,7 @@ Now you should start rewriting the code:
## file name of the code to rewrite: Write code with triple quoto. Do your best to implement THIS ONLY ONE FILE.
"""
class DebugError(Action):
def __init__(self, name, context=None, llm=None):
def __init__(self, name="DebugError", context=None, llm=None):
super().__init__(name, context, llm)
# async def run(self, code, error):
@ -31,8 +33,15 @@ class DebugError(Action):
# return fixed_code
async def run(self, context):
    """Ask the LLM to rewrite the single file named in a run report.

    Args:
        context: Text of the run report. It is expected to contain a
            "## File To Rewrite:" section naming a ``.py`` file.

    Returns:
        A ``(file_name, code)`` tuple. ``file_name`` is an empty string
        when nothing is rewritten (the report contains "PASS", or no file
        name could be located); the second element is then a short
        explanatory note instead of code.
    """
    if "PASS" in context:
        return "", "the original code works fine, no need to debug"

    # Raw string: "\s" inside a plain literal is an invalid escape sequence.
    match = re.search(r"## File To Rewrite:\s*(.+\.py)", context)
    if match is None:
        # Calling .group() on a failed search would raise AttributeError;
        # report "nothing to rewrite" the same way the PASS branch does.
        return "", "no file to rewrite was found in the context, skip debugging"
    file_name = match.group(1)

    prompt = PROMPT_TEMPLATE.format(context=context)
    rsp = await self._aask(prompt)
    # Return the parsed code together with the file name; returning the raw
    # response first would leave the parsing below unreachable.
    code = CodeParser.parse_code(block="", text=rsp)
    return file_name, code

View file

@ -16,8 +16,25 @@ PROMPT_TEMPLATE = """
Role: You are a senior development and qa engineer, your role is summarize the code running result.
If the running result does not include an error, you should explicitly approve the result.
On the other hand, if the running result indicates some error, you should point out which part, the development code or the test code, produces the error,
and give specific instructions on fixing the errors.
and give specific instructions on fixing the errors. Here is the code info:
{context}
Now you should begin your analysis
---
## instruction:
Please summarize the cause of the errors and give correction instruction
## File To Rewrite:
Determine the ONE file to rewrite in order to fix the error, for example, xyz.py, or test_xyz.py
## Status:
Determine if all of the code works fine, if so write PASS, else FAIL,
WRITE ONLY ONE WORD, PASS OR FAIL, IN THI SECTION
## Send To:
Please write Engineer if the errors are due to problematic development codes, and QaEngineer to problematic test codes, and NoOne if there are no errors,
WRITE ONLY ONE WORD, Engineer OR QaEngineer OR NoOne, IN THIS SECTION.
---
You should fill in necessary instruction, status, send to, and finally return all content between the --- segment line.
"""
CONTEXT = """
## Development Code File Name
{code_file_name}
## Development Code
@ -35,18 +52,6 @@ and give specific instructions on fixing the errors.
## Running Output
standard output: {outs};
standard errors: {errs};
## instruction:
Please summarize the cause of the errors and give correction instruction
## File To Rewrite
Determine the ONE file to rewrite in order to fix the error, for example, xyz.py, or test_xyz.py
## Status:
Determine if all of the code works fine, if so write PASS, else FAIL,
WRITE ONLY ONE WORD, PASS OR FAIL, IN THI SECTION
## Send To:
Please write Engineer if the errors are due to problematic development codes, and QaEngineer to problematic test codes, and NoOne if there are no errors,
WRITE ONLY ONE WORD, Engineer OR QaEngineer OR NoOne, IN THIS SECTION.
---
You should fill in necessary summary, status, send to, and finally return all content between the --- segment line.
"""
class RunCode(Action):
@ -97,15 +102,20 @@ class RunCode(Action):
elif mode == "text":
outs, errs = await self.run_text(code=code)
logger.info(outs)
logger.info(errs)
prompt = PROMPT_TEMPLATE.format(
logger.info(f"{outs=}")
logger.info(f"{errs=}")
context = CONTEXT.format(
code=code, code_file_name=code_file_name,
test_code=test_code, test_file_name=test_file_name,
command=" ".join(command),
outs=outs, errs=errs
outs=outs[:500], # outs might be long but they are not important, truncate them to avoid token overflow
errs=errs
)
prompt = PROMPT_TEMPLATE.format(context=context)
rsp = await self._aask(prompt)
return rsp
result = context + rsp
return result

View file

@ -30,7 +30,7 @@ you should correctly import the necessary classes based on these file locations!
"""
class WriteTest(Action):
def __init__(self, name="", context=None, llm=None):
def __init__(self, name="WriteTest", context=None, llm=None):
super().__init__(name, context, llm)
async def write_code(self, prompt):

View file

@ -43,13 +43,13 @@ class LongTermMemory(Memory):
# and ignore adding messages from recover repeatedly
self.memory_storage.add(message)
def remember(self, observed: list[Message], k=10) -> list[Message]:
def remember(self, observed: list[Message], k=0) -> list[Message]:
"""
remember the most similar k memories from observed Messages, return all when k=0
1. remember the short-term memory(stm) news
2. integrate the stm news with ltm(long-term memory) news
"""
stm_news = super(LongTermMemory, self).remember(observed) # shot-term memory news
stm_news = super(LongTermMemory, self).remember(observed, k=k) # shot-term memory news
if not self.memory_storage.is_initialized:
# memory_storage hasn't initialized, use default `remember` to get stm_news
return stm_news

View file

@ -63,7 +63,7 @@ class Memory:
"""Return the most recent k memories, return all when k=0"""
return self.storage[-k:]
def remember(self, observed: list[Message], k=10) -> list[Message]:
def remember(self, observed: list[Message], k=0) -> list[Message]:
"""remember the most recent k memories from observed Messages, return all when k=0"""
already_observed = self.get(k)
news: list[Message] = []

View file

@ -141,7 +141,8 @@ class Engineer(Role):
msg = Message(content=code, role=self.profile, cause_by=type(self._rc.todo))
self._rc.memory.add(msg)
code_msg_all.append(FILENAME_CODE_SEP.join([todo, str(file_path), code]))
code_msg = todo + FILENAME_CODE_SEP + str(file_path)
code_msg_all.append(code_msg)
logger.info(f'Done {self.get_workspace()} generating.')
msg = Message(
@ -188,7 +189,8 @@ class Engineer(Role):
msg = Message(content=code, role=self.profile, cause_by=WriteCode)
self._rc.memory.add(msg)
code_msg_all.append(FILENAME_CODE_SEP.join([todo, str(file_path), code]))
code_msg = todo + FILENAME_CODE_SEP + str(file_path)
code_msg_all.append(code_msg)
logger.info(f'Done {self.get_workspace()} generating.')
msg = Message(

View file

@ -15,7 +15,7 @@ from metagpt.logs import logger
from metagpt.roles import Role
from metagpt.schema import Message
from metagpt.roles.engineer import Engineer
from metagpt.utils.common import CodeParser
from metagpt.utils.common import CodeParser, parse_recipient
from metagpt.utils.special_tokens import MSG_SEP, FILENAME_CODE_SEP
class QaEngineer(Role):
@ -24,7 +24,9 @@ class QaEngineer(Role):
constraints="The test code you write should conform to code standard like PEP8, be modular, easy to read and maintain"):
super().__init__(name, profile, goal, constraints)
self._init_actions([WriteTest])
self._watch([WriteCode])
self._watch([WriteCode, WriteTest, RunCode, DebugError])
self.test_round = 0
self.test_round_allowed = 5 # hard code for 1 WriteTest round + 2 x (RunCode -> DebugError loop)
@classmethod
def parse_workspace(cls, system_design_msg: Message) -> str:
@ -38,12 +40,11 @@ class QaEngineer(Role):
return WORKSPACE_ROOT / 'src'
workspace = self.parse_workspace(msg)
# project directory: workspace/{package_name}, which contains package source code folder, tests folder, resources folder, etc.
# source codes directory: workspace/{package_name}/{package_name}
if return_proj_dir:
return WORKSPACE_ROOT / workspace
# development codes directory: workspace/{package_name}/{package_name}
return WORKSPACE_ROOT / workspace / workspace
def write_file(self, filename: str, code: str):
workspace = self.get_workspace() / 'tests'
file = workspace / filename
@ -60,8 +61,12 @@ class QaEngineer(Role):
for code_msg in code_msgs:
# write tests
file_name, file_path, code_to_test = code_msg.split(FILENAME_CODE_SEP)
file_name, file_path = code_msg.split(FILENAME_CODE_SEP)
code_to_test = open(file_path, "r").read()
if "test" in file_name:
continue # Engineer might write some test files, skip testing a test file
test_file_name = "test_" + file_name
test_file_path = self.get_workspace() / "tests" / test_file_name
logger.info(f'Writing {test_file_name}..')
test_code = await WriteTest().run(
code_to_test=code_to_test,
@ -72,48 +77,86 @@ class QaEngineer(Role):
)
self.write_file(test_file_name, test_code)
# run tests
result_msg = await RunCode().run(
mode="script",
code=code_to_test,
code_file_name=file_name,
test_code=test_code,
test_file_name=test_file_name,
command=['python', f'tests/{test_file_name}'],
working_directory=self.get_workspace(), # workspace/package_name, will run tests/test_xxx.py here
additional_python_paths=[self.get_workspace(return_proj_dir=False)], # workspace/package_name/package_name,
# import statement inside package code needs this
# prepare context for run tests in next round
command = ['python', f'tests/{test_file_name}']
file_info = {
"file_name": file_name, "file_path": str(file_path),
"test_file_name": test_file_name, "test_file_path": str(test_file_path),
"command": command
}
msg = Message(
content=str(file_info), role=self.profile, cause_by=WriteTest,
sent_from="QaEngineer", send_to="QaEngineer"
)
result_msg_all.append(result_msg)
# RunCode().run(
# mode="script",
# working_directory=self.get_workspace(),
# additional_python_paths=[self.get_workspace(return_proj_dir=False)],
# command=['python', '-m', 'unittest', 'discover', '-s', 'tests']
# )
self._publish_message(msg)
logger.info(f'Done {self.get_workspace()}/tests generating.')
msg_content = MSG_SEP.join(result_msg_all)
msg = Message(content=msg_content, role=self.profile, cause_by=RunCode, send_to=QaEngineer)
return msg
async def _run_code(self, msg):
    """Run the test described by ``msg`` and publish the run report.

    Args:
        msg: Message whose ``content`` is the string form of a dict with the
            keys ``file_name``, ``file_path``, ``test_file_name``,
            ``test_file_path`` and ``command``.

    The published message carries ``str(file_info) + FILENAME_CODE_SEP +
    <RunCode result>`` and is addressed to whoever ``parse_recipient``
    extracts from the result.
    """
    import ast  # local import: only needed to decode the message payload

    # The payload is a dict literal of strings and a list of strings, so
    # literal_eval decodes it without executing arbitrary expressions the
    # way eval() would.
    file_info = ast.literal_eval(msg.content)

    # Context managers close the handles instead of leaking them.
    with open(file_info["file_path"], "r") as src_file:
        code_to_test = src_file.read()
    with open(file_info["test_file_path"], "r") as test_file:
        test_code = test_file.read()

    proj_dir = self.get_workspace()
    development_code_dir = self.get_workspace(return_proj_dir=False)
    result_msg = await RunCode().run(
        mode="script",
        code=code_to_test,
        code_file_name=file_info["file_name"],
        test_code=test_code,
        test_file_name=file_info["test_file_name"],
        command=file_info["command"],
        working_directory=proj_dir,  # workspace/package_name, will run tests/test_xxx.py here
        additional_python_paths=[development_code_dir],  # workspace/package_name/package_name,
        # import statement inside package code needs this
    )

    recipient = parse_recipient(result_msg)  # the recipient might be Engineer or myself
    content = str(file_info) + FILENAME_CODE_SEP + result_msg
    msg = Message(
        content=content, role=self.profile, cause_by=RunCode,
        sent_from="QaEngineer", send_to=recipient
    )
    self._publish_message(msg)
async def _debug_error(self, msg):
# Handle a run report: hand the report to DebugError, write the rewritten
# file when a file name comes back, and publish a DebugError message
# addressed to the sender of `msg`.
# NOTE(review): indentation is lost in this view, so it cannot be confirmed
# here whether the publish below is nested under `if file_name:` or always runs.
# process the msg, if the code works fine, no need to debug
# else: debug and rewrite the code
pass
# msg.content is "<file_info string>" + FILENAME_CODE_SEP + "<run report>".
# NOTE(review): this two-way unpack raises ValueError if the report itself
# contains the separator; split(FILENAME_CODE_SEP, 1) would tolerate that.
file_info, context = msg.content.split(FILENAME_CODE_SEP)
# DebugError.run returns (file_name, code); file_name is "" when it found nothing to fix.
file_name, code = await DebugError().run(context)
if file_name:
# write_file places the file under the workspace's 'tests' folder.
self.write_file(file_name, code)
recipient = msg.sent_from # send back to the one who ran the code for another run, might be one's self
# The file_info string is forwarded unchanged as the content of the new message.
msg = Message(content=file_info, role=self.profile, cause_by=DebugError, sent_from="QaEngineer", send_to=recipient)
self._publish_message(msg)
async def _observe(self) -> int:
    """Observe through the parent role, then keep only news sent to "QaEngineer".

    Returns:
        The number of messages left in ``self._rc.news`` after filtering.
    """
    await super()._observe()

    # Only messages explicitly addressed to this role count as observed news.
    addressed_to_me = []
    for item in self._rc.news:
        if item.send_to == "QaEngineer":
            addressed_to_me.append(item)

    self._rc.news = addressed_to_me
    return len(self._rc.news)
async def _act(self) -> Message:
# Process the observed news for one round and return a status Message.
# Once test_round is greater than test_round_allowed, no news is processed
# and a "skip" notice is returned instead.
# NOTE(review): indentation is lost in this view; the round counter increment
# and the final Message presumably sit after the loop - confirm against the file.
if self.test_round > self.test_round_allowed:
result_msg = Message(
content=f"Exceeding {self.test_round_allowed} rounds of tests, skip (writing code counts as a round, too)",
role=self.profile, cause_by=WriteTest, sent_from="QaEngineer", send_to=""
)
return result_msg
for msg in self._rc.news:
# Skip anything not addressed to this role.
if msg.send_to != "QaEngineer":
continue
# Decide what to do based on observed msg type, currently defined by human,
# might potentially be moved to _think, that is, let the agent decides for itself
if msg.cause_by == WriteCode:
# engineer wrote a code, time to write a test for it
result_msg = await self._write_test(msg)
elif msg.cause_by in [WriteTest, DebugError]:
# I wrote or debugged my test code, time to run it
result_msg = await self._run_code(msg)
elif msg.cause_by == RunCode:
# I ran my test code, time to fix bugs, if any
result_msg = await self._debug_error(msg)
self.test_round += 1
# result_msg is reassigned here, so the handlers' return values above are not what gets returned.
result_msg = Message(
content=f"Round {self.test_round} of tests done",
role=self.profile, cause_by=WriteTest, sent_from="QaEngineer", send_to=""
)
return result_msg

View file

@ -27,6 +27,7 @@ class Message:
instruct_content: BaseModel = field(default=None)
role: str = field(default='user') # system / user / assistant
cause_by: Type["Action"] = field(default="")
sent_from: str = field(default="")
send_to: str = field(default="")
def __str__(self):

View file

@ -183,7 +183,7 @@ class CodeParser:
def parse_file_list(cls, block: str, text: str, lang: str = "") -> list[str]:
# Regular expression pattern to find the tasks list.
code = cls.parse_code(block, text, lang)
print(code)
# print(code)
pattern = r'\s*(.*=.*)?(\[.*\])'
# Extract tasks list string using regex.
@ -230,3 +230,8 @@ def print_members(module, indent=0):
print(f'{prefix}Function: {name}')
elif inspect.ismethod(obj):
print(f'{prefix}Method: {name}')
def parse_recipient(text):
    """Extract the recipient named in the "## Send To:" section of ``text``.

    Args:
        text: Report text that may contain a "## Send To:" heading followed
            by a single alphabetic word.

    Returns:
        The word following the heading, or an empty string when the heading
        (or a word after it) is not present.
    """
    # Raw string: "\s" inside a plain literal is an invalid escape sequence.
    pattern = r"## Send To:\s*([A-Za-z]+)"  # hard code for now
    recipient = re.search(pattern, text)
    return recipient.group(1) if recipient else ""

View file

@ -135,7 +135,7 @@ FAILED (failures=1)
;
## instruction:
The error is in the development code, specifically in the calculate_score method of the Player class. The method is not correctly handling the case where there are multiple Aces in the player's hand. The current implementation only subtracts 10 from the score once if the score is over 21 and there's an Ace in the hand. However, in the case of multiple Aces, it should subtract 10 for each Ace until the score is 21 or less.
## File To Rewrite
## File To Rewrite:
player.py
## Status:
FAIL
@ -151,6 +151,6 @@ async def test_debug_error():
debug_error = DebugError("debug_error")
result = await debug_error.run(context=EXAMPLE_MSG)
file_name, rewritten_code = await debug_error.run(context=EXAMPLE_MSG_CONTENT)
assert "```python" in result
assert "class TestPlayer" in rewritten_code