mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-27 14:25:20 +02:00
1. RunCode -> DebugError loop done; 2. modify default k for memory search
This commit is contained in:
parent
6bf527d31e
commit
8abdca3057
10 changed files with 140 additions and 70 deletions
|
|
@ -5,7 +5,9 @@
|
|||
@Author : alexanderwu
|
||||
@File : debug_error.py
|
||||
"""
|
||||
import re
|
||||
from metagpt.actions.action import Action
|
||||
from metagpt.utils.common import CodeParser
|
||||
|
||||
PROMPT_TEMPLATE = """
|
||||
NOTICE
|
||||
|
|
@ -21,7 +23,7 @@ Now you should start rewriting the code:
|
|||
## file name of the code to rewrite: Write code with triple quoto. Do your best to implement THIS ONLY ONE FILE.
|
||||
"""
|
||||
class DebugError(Action):
|
||||
def __init__(self, name, context=None, llm=None):
|
||||
def __init__(self, name="DebugError", context=None, llm=None):
|
||||
super().__init__(name, context, llm)
|
||||
|
||||
# async def run(self, code, error):
|
||||
|
|
@ -31,8 +33,15 @@ class DebugError(Action):
|
|||
# return fixed_code
|
||||
|
||||
async def run(self, context):
|
||||
if "PASS" in context:
|
||||
return "", "the original code works fine, no need to debug"
|
||||
|
||||
file_name = re.search("## File To Rewrite:\s*(.+\\.py)", context).group(1)
|
||||
|
||||
prompt = PROMPT_TEMPLATE.format(context=context)
|
||||
|
||||
rsp = await self._aask(prompt)
|
||||
|
||||
return rsp
|
||||
code = CodeParser.parse_code(block="", text=rsp)
|
||||
|
||||
return file_name, code
|
||||
|
|
|
|||
|
|
@ -16,8 +16,25 @@ PROMPT_TEMPLATE = """
|
|||
Role: You are a senior development and qa engineer, your role is summarize the code running result.
|
||||
If the running result does not include an error, you should explicitly approve the result.
|
||||
On the other hand, if the running result indicates some error, you should point out which part, the development code or the test code, produces the error,
|
||||
and give specific instructions on fixing the errors.
|
||||
and give specific instructions on fixing the errors. Here is the code info:
|
||||
{context}
|
||||
Now you should begin your analysis
|
||||
---
|
||||
## instruction:
|
||||
Please summarize the cause of the errors and give correction instruction
|
||||
## File To Rewrite:
|
||||
Determine the ONE file to rewrite in order to fix the error, for example, xyz.py, or test_xyz.py
|
||||
## Status:
|
||||
Determine if all of the code works fine, if so write PASS, else FAIL,
|
||||
WRITE ONLY ONE WORD, PASS OR FAIL, IN THI SECTION
|
||||
## Send To:
|
||||
Please write Engineer if the errors are due to problematic development codes, and QaEngineer to problematic test codes, and NoOne if there are no errors,
|
||||
WRITE ONLY ONE WORD, Engineer OR QaEngineer OR NoOne, IN THIS SECTION.
|
||||
---
|
||||
You should fill in necessary instruction, status, send to, and finally return all content between the --- segment line.
|
||||
"""
|
||||
|
||||
CONTEXT = """
|
||||
## Development Code File Name
|
||||
{code_file_name}
|
||||
## Development Code
|
||||
|
|
@ -35,18 +52,6 @@ and give specific instructions on fixing the errors.
|
|||
## Running Output
|
||||
standard output: {outs};
|
||||
standard errors: {errs};
|
||||
## instruction:
|
||||
Please summarize the cause of the errors and give correction instruction
|
||||
## File To Rewrite
|
||||
Determine the ONE file to rewrite in order to fix the error, for example, xyz.py, or test_xyz.py
|
||||
## Status:
|
||||
Determine if all of the code works fine, if so write PASS, else FAIL,
|
||||
WRITE ONLY ONE WORD, PASS OR FAIL, IN THI SECTION
|
||||
## Send To:
|
||||
Please write Engineer if the errors are due to problematic development codes, and QaEngineer to problematic test codes, and NoOne if there are no errors,
|
||||
WRITE ONLY ONE WORD, Engineer OR QaEngineer OR NoOne, IN THIS SECTION.
|
||||
---
|
||||
You should fill in necessary summary, status, send to, and finally return all content between the --- segment line.
|
||||
"""
|
||||
|
||||
class RunCode(Action):
|
||||
|
|
@ -97,15 +102,20 @@ class RunCode(Action):
|
|||
elif mode == "text":
|
||||
outs, errs = await self.run_text(code=code)
|
||||
|
||||
logger.info(outs)
|
||||
logger.info(errs)
|
||||
|
||||
prompt = PROMPT_TEMPLATE.format(
|
||||
logger.info(f"{outs=}")
|
||||
logger.info(f"{errs=}")
|
||||
|
||||
context = CONTEXT.format(
|
||||
code=code, code_file_name=code_file_name,
|
||||
test_code=test_code, test_file_name=test_file_name,
|
||||
command=" ".join(command),
|
||||
outs=outs, errs=errs
|
||||
outs=outs[:500], # outs might be long but they are not important, truncate them to avoid token overflow
|
||||
errs=errs
|
||||
)
|
||||
|
||||
prompt = PROMPT_TEMPLATE.format(context=context)
|
||||
rsp = await self._aask(prompt)
|
||||
|
||||
return rsp
|
||||
result = context + rsp
|
||||
|
||||
return result
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ you should correctly import the necessary classes based on these file locations!
|
|||
"""
|
||||
|
||||
class WriteTest(Action):
|
||||
def __init__(self, name="", context=None, llm=None):
|
||||
def __init__(self, name="WriteTest", context=None, llm=None):
|
||||
super().__init__(name, context, llm)
|
||||
|
||||
async def write_code(self, prompt):
|
||||
|
|
|
|||
|
|
@ -43,13 +43,13 @@ class LongTermMemory(Memory):
|
|||
# and ignore adding messages from recover repeatedly
|
||||
self.memory_storage.add(message)
|
||||
|
||||
def remember(self, observed: list[Message], k=10) -> list[Message]:
|
||||
def remember(self, observed: list[Message], k=0) -> list[Message]:
|
||||
"""
|
||||
remember the most similar k memories from observed Messages, return all when k=0
|
||||
1. remember the short-term memory(stm) news
|
||||
2. integrate the stm news with ltm(long-term memory) news
|
||||
"""
|
||||
stm_news = super(LongTermMemory, self).remember(observed) # shot-term memory news
|
||||
stm_news = super(LongTermMemory, self).remember(observed, k=k) # shot-term memory news
|
||||
if not self.memory_storage.is_initialized:
|
||||
# memory_storage hasn't initialized, use default `remember` to get stm_news
|
||||
return stm_news
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ class Memory:
|
|||
"""Return the most recent k memories, return all when k=0"""
|
||||
return self.storage[-k:]
|
||||
|
||||
def remember(self, observed: list[Message], k=10) -> list[Message]:
|
||||
def remember(self, observed: list[Message], k=0) -> list[Message]:
|
||||
"""remember the most recent k memories from observed Messages, return all when k=0"""
|
||||
already_observed = self.get(k)
|
||||
news: list[Message] = []
|
||||
|
|
|
|||
|
|
@ -141,7 +141,8 @@ class Engineer(Role):
|
|||
msg = Message(content=code, role=self.profile, cause_by=type(self._rc.todo))
|
||||
self._rc.memory.add(msg)
|
||||
|
||||
code_msg_all.append(FILENAME_CODE_SEP.join([todo, str(file_path), code]))
|
||||
code_msg = todo + FILENAME_CODE_SEP + str(file_path)
|
||||
code_msg_all.append(code_msg)
|
||||
|
||||
logger.info(f'Done {self.get_workspace()} generating.')
|
||||
msg = Message(
|
||||
|
|
@ -188,7 +189,8 @@ class Engineer(Role):
|
|||
msg = Message(content=code, role=self.profile, cause_by=WriteCode)
|
||||
self._rc.memory.add(msg)
|
||||
|
||||
code_msg_all.append(FILENAME_CODE_SEP.join([todo, str(file_path), code]))
|
||||
code_msg = todo + FILENAME_CODE_SEP + str(file_path)
|
||||
code_msg_all.append(code_msg)
|
||||
|
||||
logger.info(f'Done {self.get_workspace()} generating.')
|
||||
msg = Message(
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ from metagpt.logs import logger
|
|||
from metagpt.roles import Role
|
||||
from metagpt.schema import Message
|
||||
from metagpt.roles.engineer import Engineer
|
||||
from metagpt.utils.common import CodeParser
|
||||
from metagpt.utils.common import CodeParser, parse_recipient
|
||||
from metagpt.utils.special_tokens import MSG_SEP, FILENAME_CODE_SEP
|
||||
|
||||
class QaEngineer(Role):
|
||||
|
|
@ -24,7 +24,9 @@ class QaEngineer(Role):
|
|||
constraints="The test code you write should conform to code standard like PEP8, be modular, easy to read and maintain"):
|
||||
super().__init__(name, profile, goal, constraints)
|
||||
self._init_actions([WriteTest])
|
||||
self._watch([WriteCode])
|
||||
self._watch([WriteCode, WriteTest, RunCode, DebugError])
|
||||
self.test_round = 0
|
||||
self.test_round_allowed = 5 # hard code for 1 WriteTest round + 2 x (RunCode -> DebugError loop)
|
||||
|
||||
@classmethod
|
||||
def parse_workspace(cls, system_design_msg: Message) -> str:
|
||||
|
|
@ -38,12 +40,11 @@ class QaEngineer(Role):
|
|||
return WORKSPACE_ROOT / 'src'
|
||||
workspace = self.parse_workspace(msg)
|
||||
# project directory: workspace/{package_name}, which contains package source code folder, tests folder, resources folder, etc.
|
||||
# source codes directory: workspace/{package_name}/{package_name}
|
||||
if return_proj_dir:
|
||||
return WORKSPACE_ROOT / workspace
|
||||
# development codes directory: workspace/{package_name}/{package_name}
|
||||
return WORKSPACE_ROOT / workspace / workspace
|
||||
|
||||
|
||||
|
||||
def write_file(self, filename: str, code: str):
|
||||
workspace = self.get_workspace() / 'tests'
|
||||
file = workspace / filename
|
||||
|
|
@ -60,8 +61,12 @@ class QaEngineer(Role):
|
|||
for code_msg in code_msgs:
|
||||
|
||||
# write tests
|
||||
file_name, file_path, code_to_test = code_msg.split(FILENAME_CODE_SEP)
|
||||
file_name, file_path = code_msg.split(FILENAME_CODE_SEP)
|
||||
code_to_test = open(file_path, "r").read()
|
||||
if "test" in file_name:
|
||||
continue # Engineer might write some test files, skip testing a test file
|
||||
test_file_name = "test_" + file_name
|
||||
test_file_path = self.get_workspace() / "tests" / test_file_name
|
||||
logger.info(f'Writing {test_file_name}..')
|
||||
test_code = await WriteTest().run(
|
||||
code_to_test=code_to_test,
|
||||
|
|
@ -72,48 +77,86 @@ class QaEngineer(Role):
|
|||
)
|
||||
self.write_file(test_file_name, test_code)
|
||||
|
||||
# run tests
|
||||
result_msg = await RunCode().run(
|
||||
mode="script",
|
||||
code=code_to_test,
|
||||
code_file_name=file_name,
|
||||
test_code=test_code,
|
||||
test_file_name=test_file_name,
|
||||
command=['python', f'tests/{test_file_name}'],
|
||||
working_directory=self.get_workspace(), # workspace/package_name, will run tests/test_xxx.py here
|
||||
additional_python_paths=[self.get_workspace(return_proj_dir=False)], # workspace/package_name/package_name,
|
||||
# import statement inside package code needs this
|
||||
# prepare context for run tests in next round
|
||||
command = ['python', f'tests/{test_file_name}']
|
||||
file_info = {
|
||||
"file_name": file_name, "file_path": str(file_path),
|
||||
"test_file_name": test_file_name, "test_file_path": str(test_file_path),
|
||||
"command": command
|
||||
}
|
||||
msg = Message(
|
||||
content=str(file_info), role=self.profile, cause_by=WriteTest,
|
||||
sent_from="QaEngineer", send_to="QaEngineer"
|
||||
)
|
||||
|
||||
result_msg_all.append(result_msg)
|
||||
|
||||
# RunCode().run(
|
||||
# mode="script",
|
||||
# working_directory=self.get_workspace(),
|
||||
# additional_python_paths=[self.get_workspace(return_proj_dir=False)],
|
||||
# command=['python', '-m', 'unittest', 'discover', '-s', 'tests']
|
||||
# )
|
||||
self._publish_message(msg)
|
||||
|
||||
logger.info(f'Done {self.get_workspace()}/tests generating.')
|
||||
msg_content = MSG_SEP.join(result_msg_all)
|
||||
msg = Message(content=msg_content, role=self.profile, cause_by=RunCode, send_to=QaEngineer)
|
||||
return msg
|
||||
|
||||
async def _run_code(self, msg):
|
||||
file_info = eval(msg.content)
|
||||
code_to_test = open(file_info["file_path"], "r").read()
|
||||
test_code = open(file_info["test_file_path"], "r").read()
|
||||
proj_dir = self.get_workspace()
|
||||
development_code_dir = self.get_workspace(return_proj_dir=False)
|
||||
|
||||
result_msg = await RunCode().run(
|
||||
mode="script",
|
||||
code=code_to_test,
|
||||
code_file_name=file_info["file_name"],
|
||||
test_code=test_code,
|
||||
test_file_name=file_info["test_file_name"],
|
||||
command=file_info["command"],
|
||||
working_directory=proj_dir, # workspace/package_name, will run tests/test_xxx.py here
|
||||
additional_python_paths=[development_code_dir], # workspace/package_name/package_name,
|
||||
# import statement inside package code needs this
|
||||
)
|
||||
|
||||
recipient = parse_recipient(result_msg) # the recipient might be Engineer or myself
|
||||
content = str(file_info) + FILENAME_CODE_SEP + result_msg
|
||||
msg = Message(
|
||||
content=content, role=self.profile, cause_by=RunCode,
|
||||
sent_from="QaEngineer", send_to=recipient
|
||||
)
|
||||
self._publish_message(msg)
|
||||
|
||||
async def _debug_error(self, msg):
|
||||
# process the msg, if the code works fine, no need to debug
|
||||
|
||||
# else: debug and rewrite the code
|
||||
|
||||
pass
|
||||
file_info, context = msg.content.split(FILENAME_CODE_SEP)
|
||||
file_name, code = await DebugError().run(context)
|
||||
if file_name:
|
||||
self.write_file(file_name, code)
|
||||
recipient = msg.sent_from # send back to the one who ran the code for another run, might be one's self
|
||||
msg = Message(content=file_info, role=self.profile, cause_by=DebugError, sent_from="QaEngineer", send_to=recipient)
|
||||
self._publish_message(msg)
|
||||
|
||||
async def _observe(self) -> int:
|
||||
await super()._observe()
|
||||
self._rc.news = [msg for msg in self._rc.news \
|
||||
if msg.send_to == "QaEngineer"] # only relevant msgs count as observed news
|
||||
return len(self._rc.news)
|
||||
|
||||
async def _act(self) -> Message:
|
||||
if self.test_round > self.test_round_allowed:
|
||||
result_msg = Message(
|
||||
content=f"Exceeding {self.test_round_allowed} rounds of tests, skip (writing code counts as a round, too)",
|
||||
role=self.profile, cause_by=WriteTest, sent_from="QaEngineer", send_to=""
|
||||
)
|
||||
return result_msg
|
||||
|
||||
for msg in self._rc.news:
|
||||
if msg.send_to != "QaEngineer":
|
||||
continue
|
||||
# Decide what to do based on observed msg type, currently defined by human,
|
||||
# might potentially be moved to _think, that is, let the agent decides for itself
|
||||
if msg.cause_by == WriteCode:
|
||||
# engineer wrote a code, write a test for it
|
||||
# engineer wrote a code, time to write a test for it
|
||||
result_msg = await self._write_test(msg)
|
||||
elif msg.cause_by in [WriteTest, DebugError]:
|
||||
# I wrote or debugged my test code, time to run it
|
||||
result_msg = await self._run_code(msg)
|
||||
elif msg.cause_by == RunCode:
|
||||
# I wrote and ran my test code, fix bugs, if any
|
||||
# I ran my test code, time to fix bugs, if any
|
||||
result_msg = await self._debug_error(msg)
|
||||
|
||||
self.test_round += 1
|
||||
result_msg = Message(
|
||||
content=f"Round {self.test_round} of tests done",
|
||||
role=self.profile, cause_by=WriteTest, sent_from="QaEngineer", send_to=""
|
||||
)
|
||||
return result_msg
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ class Message:
|
|||
instruct_content: BaseModel = field(default=None)
|
||||
role: str = field(default='user') # system / user / assistant
|
||||
cause_by: Type["Action"] = field(default="")
|
||||
sent_from: str = field(default="")
|
||||
send_to: str = field(default="")
|
||||
|
||||
def __str__(self):
|
||||
|
|
|
|||
|
|
@ -183,7 +183,7 @@ class CodeParser:
|
|||
def parse_file_list(cls, block: str, text: str, lang: str = "") -> list[str]:
|
||||
# Regular expression pattern to find the tasks list.
|
||||
code = cls.parse_code(block, text, lang)
|
||||
print(code)
|
||||
# print(code)
|
||||
pattern = r'\s*(.*=.*)?(\[.*\])'
|
||||
|
||||
# Extract tasks list string using regex.
|
||||
|
|
@ -230,3 +230,8 @@ def print_members(module, indent=0):
|
|||
print(f'{prefix}Function: {name}')
|
||||
elif inspect.ismethod(obj):
|
||||
print(f'{prefix}Method: {name}')
|
||||
|
||||
def parse_recipient(text):
|
||||
pattern = "## Send To:\s*([A-Za-z]+)\s*?" # hard code for now
|
||||
recipient = re.search(pattern, text)
|
||||
return recipient.group(1) if recipient else ""
|
||||
|
|
|
|||
|
|
@ -135,7 +135,7 @@ FAILED (failures=1)
|
|||
;
|
||||
## instruction:
|
||||
The error is in the development code, specifically in the calculate_score method of the Player class. The method is not correctly handling the case where there are multiple Aces in the player's hand. The current implementation only subtracts 10 from the score once if the score is over 21 and there's an Ace in the hand. However, in the case of multiple Aces, it should subtract 10 for each Ace until the score is 21 or less.
|
||||
## File To Rewrite
|
||||
## File To Rewrite:
|
||||
player.py
|
||||
## Status:
|
||||
FAIL
|
||||
|
|
@ -151,6 +151,6 @@ async def test_debug_error():
|
|||
|
||||
debug_error = DebugError("debug_error")
|
||||
|
||||
result = await debug_error.run(context=EXAMPLE_MSG)
|
||||
file_name, rewritten_code = await debug_error.run(context=EXAMPLE_MSG_CONTENT)
|
||||
|
||||
assert "```python" in result
|
||||
assert "class TestPlayer" in rewritten_code
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue