diff --git a/metagpt/actions/write_code.py b/metagpt/actions/write_code.py index 0b86ac1bb..feb15657d 100644 --- a/metagpt/actions/write_code.py +++ b/metagpt/actions/write_code.py @@ -23,11 +23,7 @@ from tenacity import retry, stop_after_attempt, wait_random_exponential from metagpt.actions.action import Action from metagpt.actions.project_management_an import REFINED_TASK_LIST, TASK_LIST from metagpt.actions.write_code_plan_and_change_an import REFINED_TEMPLATE -from metagpt.const import ( - BUGFIX_FILENAME, - CODE_PLAN_AND_CHANGE_FILENAME, - REQUIREMENT_FILENAME, -) +from metagpt.const import BUGFIX_FILENAME, REQUIREMENT_FILENAME from metagpt.logs import logger from metagpt.schema import CodingContext, Document, RunCodeResult from metagpt.utils.common import CodeParser @@ -98,8 +94,6 @@ class WriteCode(Action): bug_feedback = await self.repo.docs.get(filename=BUGFIX_FILENAME) coding_context = CodingContext.loads(self.i_context.content) test_doc = await self.repo.test_outputs.get(filename="test_" + coding_context.filename + ".json") - code_plan_and_change_doc = await self.repo.docs.code_plan_and_change.get(filename=CODE_PLAN_AND_CHANGE_FILENAME) - code_plan_and_change = code_plan_and_change_doc.content if code_plan_and_change_doc else "" requirement_doc = await self.repo.docs.get(filename=REQUIREMENT_FILENAME) summary_doc = None if coding_context.design_doc and coding_context.design_doc.filename: @@ -111,7 +105,7 @@ class WriteCode(Action): if bug_feedback: code_context = coding_context.code_doc.content - elif code_plan_and_change: + elif self.config.inc: code_context = await self.get_codes( coding_context.task_doc, exclude=self.i_context.filename, project_repo=self.repo, use_inc=True ) @@ -122,10 +116,10 @@ class WriteCode(Action): project_repo=self.repo.with_src_path(self.context.src_workspace), ) - if code_plan_and_change: + if self.config.inc: prompt = REFINED_TEMPLATE.format( user_requirement=requirement_doc.content if requirement_doc else "", - code_plan_and_change=code_plan_and_change, + code_plan_and_change=str(coding_context.code_plan_and_change_doc), design=coding_context.design_doc.content if coding_context.design_doc else "", task=coding_context.task_doc.content if coding_context.task_doc else "", code=code_context, diff --git a/metagpt/actions/write_code_review.py b/metagpt/actions/write_code_review.py index da636eb36..ac6fe7045 100644 --- a/metagpt/actions/write_code_review.py +++ b/metagpt/actions/write_code_review.py @@ -13,7 +13,7 @@ from tenacity import retry, stop_after_attempt, wait_random_exponential from metagpt.actions import WriteCode from metagpt.actions.action import Action -from metagpt.const import CODE_PLAN_AND_CHANGE_FILENAME, REQUIREMENT_FILENAME +from metagpt.const import REQUIREMENT_FILENAME from metagpt.logs import logger from metagpt.schema import CodingContext from metagpt.utils.common import CodeParser @@ -149,29 +149,21 @@ class WriteCodeReview(Action): use_inc=self.config.inc, ) - if not self.config.inc: - context = "\n".join( - [ - "## System Design\n" + str(self.i_context.design_doc) + "\n", - "## Task\n" + task_content + "\n", - "## Code Files\n" + code_context + "\n", - ] - ) - else: + ctx_list = [ + "## System Design\n" + str(self.i_context.design_doc) + "\n", + "## Task\n" + task_content + "\n", + "## Code Files\n" + code_context + "\n", + ] + if self.config.inc: requirement_doc = await self.repo.docs.get(filename=REQUIREMENT_FILENAME) - code_plan_and_change_doc = await self.repo.get(filename=CODE_PLAN_AND_CHANGE_FILENAME) - context = "\n".join( - [ - "## User New Requirements\n" + str(requirement_doc) + "\n", - "## Code Plan And Change\n" + str(code_plan_and_change_doc) + "\n", - "## System Design\n" + str(self.i_context.design_doc) + "\n", - "## Task\n" + task_content + "\n", - "## Code Files\n" + code_context + "\n", - ] - ) + insert_ctx_list = [ + "## User New Requirements\n" + str(requirement_doc) + "\n", + "## Code Plan And Change\n" + str(self.i_context.code_plan_and_change_doc) + "\n", + ] + ctx_list = insert_ctx_list + ctx_list context_prompt = PROMPT_TEMPLATE.format( - context=context, + context="\n".join(ctx_list), code=iterative_code, filename=self.i_context.code_doc.filename, ) diff --git a/metagpt/const.py b/metagpt/const.py index 2cffaa804..79c33a84d 100644 --- a/metagpt/const.py +++ b/metagpt/const.py @@ -84,7 +84,6 @@ MESSAGE_ROUTE_TO_NONE = "" REQUIREMENT_FILENAME = "requirement.txt" BUGFIX_FILENAME = "bugfix.txt" PACKAGE_REQUIREMENTS_FILENAME = "requirements.txt" -CODE_PLAN_AND_CHANGE_FILENAME = "code_plan_and_change.json" DOCS_FILE_REPO = "docs" PRDS_FILE_REPO = "docs/prd" diff --git a/metagpt/memory/memory_storage.py b/metagpt/memory/memory_storage.py index c029d027b..fa04d8138 100644 --- a/metagpt/memory/memory_storage.py +++ b/metagpt/memory/memory_storage.py @@ -7,7 +7,6 @@ from pathlib import Path from typing import Optional -from langchain.embeddings import OpenAIEmbeddings from langchain.vectorstores.faiss import FAISS from langchain_core.embeddings import Embeddings @@ -15,6 +14,7 @@ from metagpt.const import DATA_PATH, MEM_TTL from metagpt.document_store.faiss_store import FaissStore from metagpt.logs import logger from metagpt.schema import Message +from metagpt.utils.embedding import get_embedding from metagpt.utils.serialize import deserialize_message, serialize_message @@ -30,7 +30,7 @@ class MemoryStorage(FaissStore): self.threshold: float = 0.1 # experience value. TODO The threshold to filter similar memories self._initialized: bool = False - self.embedding = embedding or OpenAIEmbeddings() + self.embedding = embedding or get_embedding() self.store: FAISS = None # Faiss engine @property diff --git a/metagpt/roles/engineer.py b/metagpt/roles/engineer.py index 40ade2110..7037ca0b3 100644 --- a/metagpt/roles/engineer.py +++ b/metagpt/roles/engineer.py @@ -20,7 +20,6 @@ from __future__ import annotations import json -import os from collections import defaultdict from pathlib import Path from typing import Set @@ -32,7 +31,6 @@ from metagpt.actions.summarize_code import SummarizeCode from metagpt.actions.write_code_plan_and_change_an import WriteCodePlanAndChange from metagpt.const import ( CODE_PLAN_AND_CHANGE_FILE_REPO, - CODE_PLAN_AND_CHANGE_FILENAME, REQUIREMENT_FILENAME, SYSTEM_DESIGN_FILE_REPO, TASK_FILE_REPO, @@ -119,10 +117,10 @@ class Engineer(Role): dependencies = {coding_context.design_doc.root_relative_path, coding_context.task_doc.root_relative_path} if self.config.inc: - dependencies.add(os.path.join(CODE_PLAN_AND_CHANGE_FILE_REPO, CODE_PLAN_AND_CHANGE_FILENAME)) + dependencies.add(coding_context.code_plan_and_change_doc.root_relative_path) await self.project_repo.srcs.save( filename=coding_context.filename, - dependencies=dependencies, + dependencies=list(dependencies), content=coding_context.code_doc.content, ) msg = Message( @@ -215,11 +213,12 @@ class Engineer(Role): self.rc.todo.i_context.design_filename, self.rc.todo.i_context.task_filename, } + code_plan_and_change_filepath = Path(self.rc.todo.i_context.design_filename) await self.project_repo.docs.code_plan_and_change.save( - filename=self.rc.todo.i_context.filename, content=code_plan_and_change, dependencies=dependencies + filename=code_plan_and_change_filepath.name, content=code_plan_and_change, dependencies=dependencies ) await self.project_repo.resources.code_plan_and_change.save( - filename=Path(self.rc.todo.i_context.filename).with_suffix(".md").name, + filename=code_plan_and_change_filepath.with_suffix(".md").name, content=node.content, dependencies=dependencies, ) @@ -269,15 +268,24 @@ class Engineer(Role): dependencies = {Path(i) for i in await dependency.get(old_code_doc.root_relative_path)} task_doc = None design_doc = None + code_plan_and_change_doc = None for i in dependencies: if str(i.parent) == TASK_FILE_REPO: task_doc = await self.project_repo.docs.task.get(i.name) elif str(i.parent) == SYSTEM_DESIGN_FILE_REPO: design_doc = await self.project_repo.docs.system_design.get(i.name) + elif str(i.parent) == CODE_PLAN_AND_CHANGE_FILE_REPO: + code_plan_and_change_doc = await self.project_repo.docs.code_plan_and_change.get(i.name) if not task_doc or not design_doc: logger.error(f'Detected source code "{filename}" from an unknown origin.') raise ValueError(f'Detected source code "{filename}" from an unknown origin.') - context = CodingContext(filename=filename, design_doc=design_doc, task_doc=task_doc, code_doc=old_code_doc) + context = CodingContext( + filename=filename, + design_doc=design_doc, + task_doc=task_doc, + code_doc=old_code_doc, + code_plan_and_change_doc=code_plan_and_change_doc, + ) return context async def _new_coding_doc(self, filename, dependency): @@ -296,6 +304,7 @@ class Engineer(Role): for filename in changed_task_files: design_doc = await self.project_repo.docs.system_design.get(filename) task_doc = await self.project_repo.docs.task.get(filename) + code_plan_and_change_doc = await self.project_repo.docs.code_plan_and_change.get(filename) task_list = self._parse_tasks(task_doc) for task_filename in task_list: old_code_doc = await self.project_repo.srcs.get(task_filename) @@ -303,9 +312,18 @@ class Engineer(Role): old_code_doc = Document( root_path=str(self.project_repo.src_relative_path), filename=task_filename, content="" ) - context = CodingContext( - filename=task_filename, design_doc=design_doc, task_doc=task_doc, code_doc=old_code_doc - ) + if not code_plan_and_change_doc: + context = CodingContext( + filename=task_filename, design_doc=design_doc, task_doc=task_doc, code_doc=old_code_doc + ) + else: + context = CodingContext( + filename=task_filename, + design_doc=design_doc, + task_doc=task_doc, + code_doc=old_code_doc, + code_plan_and_change_doc=code_plan_and_change_doc, + ) coding_doc = Document( root_path=str(self.project_repo.src_relative_path), filename=task_filename, diff --git a/metagpt/schema.py b/metagpt/schema.py index 15854f676..7bbb567b9 100644 --- a/metagpt/schema.py +++ b/metagpt/schema.py @@ -37,7 +37,6 @@ from pydantic import ( ) from metagpt.const import ( - CODE_PLAN_AND_CHANGE_FILENAME, MESSAGE_ROUTE_CAUSE_BY, MESSAGE_ROUTE_FROM, MESSAGE_ROUTE_TO, @@ -613,6 +612,7 @@ class CodingContext(BaseContext): design_doc: Optional[Document] = None task_doc: Optional[Document] = None code_doc: Optional[Document] = None + code_plan_and_change_doc: Optional[Document] = None class TestingContext(BaseContext): @@ -667,7 +667,6 @@ class BugFixContext(BaseContext): class CodePlanAndChangeContext(BaseModel): - filename: str = CODE_PLAN_AND_CHANGE_FILENAME requirement: str = "" prd_filename: str = "" design_filename: str = "" diff --git a/tests/data/incremental_dev_project/Gomoku.zip b/tests/data/incremental_dev_project/Gomoku.zip index 23649565a..a110ccdd2 100644 Binary files a/tests/data/incremental_dev_project/Gomoku.zip and b/tests/data/incremental_dev_project/Gomoku.zip differ diff --git a/tests/data/incremental_dev_project/dice_simulator_new.zip b/tests/data/incremental_dev_project/dice_simulator_new.zip index 4752ab4c5..377eaa4f5 100644 Binary files a/tests/data/incremental_dev_project/dice_simulator_new.zip and b/tests/data/incremental_dev_project/dice_simulator_new.zip differ diff --git a/tests/data/incremental_dev_project/number_guessing_game.zip b/tests/data/incremental_dev_project/number_guessing_game.zip index 7bbe07713..9d192c1e6 100644 Binary files a/tests/data/incremental_dev_project/number_guessing_game.zip and b/tests/data/incremental_dev_project/number_guessing_game.zip differ diff --git a/tests/data/incremental_dev_project/pygame_2048.zip b/tests/data/incremental_dev_project/pygame_2048.zip index 93e9cf0fe..d589a59b3 100644 Binary files a/tests/data/incremental_dev_project/pygame_2048.zip and b/tests/data/incremental_dev_project/pygame_2048.zip differ diff --git a/tests/data/incremental_dev_project/simple_add_calculator.zip b/tests/data/incremental_dev_project/simple_add_calculator.zip index e6e73f8f9..945e2af5d 100644 Binary files a/tests/data/incremental_dev_project/simple_add_calculator.zip and b/tests/data/incremental_dev_project/simple_add_calculator.zip differ diff --git a/tests/data/incremental_dev_project/snake_game.zip b/tests/data/incremental_dev_project/snake_game.zip index 997203a18..739c2a1d1 100644 Binary files a/tests/data/incremental_dev_project/snake_game.zip and b/tests/data/incremental_dev_project/snake_game.zip differ diff --git a/tests/data/incremental_dev_project/word_cloud.zip b/tests/data/incremental_dev_project/word_cloud.zip index d8747d14d..01f83f053 100644 Binary files a/tests/data/incremental_dev_project/word_cloud.zip and b/tests/data/incremental_dev_project/word_cloud.zip differ diff --git a/tests/metagpt/document_store/test_faiss_store.py b/tests/metagpt/document_store/test_faiss_store.py index 7e2979bd4..397ba6ce5 100644 --- a/tests/metagpt/document_store/test_faiss_store.py +++ b/tests/metagpt/document_store/test_faiss_store.py @@ -6,6 +6,9 @@ @File : test_faiss_store.py """ +from typing import Optional + +import numpy as np import pytest from metagpt.const import EXAMPLE_PATH @@ -14,8 +17,17 @@ from metagpt.logs import logger from metagpt.roles import Sales +def mock_openai_embed_documents(self, texts: list[str], chunk_size: Optional[int] = 0) -> list[list[float]]: + num = len(texts) + embeds = np.random.randint(1, 100, size=(num, 1536)) # 1536: openai embedding dim + embeds = (embeds - embeds.mean(axis=0)) / (embeds.std(axis=0)) + return embeds + + @pytest.mark.asyncio -async def test_search_json(): +async def test_search_json(mocker): + mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents) + store = FaissStore(EXAMPLE_PATH / "example.json") role = Sales(profile="Sales", store=store) query = "Which facial cleanser is good for oily skin?" @@ -24,7 +36,9 @@ async def test_search_json(): @pytest.mark.asyncio -async def test_search_xlsx(): +async def test_search_xlsx(mocker): + mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents) + store = FaissStore(EXAMPLE_PATH / "example.xlsx") role = Sales(profile="Sales", store=store) query = "Which facial cleanser is good for oily skin?" @@ -33,7 +47,9 @@ async def test_search_xlsx(): @pytest.mark.asyncio -async def test_write(): +async def test_write(mocker): + mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents) + store = FaissStore(EXAMPLE_PATH / "example.xlsx", meta_col="Answer", content_col="Question") _faiss_store = store.write() assert _faiss_store.docstore diff --git a/tests/metagpt/memory/mock_text_embed.py b/tests/metagpt/memory/mock_text_embed.py new file mode 100644 index 000000000..897c7cf10 --- /dev/null +++ b/tests/metagpt/memory/mock_text_embed.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : + +from typing import Optional + +import numpy as np + +dim = 1536 # openai embedding dim + +text_embed_arr = [ + {"text": "Write a cli snake game", "embed": np.zeros(shape=[1, dim])}, # mock data, same as below + {"text": "Write a game of cli snake", "embed": np.zeros(shape=[1, dim])}, + {"text": "Write a 2048 web game", "embed": np.ones(shape=[1, dim])}, + {"text": "Write a Battle City", "embed": np.ones(shape=[1, dim])}, + { + "text": "The user has requested the creation of a command-line interface (CLI) snake game", + "embed": np.zeros(shape=[1, dim]), + }, + {"text": "The request is command-line interface (CLI) snake game", "embed": np.zeros(shape=[1, dim])}, + { + "text": "Incorporate basic features of a snake game such as scoring and increasing difficulty", + "embed": np.ones(shape=[1, dim]), + }, +] + +text_idx_dict = {item["text"]: idx for idx, item in enumerate(text_embed_arr)} + + +def mock_openai_embed_documents(self, texts: list[str], chunk_size: Optional[int] = 0) -> list[list[float]]: + idx = text_idx_dict.get(texts[0]) + embed = text_embed_arr[idx].get("embed") + return embed diff --git a/tests/metagpt/memory/test_longterm_memory.py b/tests/metagpt/memory/test_longterm_memory.py index 5c71ddd13..f7e652758 100644 --- a/tests/metagpt/memory/test_longterm_memory.py +++ b/tests/metagpt/memory/test_longterm_memory.py @@ -4,20 +4,22 @@ @Desc : unittest of `metagpt/memory/longterm_memory.py` """ -import os import pytest from metagpt.actions import UserRequirement -from metagpt.config2 import config from metagpt.memory.longterm_memory import LongTermMemory from metagpt.roles.role import RoleContext from metagpt.schema import Message - -os.environ.setdefault("OPENAI_API_KEY", config.get_openai_llm().api_key) +from tests.metagpt.memory.mock_text_embed import ( + mock_openai_embed_documents, + text_embed_arr, +) -def test_ltm_search(): +def test_ltm_search(mocker): + mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents) + role_id = "UTUserLtm(Product Manager)" from metagpt.environment import Environment @@ -27,20 +29,20 @@ def test_ltm_search(): ltm = LongTermMemory() ltm.recover_memory(role_id, rc) - idea = "Write a cli snake game" + idea = text_embed_arr[0].get("text", "Write a cli snake game") message = Message(role="User", content=idea, cause_by=UserRequirement) news = ltm.find_news([message]) assert len(news) == 1 ltm.add(message) - sim_idea = "Write a game of cli snake" + sim_idea = text_embed_arr[1].get("text", "Write a game of cli snake") sim_message = Message(role="User", content=sim_idea, cause_by=UserRequirement) news = ltm.find_news([sim_message]) assert len(news) == 0 ltm.add(sim_message) - new_idea = "Write a 2048 web game" + new_idea = text_embed_arr[2].get("text", "Write a 2048 web game") new_message = Message(role="User", content=new_idea, cause_by=UserRequirement) news = ltm.find_news([new_message]) assert len(news) == 1 @@ -56,7 +58,7 @@ def test_ltm_search(): news = ltm_new.find_news([sim_message]) assert len(news) == 0 - new_idea = "Write a Battle City" + new_idea = text_embed_arr[3].get("text", "Write a Battle City") new_message = Message(role="User", content=new_idea, cause_by=UserRequirement) news = ltm_new.find_news([new_message]) assert len(news) == 1 diff --git a/tests/metagpt/memory/test_memory_storage.py b/tests/metagpt/memory/test_memory_storage.py index e82a82fc8..28a73276b 100644 --- a/tests/metagpt/memory/test_memory_storage.py +++ b/tests/metagpt/memory/test_memory_storage.py @@ -4,23 +4,25 @@ @Desc : the unittests of metagpt/memory/memory_storage.py """ -import os import shutil from pathlib import Path from typing import List from metagpt.actions import UserRequirement, WritePRD from metagpt.actions.action_node import ActionNode -from metagpt.config2 import config from metagpt.const import DATA_PATH from metagpt.memory.memory_storage import MemoryStorage from metagpt.schema import Message - -os.environ.setdefault("OPENAI_API_KEY", config.get_openai_llm().api_key) +from tests.metagpt.memory.mock_text_embed import ( + mock_openai_embed_documents, + text_embed_arr, +) -def test_idea_message(): - idea = "Write a cli snake game" +def test_idea_message(mocker): + mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents) + + idea = text_embed_arr[0].get("text", "Write a cli snake game") role_id = "UTUser1(Product Manager)" message = Message(role="User", content=idea, cause_by=UserRequirement) @@ -33,12 +35,12 @@ def test_idea_message(): memory_storage.add(message) assert memory_storage.is_initialized is True - sim_idea = "Write a game of cli snake" + sim_idea = text_embed_arr[1].get("text", "Write a game of cli snake") sim_message = Message(role="User", content=sim_idea, cause_by=UserRequirement) new_messages = memory_storage.search_dissimilar(sim_message) assert len(new_messages) == 0 # similar, return [] - new_idea = "Write a 2048 web game" + new_idea = text_embed_arr[2].get("text", "Write a 2048 web game") new_message = Message(role="User", content=new_idea, cause_by=UserRequirement) new_messages = memory_storage.search_dissimilar(new_message) assert new_messages[0].content == message.content @@ -47,13 +49,17 @@ def test_idea_message(): assert memory_storage.is_initialized is False -def test_actionout_message(): +def test_actionout_message(mocker): + mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents) + out_mapping = {"field1": (str, ...), "field2": (List[str], ...)} out_data = {"field1": "field1 value", "field2": ["field2 value1", "field2 value2"]} ic_obj = ActionNode.create_model_class("prd", out_mapping) role_id = "UTUser2(Architect)" - content = "The user has requested the creation of a command-line interface (CLI) snake game" + content = text_embed_arr[4].get( + "text", "The user has requested the creation of a command-line interface (CLI) snake game" + ) message = Message( content=content, instruct_content=ic_obj(**out_data), role="user", cause_by=WritePRD ) # WritePRD as test action @@ -67,12 +73,14 @@ def test_actionout_message(): memory_storage.add(message) assert memory_storage.is_initialized is True - sim_conent = "The request is command-line interface (CLI) snake game" + sim_conent = text_embed_arr[5].get("text", "The request is command-line interface (CLI) snake game") sim_message = Message(content=sim_conent, instruct_content=ic_obj(**out_data), role="user", cause_by=WritePRD) new_messages = memory_storage.search_dissimilar(sim_message) assert len(new_messages) == 0 # similar, return [] - new_conent = "Incorporate basic features of a snake game such as scoring and increasing difficulty" + new_conent = text_embed_arr[6].get( + "text", "Incorporate basic features of a snake game such as scoring and increasing difficulty" + ) new_message = Message(content=new_conent, instruct_content=ic_obj(**out_data), role="user", cause_by=WritePRD) new_messages = memory_storage.search_dissimilar(new_message) assert new_messages[0].content == message.content diff --git a/tests/metagpt/test_incremental_dev.py b/tests/metagpt/test_incremental_dev.py index c47397dd7..3322df234 100644 --- a/tests/metagpt/test_incremental_dev.py +++ b/tests/metagpt/test_incremental_dev.py @@ -142,6 +142,9 @@ def check_or_create_base_tag(project_path): # Initialize a Git repository subprocess.run(["git", "init"], check=True) + # Check if the .gitignore exists. If it doesn't exist, create .gitignore and add the comment + subprocess.run(f"echo # Ignore these files or directories > {'.gitignore'}", shell=True) + # Check if the 'base' tag exists check_base_tag_cmd = ["git", "show-ref", "--verify", "--quiet", "refs/tags/base"] if subprocess.run(check_base_tag_cmd).returncode == 0: