diff --git a/examples/data/exp_pool/engineer_exps.json b/examples/data/exp_pool/engineer_exps.json new file mode 100644 index 000000000..89c0bc186 --- /dev/null +++ b/examples/data/exp_pool/engineer_exps.json @@ -0,0 +1,16 @@ +[{ + "req": [{ + "role": "user", + "content": "\n# Current Plan\n{'goal': \"Please write a 1048 game using JavaScript and HTML code without using any frameworks, user can play with keyboard. Refer to the system design located at '/tmp/system_design.json' and the project schedule at '/tmp/project_schedule.json' for detailed information.\", 'tasks': []}\n\n# Current Task\n\n\n# Instruction\nBased on the context, write a plan or modify an existing plan to achieve the goal. A plan consists of one to 3 tasks.\nIf plan is created, you should track the progress and update the plan accordingly, such as Plan.finish_current_task, Plan.append_task, Plan.reset_task, Plan.replace_task, etc.\nWhen presented a current task, tackle the task using the available commands.\nPay close attention to new user message, review the conversation history, use RoleZero.reply_to_human to respond to new user requirement.\nNote:\n1. If you keeping encountering errors, unexpected situation, or you are not sure of proceeding, use RoleZero.ask_human to ask for help.\n2. Carefully review your progress at the current task, if your actions so far has not fulfilled the task instruction, you should continue with current task. Otherwise, finish current task.\n3. Each time you finish a task, use RoleZero.reply_to_human to report your progress.\n4. Each time you write a code in your response, write with the Editor directly without preparing a repetitive code block beforehand.\n5. Take on ONE task and write ONE code file in each response. DON'T attempt all tasks in one response.\n6. When not specified, you should write files in a folder named \"src\". If you know the project path, then write in a \"src\" folder under the project path.\n7. When provided system design or project schedule, you MUST read them first before making a plan, then adhere to them in your implementation, especially in the programming language, package, or framework. You MUST implement all code files prescribed in the system design or project schedule. You can create a plan first with each task corresponding to implementing one code file.\n8. Write at most one file per task, do your best to implement THE ONLY ONE FILE. CAREFULLY CHECK THAT YOU DONT MISS ANY NECESSARY CLASS/FUNCTION IN THIS FILE.\n9. COMPLETE CODE: Your code will be part of the entire project, so please implement complete, reliable, reusable code snippets.\n10. When provided system design, YOU MUST FOLLOW \"Data structures and interfaces\". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design.\n11. Write out EVERY CODE DETAIL, DON'T LEAVE TODO.\n12. To modify code in a file, read the entire file, make changes, and update the file with the complete code, ensuring that no line numbers are included in the final write.\n13. When a system design or project schedule is provided, at the end of the plan, add a CodeRview Task for each file; for example, if there are three files, add three CodeRview Tasks. For each CodeRview Task, just call ReviewAndRewriteCode.run.\n" + }], + "resp": "\nExplanation: To begin the task of creating a 2048 game using JavaScript and HTML, I need to first read the system design and project schedule documents to understand the specific requirements and architecture outlined for the game development.\n\n```json\n[\n {\n \"command_name\": \"Editor.read\",\n \"args\": {\n \"path\": \"/tmp/system_design.json\"\n }\n },\n {\n \"command_name\": \"Editor.read\",\n \"args\": {\n \"path\": \"/tmp/project_schedule.json\"\n }\n }\n]\n```\n" +}, { + "req": [{ + "role": "user", + "content": "Command Editor.read executed: file_path='/tmp/system_design.json' block_content='001|{\"Implementation approach\":\"We will implement the 2048 game using plain JavaScript and HTML, ensuring no frameworks are used. The game logic will handle tile movements, merging, and game state updates. The UI will be simple and clean, with a responsive design to fit different screen sizes. We will use CSS for styling and ensure the game is playable with keyboard arrow keys. The game will display the current score, have a restart button, and show a game over message when no more moves are possible.\",\"File list\":[\"index.html\",\"style.css\",\"script.js\"],\"Data structures and interfaces\":\"\\\\nclassDiagram\\\\n class Game {\\\\n -grid: int[][]\\\\n -score: int\\\\n +init(): void\\\\n +move(direction: str): void\\\\n +merge(direction: str): void\\\\n +isGameOver(): bool\\\\n +restart(): void\\\\n }\\\\n class UI {\\\\n -game: Game\\\\n +init(): void\\\\n +update(): void\\\\n +showGameOver(): void\\\\n +bindEvents(): void\\\\n }\\\\n Game --> UI\\\\n\",\"Program call flow\":\"\\\\nsequenceDiagram\\\\n participant U as UI\\\\n participant G as Game\\\\n U->>G: init()\\\\n G-->>U: return\\\\n U->>U: bindEvents()\\\\n U->>G: move(direction)\\\\n G->>G: merge(direction)\\\\n G->>U: update()\\\\n U->>U: update()\\\\n U->>G: isGameOver()\\\\n G-->>U: return bool\\\\n alt Game Over\\\\n U->>U: showGameOver()\\\\n end\\\\n U->>G: restart()\\\\n G-->>U: return\\\\n\",\"Anything UNCLEAR\":\"Clarify if there are any specific design preferences or additional features required beyond the basic 2048 game functionality.\"}\\n'\n\nCommand Editor.read executed: file_path='/tmp/project_schedule.json' block_content='001|{\"Required packages\":[\"No third-party dependencies required\"],\"Required Other language third-party packages\":[\"No third-party dependencies required\"],\"Logic Analysis\":[[\"script.js\",\"Contains Game and UI classes, and their methods: init, move, merge, isGameOver, restart, update, showGameOver, bindEvents\"],[\"index.html\",\"Contains the HTML structure for the game UI\"],[\"style.css\",\"Contains the CSS styles for the game UI\"]],\"Task list\":[\"index.html\",\"style.css\",\"script.js\"],\"Full API spec\":\"\",\"Shared Knowledge\":\"The `script.js` file will contain the core game logic and UI handling. The `index.html` file will provide the structure for the game, and `style.css` will handle the styling.\",\"Anything UNCLEAR\":\"Clarify if there are any specific design preferences or additional features required beyond the basic 2048 game functionality.\"}\\n'" + }, { + "role": "user", + "content": "\n# Current Plan\n{'goal': \"Please write a 1048 game using JavaScript and HTML code without using any frameworks, user can play with keyboard. Refer to the system design located at '/tmp/system_design.json' and the project schedule at '/tmp/project_schedule.json' for detailed information.\", 'tasks': []}\n\n# Current Task\n\n\n# Instruction\nBased on the context, write a plan or modify an existing plan to achieve the goal. A plan consists of one to 3 tasks.\nIf plan is created, you should track the progress and update the plan accordingly, such as Plan.finish_current_task, Plan.append_task, Plan.reset_task, Plan.replace_task, etc.\nWhen presented a current task, tackle the task using the available commands.\nPay close attention to new user message, review the conversation history, use RoleZero.reply_to_human to respond to new user requirement.\nNote:\n1. If you keeping encountering errors, unexpected situation, or you are not sure of proceeding, use RoleZero.ask_human to ask for help.\n2. Carefully review your progress at the current task, if your actions so far has not fulfilled the task instruction, you should continue with current task. Otherwise, finish current task.\n3. Each time you finish a task, use RoleZero.reply_to_human to report your progress.\n4. Each time you write a code in your response, write with the Editor directly without preparing a repetitive code block beforehand.\n5. Take on ONE task and write ONE code file in each response. DON'T attempt all tasks in one response.\n6. When not specified, you should write files in a folder named \"src\". If you know the project path, then write in a \"src\" folder under the project path.\n7. When provided system design or project schedule, you MUST read them first before making a plan, then adhere to them in your implementation, especially in the programming language, package, or framework. You MUST implement all code files prescribed in the system design or project schedule. You can create a plan first with each task corresponding to implementing one code file.\n8. Write at most one file per task, do your best to implement THE ONLY ONE FILE. CAREFULLY CHECK THAT YOU DONT MISS ANY NECESSARY CLASS/FUNCTION IN THIS FILE.\n9. COMPLETE CODE: Your code will be part of the entire project, so please implement complete, reliable, reusable code snippets.\n10. When provided system design, YOU MUST FOLLOW \"Data structures and interfaces\". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design.\n11. Write out EVERY CODE DETAIL, DON'T LEAVE TODO.\n12. To modify code in a file, read the entire file, make changes, and update the file with the complete code, ensuring that no line numbers are included in the final write.\n13. When a system design or project schedule is provided, at the end of the plan, add a CodeRview Task for each file; for example, if there are three files, add three CodeRview Tasks. For each CodeRview Task, just call ReviewAndRewriteCode.run.\n" + }], + "resp": "\nExplanation: Based on the system design and project schedule, I will create a plan to implement the 2048 game. The plan will consist of tasks to create the necessary files (`index.html`, `style.css`, and `script.js`) and review each file to ensure they meet the design specifications.\n\nHere's the plan:\n\n1. **Task 1**: Create `index.html` - This file will contain the HTML structure necessary for the game's UI.\n2. **Task 2**: Create `style.css` - This file will define the CSS styles to make the game visually appealing and responsive.\n3. **Task 3**: Create `script.js` - This file will contain the JavaScript code for the game logic and UI interactions.\n4. **Code Review Tasks**: Review each file to ensure they meet the project requirements and adhere to the system design.\n\nLet's start by appending the first task to the plan.\n\n```json\n[\n {\n \"command_name\": \"Plan.append_task\",\n \"args\": {\n \"task_id\": \"1\",\n \"dependent_task_ids\": [],\n \"instruction\": \"Create the index.html file with the basic HTML structure for the 2048 game.\",\n \"assignee\": \"Alex\"\n }\n },\n {\n \"command_name\": \"Plan.append_task\",\n \"args\": {\n \"task_id\": \"2\",\n \"dependent_task_ids\": [\"1\"],\n \"instruction\": \"Create the style.css file with the necessary CSS to style the 2048 game.\",\n \"assignee\": \"Alex\"\n }\n },\n {\n \"command_name\": \"Plan.append_task\",\n \"args\": {\n \"task_id\": \"3\",\n \"dependent_task_ids\": [\"1\", \"2\"],\n \"instruction\": \"Create the script.js file containing the JavaScript logic for the 2048 game.\",\n \"assignee\": \"Alex\"\n }\n },\n {\n \"command_name\": \"Plan.append_task\",\n \"args\": {\n \"task_id\": \"4\",\n \"dependent_task_ids\": [\"1\"],\n \"instruction\": \"Use ReviewAndRewriteCode to review the code in index.html to ensure it meets the design specifications.\",\n \"assignee\": \"Alex\"\n }\n },\n {\n \"command_name\": \"Plan.append_task\",\n \"args\": {\n \"task_id\": \"5\",\n \"dependent_task_ids\": [\"2\"],\n \"instruction\": \"Use ReviewAndRewriteCode to review the code in style.css to ensure it meets the design specifications.\",\n \"assignee\": \"Alex\"\n }\n },\n {\n \"command_name\": \"Plan.append_task\",\n \"args\": {\n \"task_id\": \"6\",\n \"dependent_task_ids\": [\"3\"],\n \"instruction\": \"Use ReviewAndRewriteCode to review the code in script.js to ensure it meets the design specifications.\",\n \"assignee\": \"Alex\"\n }\n }\n]\n```\n" +}] \ No newline at end of file diff --git a/examples/data/exp_pool/team_leader_exps.json b/examples/data/exp_pool/team_leader_exps.json new file mode 100644 index 000000000..125f0a48d --- /dev/null +++ b/examples/data/exp_pool/team_leader_exps.json @@ -0,0 +1,22 @@ +[{ + "req": [{ + "role": "user", + "content": "\n# Current Plan\n{'goal': \"from to {''}: Write a 1024 game using JavaScript and HTML code without using any frameworks, user can play with keyboard.\", 'tasks': []}\n\n# Current Task\n\n\n# Instruction\nYou are a team leader, and you are responsible for drafting tasks and routing tasks to your team members.\nYour team member:\nTim: Team Leader, \nAlice: Product Manager, efficiently create a successful product that meets market demands and user expectations\nBob: Architect, design a concise, usable, complete software system\nEve: Project Manager, break down tasks according to PRD/technical design, generate a task list, and analyze task dependencies to start with the prerequisite modules\nAlex: Engineer, Take on game, app, and web development\nDavid: DataAnalyst, Take on any data-related tasks, such as data analysis, machine learning, deep learning, web browsing, web scraping, web searching, web deployment, terminal operation, git and github operation, etc.\n\nYou should NOT assign consecutive tasks to the same team member, instead, assign an aggregated task (or the complete requirement) and let the team member to decompose it.\nWhen creating a new plan involving multiple members, create all tasks at once.\nIf plan is created, you should track the progress based on team member feedback message, and update plan accordingly, such as Plan.finish_current_task, Plan.reset_task, Plan.replace_task, etc.\nYou should use TeamLeader.publish_team_message to team members, asking them to start their task. DONT omit any necessary info such as path, link, environment, programming language, framework, requirement, constraint from original content to team members because you are their sole info source.\nPay close attention to new user message, review the conversation history, use RoleZero.reply_to_human to respond to the user directly, DON'T ask your team members.\n\nNote:\n1. If the requirement is a pure DATA-RELATED requirement, such as bug fixes, issue reporting, environment setup, terminal operations, pip install, web browsing, web scraping, web searching, web imitation, data science, data analysis, machine learning, deep learning, text-to-image etc. DON'T decompose it, assign a single task with the original user requirement as instruction directly to Data Analyst.\n2. If the requirement is developing a software, game, app, or website, excluding the above data-related tasks, you should decompose the requirement into multiple tasks and assign them to different team members based on their expertise, usually the sequence of Product Manager -> Architect -> Project Manager -> Engineer -> (optional: QaEngine if present) -> (optional: DataAnalyst if user requests deployment), each assigned ONE task. When publishing message to Product Manager, you should directly copy the full original user requirement.\n3. If the requirement contains both DATA-RELATED part mentioned in 1 and software development part mentioned in 2, you should decompose the software development part and assign them to different team members based on their expertise, and assign the DATA-RELATED part to Data Analyst David directly.\n4. If the requirement is a common-sense, logical, or math problem, you should respond directly without assigning any task to team members.\n5. If you think the requirement is not clear or ambiguous, you should ask the user for clarification immediately. Assign tasks only after all info is clear.\n6. It is helpful for Engineer to have both the system design and the project schedule for writing the code, so include paths of both files (if available) and remind Engineer to definitely read them when publishing message to Engineer.\n7. If the requirement is writing a TRD and software framework, you should assign it to Architect. When publishing message to Architect, you should directly copy the full original user requirement.\n" + }], + "resp": "\nExplanation: The requirement is about software development. Assign each tasks to a different team member based on their expertise. When publishing message to Product Manager, we copy original user requirement directly to ensure no information loss.\n```json\n[\n {\n \"command_name\": \"Plan.append_task\",\n \"args\": {\n \"task_id\": \"1\",\n \"dependent_task_ids\": [],\n \"instruction\": \"Create a product requirement document (PRD) outlining the features, user interface, and user experience of the CLI python snake game.\",\n \"assignee\": \"Alice\"\n }\n },\n {\n \"command_name\": \"Plan.append_task\",\n \"args\": {\n \"task_id\": \"2\",\n \"dependent_task_ids\": [\"1\"],\n \"instruction\": \"Design the software architecture for the CLI snake game, including the choice of programming language, libraries, and data flow.\",\n \"assignee\": \"Bob\"\n }\n },\n {\n \"command_name\": \"Plan.append_task\",\n \"args\": {\n \"task_id\": \"3\",\n \"dependent_task_ids\": [\"2\"],\n \"instruction\": \"Break down the architecture into manageable tasks, identify task dependencies, and prepare a detailed task list for implementation.\",\n \"assignee\": \"Eve\"\n }\n },\n {\n \"command_name\": \"Plan.append_task\",\n \"args\": {\n \"task_id\": \"4\",\n \"dependent_task_ids\": [\"3\"],\n \"instruction\": \"Implement the core game logic for the CLI snake game, including snake movement, food generation, and score tracking.\",\n \"assignee\": \"Alex\"\n }\n },\n {\n \"command_name\": \"Plan.append_task\",\n \"args\": {\n \"task_id\": \"5\",\n \"dependent_task_ids\": [\"4\"],\n \"instruction\": \"Write comprehensive tests for the game logic and user interface to ensure functionality and reliability.\",\n \"assignee\": \"Edward\"\n }\n },\n {\n \"command_name\": \"TeamLeader.publish_message\",\n \"args\": {\n \"content\": \"Create a cli snake game using Python\",\n \"send_to\": \"Alice\"\n }\n },\n {\n \"command_name\": \"RoleZero.reply_to_human\",\n \"args\": {\n \"content\": \"I have assigned the tasks to the team members. Alice will create the PRD, Bob will design the software architecture, Eve will break down the architecture into tasks, Alex will implement the core game logic, and Edward will write comprehensive tests. The team will work on the project accordingly\"\n }\n },\n {\n \"command_name\": \"end\"\n }\n]\n```\n" +}, { + "req": [{ + "role": "user", + "content": "\n# Current Plan\n{'goal': \"from to {''}: Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy.\", 'tasks': []}\n\n# Current Task\n\n\n# Instruction\nYou are a team leader, and you are responsible for drafting tasks and routing tasks to your team members.\nYour team member:\nTim: Team Leader, \nAlice: Product Manager, efficiently create a successful product that meets market demands and user expectations\nBob: Architect, design a concise, usable, complete software system\nEve: Project Manager, break down tasks according to PRD/technical design, generate a task list, and analyze task dependencies to start with the prerequisite modules\nAlex: Engineer, Take on game, app, and web development\nDavid: DataAnalyst, Take on any data-related tasks, such as data analysis, machine learning, deep learning, web browsing, web scraping, web searching, web deployment, terminal operation, git and github operation, etc.\n\nYou should NOT assign consecutive tasks to the same team member, instead, assign an aggregated task (or the complete requirement) and let the team member to decompose it.\nWhen creating a new plan involving multiple members, create all tasks at once.\nIf plan is created, you should track the progress based on team member feedback message, and update plan accordingly, such as Plan.finish_current_task, Plan.reset_task, Plan.replace_task, etc.\nYou should use TeamLeader.publish_team_message to team members, asking them to start their task. DONT omit any necessary info such as path, link, environment, programming language, framework, requirement, constraint from original content to team members because you are their sole info source.\nPay close attention to new user message, review the conversation history, use RoleZero.reply_to_human to respond to the user directly, DON'T ask your team members.\n\nNote:\n1. If the requirement is a pure DATA-RELATED requirement, such as bug fixes, issue reporting, environment setup, terminal operations, pip install, web browsing, web scraping, web searching, web imitation, data science, data analysis, machine learning, deep learning, text-to-image etc. DON'T decompose it, assign a single task with the original user requirement as instruction directly to Data Analyst.\n2. If the requirement is developing a software, game, app, or website, excluding the above data-related tasks, you should decompose the requirement into multiple tasks and assign them to different team members based on their expertise, usually the sequence of Product Manager -> Architect -> Project Manager -> Engineer -> (optional: QaEngine if present) -> (optional: DataAnalyst if user requests deployment), each assigned ONE task. When publishing message to Product Manager, you should directly copy the full original user requirement.\n3. If the requirement contains both DATA-RELATED part mentioned in 1 and software development part mentioned in 2, you should decompose the software development part and assign them to different team members based on their expertise, and assign the DATA-RELATED part to Data Analyst David directly.\n4. If the requirement is a common-sense, logical, or math problem, you should respond directly without assigning any task to team members.\n5. If you think the requirement is not clear or ambiguous, you should ask the user for clarification immediately. Assign tasks only after all info is clear.\n6. It is helpful for Engineer to have both the system design and the project schedule for writing the code, so include paths of both files (if available) and remind Engineer to definitely read them when publishing message to Engineer.\n7. If the requirement is writing a TRD and software framework, you should assign it to Architect. When publishing message to Architect, you should directly copy the full original user requirement.\n" + }], + "resp": "\nExplanation: DON'T decompose requirement if it is a DATA-RELATED task, assign a single task directly to Data Analyst David. He will manage the decomposition and implementation.\n```json\n[\n {\n \"command_name\": \"Plan.append_task\",\n \"args\": {\n \"task_id\": \"1\",\n \"dependent_task_ids\": [],\n \"instruction\": \"Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy.\",\n \"assignee\": \"David\"\n }\n },\n {\n \"command_name\": \"TeamLeader.publish_message\",\n \"args\": {\n \"content\": \"Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy.\",\n \"send_to\": \"David\"\n }\n },\n {\n \"command_name\": \"RoleZero.reply_to_human\",\n \"args\": {\n \"content\": \"I have assigned the task to David. He will break down the task further by himself and starts solving it.\"\n }\n },\n {\n \"command_name\": \"end\"\n }\n]\n```\n" +}, { + "req": "Conversation History:\n[\n ...,\n {'role': 'assistant', 'content': 'from Alice(Product Manager) to {'Bob'}: {'docs': {'20240424153821.json': {'root_path': 'docs/prd', 'filename': '20240424153821.json', 'content': '{\"Language\":\"en_us\",\"Programming Language\":\"Python\",\"Original Requirements\":\"create a cli snake game\",\"Project Name\":\"snake_game\",\"Product Goals\":[\"Develop an intuitive and addictive snake game\",...], ...}}}}},\n]\n", + "resp": "\nExplanation: You received a message from Alice, the Product Manager, that she has completed the PRD, use Plan.finish_current_task to mark her task as finished and moves the plan to the next task. Based on plan status, next task is for Bob (Architect), publish a message asking him to start. The message content should contain important path info.\n```json\n[\n {\n \"command_name\": \"Plan.finish_current_task\",\n \"args\": {}\n },\n {\n \"command_name\": \"TeamLeader.publish_message\",\n \"args\": {\n \"content\": \"Please design the software architecture for the snake game based on the PRD created by Alice. The PRD is at 'docs/prd/20240424153821.json'. Include the choice of programming language, libraries, and data flow, etc.\",\n \"send_to\": \"Bob\"\n }\n },\n {\n \"command_name\": \"RoleZero.reply_to_human\",\n \"args\": {\n \"content\": \"Alice has completed the PRD. I have marked her task as finished and sent the PRD to Bob. Bob will work on the software architecture.\"\n }\n },\n {\n \"command_name\": \"end\"\n }\n]\n```\n" +}, { + "req": [{ + "role": "user", + "content": "\n# Current Plan\n{'goal': \"from to {''}: how does the project go?\", 'tasks': []}\n\n# Current Task\n\n\n# Instruction\nYou are a team leader, and you are responsible for drafting tasks and routing tasks to your team members.\nYour team member:\nTim: Team Leader, \nAlice: Product Manager, efficiently create a successful product that meets market demands and user expectations\nBob: Architect, design a concise, usable, complete software system\nEve: Project Manager, break down tasks according to PRD/technical design, generate a task list, and analyze task dependencies to start with the prerequisite modules\nAlex: Engineer, Take on game, app, and web development\nDavid: DataAnalyst, Take on any data-related tasks, such as data analysis, machine learning, deep learning, web browsing, web scraping, web searching, web deployment, terminal operation, git and github operation, etc.\n\nYou should NOT assign consecutive tasks to the same team member, instead, assign an aggregated task (or the complete requirement) and let the team member to decompose it.\nWhen creating a new plan involving multiple members, create all tasks at once.\nIf plan is created, you should track the progress based on team member feedback message, and update plan accordingly, such as Plan.finish_current_task, Plan.reset_task, Plan.replace_task, etc.\nYou should use TeamLeader.publish_team_message to team members, asking them to start their task. DONT omit any necessary info such as path, link, environment, programming language, framework, requirement, constraint from original content to team members because you are their sole info source.\nPay close attention to new user message, review the conversation history, use RoleZero.reply_to_human to respond to the user directly, DON'T ask your team members.\n\nNote:\n1. If the requirement is a pure DATA-RELATED requirement, such as bug fixes, issue reporting, environment setup, terminal operations, pip install, web browsing, web scraping, web searching, web imitation, data science, data analysis, machine learning, deep learning, text-to-image etc. DON'T decompose it, assign a single task with the original user requirement as instruction directly to Data Analyst.\n2. If the requirement is developing a software, game, app, or website, excluding the above data-related tasks, you should decompose the requirement into multiple tasks and assign them to different team members based on their expertise, usually the sequence of Product Manager -> Architect -> Project Manager -> Engineer -> (optional: QaEngine if present) -> (optional: DataAnalyst if user requests deployment), each assigned ONE task. When publishing message to Product Manager, you should directly copy the full original user requirement.\n3. If the requirement contains both DATA-RELATED part mentioned in 1 and software development part mentioned in 2, you should decompose the software development part and assign them to different team members based on their expertise, and assign the DATA-RELATED part to Data Analyst David directly.\n4. If the requirement is a common-sense, logical, or math problem, you should respond directly without assigning any task to team members.\n5. If you think the requirement is not clear or ambiguous, you should ask the user for clarification immediately. Assign tasks only after all info is clear.\n6. It is helpful for Engineer to have both the system design and the project schedule for writing the code, so include paths of both files (if available) and remind Engineer to definitely read them when publishing message to Engineer.\n7. If the requirement is writing a TRD and software framework, you should assign it to Architect. When publishing message to Architect, you should directly copy the full original user requirement.\n" + }], + "resp": "\nExplanation: The user is asking for a general update on the project status. Give a straight answer about the current task the team is working on and provide a summary of the completed tasks.\n```json\n[\n {\n \"command_name\": \"RoleZero.reply_to_human\",\n \"args\": {\n \"content\": \"The team is currently working on ... We have completed ...\"\n }\n },\n {\n \"command_name\": \"end\"\n }\n]\n```\n" +}] \ No newline at end of file diff --git a/examples/exp_pool/README.md b/examples/exp_pool/README.md new file mode 100644 index 000000000..d405bfa22 --- /dev/null +++ b/examples/exp_pool/README.md @@ -0,0 +1,19 @@ +# Experience Pool + +## Prerequisites +- Ensure the RAG module is installed: https://docs.deepwisdom.ai/main/en/guide/in_depth_guides/rag_module.html +- Set both `enable_read` and `enable_write` to `true` in the `exp_pool` section of `config2.yaml` + +## Example Files + +### 1. decorator.py +Showcases the implementation of the `@exp_cache` decorator. + +### 2. init_exp_pool.py +Demonstrates the process of initializing the experience pool. + +### 3. manager.py +Illustrates CRUD (Create, Read, Update, Delete) operations for managing experiences in the pool. + +### 4. scorer.py +Outlines methods for evaluating and scoring experiences within the pool. diff --git a/examples/exp_pool/decorator.py b/examples/exp_pool/decorator.py index 00726a0a8..d25949e8d 100644 --- a/examples/exp_pool/decorator.py +++ b/examples/exp_pool/decorator.py @@ -1,4 +1,6 @@ -"""Decorator example of experience pool.""" +""" +This script demonstrates how to automatically store experiences using @exp_cache and query the stored experiences. +""" import asyncio import uuid @@ -16,7 +18,7 @@ async def main(): req = "Water" resp = await produce(req=req) - logger.info(f"The resp of `produce{req}` is: {resp}") + logger.info(f"The response of `produce({req})` is: {resp}") exps = await exp_manager.query_exps(req) logger.info(f"Find experiences: {exps}") diff --git a/examples/exp_pool/init_exp_pool.py b/examples/exp_pool/init_exp_pool.py new file mode 100644 index 000000000..14c415be7 --- /dev/null +++ b/examples/exp_pool/init_exp_pool.py @@ -0,0 +1,94 @@ +"""Init experience pool. + +Put some useful experiences into the experience pool. +""" + +import asyncio +import json +from pathlib import Path + +from metagpt.const import EXAMPLE_DATA_PATH +from metagpt.exp_pool import exp_manager +from metagpt.exp_pool.schema import EntryType, Experience, Metric, Score +from metagpt.logs import logger +from metagpt.utils.common import aread + + +async def load_file(filepath) -> list[dict]: + """Asynchronously loads and parses a JSON file. + + Args: + filepath: Path to the JSON file. + + Returns: + A list of dictionaries parsed from the JSON file. + """ + + return json.loads(await aread(filepath)) + + +async def add_exp(req: str, resp: str, tag: str, metric: Metric = None): + """Adds a new experience to the experience pool. + + Args: + req: The request string. + resp: The response string. + tag: A tag for categorizing the experience. + metric: Optional metric for the experience. Defaults to a score of 10. + + """ + + exp = Experience( + req=req, + resp=resp, + entry_type=EntryType.MANUAL, + tag=tag, + metric=metric or Metric(score=Score(val=10, reason="Manual")), + ) + + exp_manager.config.exp_pool.enable_write = True + exp_manager.create_exp(exp) + logger.info(f"New experience created for the request `{req[:10]}`.") + + +async def add_exps(exps: list, tag: str): + """Adds multiple experiences to the experience pool. + + Args: + exps: A list of experience dictionaries. + tag: A tag for categorizing the experiences. + + """ + + tasks = [add_exp(req=json.dumps(exp["req"]), resp=exp["resp"], tag=tag) for exp in exps] + await asyncio.gather(*tasks) + + +async def add_exps_from_file(tag: str, filepath: Path): + """Loads experiences from a file and adds them to the experience pool. + + Args: + tag: A tag for categorizing the experiences. + filepath: Path to the file containing experiences. + + """ + + exps = await load_file(filepath) + await add_exps(exps, tag) + + +def query_exps_count(): + """Queries and logs the total count of experiences in the pool.""" + + count = exp_manager.get_exps_count() + logger.info(f"Experiences Count: {count}") + + +async def main(): + await add_exps_from_file("TeamLeader.llm_cached_aask", EXAMPLE_DATA_PATH / "exp_pool/team_leader_exps.json"), + await add_exps_from_file("Engineer2.llm_cached_aask", EXAMPLE_DATA_PATH / "exp_pool/engineer_exps.json") + query_exps_count() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/exp_pool/init_exp_pool/engineer_exps.py b/examples/exp_pool/init_exp_pool/engineer_exps.py deleted file mode 100644 index 022a0c829..000000000 --- a/examples/exp_pool/init_exp_pool/engineer_exps.py +++ /dev/null @@ -1,114 +0,0 @@ -import json - -OBJ1 = [ - { - "role": "user", - "content": "\n# Current Plan\n{'goal': \"Please write a 1048 game using JavaScript and HTML code without using any frameworks, user can play with keyboard. Refer to the system design located at '/tmp/system_design.json' and the project schedule at '/tmp/project_schedule.json' for detailed information.\", 'tasks': []}\n\n# Current Task\n\n\n# Instruction\nBased on the context, write a plan or modify an existing plan to achieve the goal. A plan consists of one to 3 tasks.\nIf plan is created, you should track the progress and update the plan accordingly, such as Plan.finish_current_task, Plan.append_task, Plan.reset_task, Plan.replace_task, etc.\nWhen presented a current task, tackle the task using the available commands.\nPay close attention to new user message, review the conversation history, use RoleZero.reply_to_human to respond to new user requirement.\nNote:\n1. If you keeping encountering errors, unexpected situation, or you are not sure of proceeding, use RoleZero.ask_human to ask for help.\n2. Carefully review your progress at the current task, if your actions so far has not fulfilled the task instruction, you should continue with current task. Otherwise, finish current task.\n3. Each time you finish a task, use RoleZero.reply_to_human to report your progress.\n4. Each time you write a code in your response, write with the Editor directly without preparing a repetitive code block beforehand.\n5. Take on ONE task and write ONE code file in each response. DON'T attempt all tasks in one response.\n6. When not specified, you should write files in a folder named \"src\". If you know the project path, then write in a \"src\" folder under the project path.\n7. When provided system design or project schedule, you MUST read them first before making a plan, then adhere to them in your implementation, especially in the programming language, package, or framework. You MUST implement all code files prescribed in the system design or project schedule. You can create a plan first with each task corresponding to implementing one code file.\n8. Write at most one file per task, do your best to implement THE ONLY ONE FILE. CAREFULLY CHECK THAT YOU DONT MISS ANY NECESSARY CLASS/FUNCTION IN THIS FILE.\n9. COMPLETE CODE: Your code will be part of the entire project, so please implement complete, reliable, reusable code snippets.\n10. When provided system design, YOU MUST FOLLOW \"Data structures and interfaces\". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design.\n11. Write out EVERY CODE DETAIL, DON'T LEAVE TODO.\n12. To modify code in a file, read the entire file, make changes, and update the file with the complete code, ensuring that no line numbers are included in the final write.\n13. When a system design or project schedule is provided, at the end of the plan, add a CodeRview Task for each file; for example, if there are three files, add three CodeRview Tasks. For each CodeRview Task, just call ReviewAndRewriteCode.run.\n", - } -] -OBJ2 = [ - { - "role": "user", - "content": 'Command Editor.read executed: file_path=\'/tmp/system_design.json\' block_content=\'001|{"Implementation approach":"We will implement the 2048 game using plain JavaScript and HTML, ensuring no frameworks are used. The game logic will handle tile movements, merging, and game state updates. The UI will be simple and clean, with a responsive design to fit different screen sizes. We will use CSS for styling and ensure the game is playable with keyboard arrow keys. The game will display the current score, have a restart button, and show a game over message when no more moves are possible.","File list":["index.html","style.css","script.js"],"Data structures and interfaces":"\\\\nclassDiagram\\\\n class Game {\\\\n -grid: int[][]\\\\n -score: int\\\\n +init(): void\\\\n +move(direction: str): void\\\\n +merge(direction: str): void\\\\n +isGameOver(): bool\\\\n +restart(): void\\\\n }\\\\n class UI {\\\\n -game: Game\\\\n +init(): void\\\\n +update(): void\\\\n +showGameOver(): void\\\\n +bindEvents(): void\\\\n }\\\\n Game --> UI\\\\n","Program call flow":"\\\\nsequenceDiagram\\\\n participant U as UI\\\\n participant G as Game\\\\n U->>G: init()\\\\n G-->>U: return\\\\n U->>U: bindEvents()\\\\n U->>G: move(direction)\\\\n G->>G: merge(direction)\\\\n G->>U: update()\\\\n U->>U: update()\\\\n U->>G: isGameOver()\\\\n G-->>U: return bool\\\\n alt Game Over\\\\n U->>U: showGameOver()\\\\n end\\\\n U->>G: restart()\\\\n G-->>U: return\\\\n","Anything UNCLEAR":"Clarify if there are any specific design preferences or additional features required beyond the basic 2048 game functionality."}\\n\'\n\nCommand Editor.read executed: file_path=\'/tmp/project_schedule.json\' block_content=\'001|{"Required packages":["No third-party dependencies required"],"Required Other language third-party packages":["No third-party dependencies required"],"Logic Analysis":[["script.js","Contains Game and UI classes, and their methods: init, move, merge, isGameOver, restart, update, showGameOver, bindEvents"],["index.html","Contains the HTML structure for the game UI"],["style.css","Contains the CSS styles for the game UI"]],"Task list":["index.html","style.css","script.js"],"Full API spec":"","Shared Knowledge":"The `script.js` file will contain the core game logic and UI handling. The `index.html` file will provide the structure for the game, and `style.css` will handle the styling.","Anything UNCLEAR":"Clarify if there are any specific design preferences or additional features required beyond the basic 2048 game functionality."}\\n\'', - }, - { - "role": "user", - "content": "\n# Current Plan\n{'goal': \"Please write a 1048 game using JavaScript and HTML code without using any frameworks, user can play with keyboard. Refer to the system design located at '/tmp/system_design.json' and the project schedule at '/tmp/project_schedule.json' for detailed information.\", 'tasks': []}\n\n# Current Task\n\n\n# Instruction\nBased on the context, write a plan or modify an existing plan to achieve the goal. A plan consists of one to 3 tasks.\nIf plan is created, you should track the progress and update the plan accordingly, such as Plan.finish_current_task, Plan.append_task, Plan.reset_task, Plan.replace_task, etc.\nWhen presented a current task, tackle the task using the available commands.\nPay close attention to new user message, review the conversation history, use RoleZero.reply_to_human to respond to new user requirement.\nNote:\n1. If you keeping encountering errors, unexpected situation, or you are not sure of proceeding, use RoleZero.ask_human to ask for help.\n2. Carefully review your progress at the current task, if your actions so far has not fulfilled the task instruction, you should continue with current task. Otherwise, finish current task.\n3. Each time you finish a task, use RoleZero.reply_to_human to report your progress.\n4. Each time you write a code in your response, write with the Editor directly without preparing a repetitive code block beforehand.\n5. Take on ONE task and write ONE code file in each response. DON'T attempt all tasks in one response.\n6. When not specified, you should write files in a folder named \"src\". If you know the project path, then write in a \"src\" folder under the project path.\n7. When provided system design or project schedule, you MUST read them first before making a plan, then adhere to them in your implementation, especially in the programming language, package, or framework. You MUST implement all code files prescribed in the system design or project schedule. You can create a plan first with each task corresponding to implementing one code file.\n8. Write at most one file per task, do your best to implement THE ONLY ONE FILE. CAREFULLY CHECK THAT YOU DONT MISS ANY NECESSARY CLASS/FUNCTION IN THIS FILE.\n9. COMPLETE CODE: Your code will be part of the entire project, so please implement complete, reliable, reusable code snippets.\n10. When provided system design, YOU MUST FOLLOW \"Data structures and interfaces\". DONT CHANGE ANY DESIGN. Do not use public member functions that do not exist in your design.\n11. Write out EVERY CODE DETAIL, DON'T LEAVE TODO.\n12. To modify code in a file, read the entire file, make changes, and update the file with the complete code, ensuring that no line numbers are included in the final write.\n13. When a system design or project schedule is provided, at the end of the plan, add a CodeRview Task for each file; for example, if there are three files, add three CodeRview Tasks. For each CodeRview Task, just call ReviewAndRewriteCode.run.\n", - }, -] -REQ1 = json.dumps(OBJ1) -REQ2 = json.dumps(OBJ2) - -RESP1 = """ -Explanation: To begin the task of creating a 2048 game using JavaScript and HTML, I need to first read the system design and project schedule documents to understand the specific requirements and architecture outlined for the game development. - -```json -[ - { - "command_name": "Editor.read", - "args": { - "path": "/tmp/system_design.json" - } - }, - { - "command_name": "Editor.read", - "args": { - "path": "/tmp/project_schedule.json" - } - } -] -``` -""" -RESP2 = """ -Explanation: Based on the system design and project schedule, I will create a plan to implement the 2048 game. The plan will consist of tasks to create the necessary files (`index.html`, `style.css`, and `script.js`) and review each file to ensure they meet the design specifications. - -Here's the plan: - -1. **Task 1**: Create `index.html` - This file will contain the HTML structure necessary for the game's UI. -2. **Task 2**: Create `style.css` - This file will define the CSS styles to make the game visually appealing and responsive. -3. **Task 3**: Create `script.js` - This file will contain the JavaScript code for the game logic and UI interactions. -4. **Code Review Tasks**: Review each file to ensure they meet the project requirements and adhere to the system design. - -Let's start by appending the first task to the plan. - -```json -[ - { - "command_name": "Plan.append_task", - "args": { - "task_id": "1", - "dependent_task_ids": [], - "instruction": "Create the index.html file with the basic HTML structure for the 2048 game.", - "assignee": "Alex" - } - }, - { - "command_name": "Plan.append_task", - "args": { - "task_id": "2", - "dependent_task_ids": ["1"], - "instruction": "Create the style.css file with the necessary CSS to style the 2048 game.", - "assignee": "Alex" - } - }, - { - "command_name": "Plan.append_task", - "args": { - "task_id": "3", - "dependent_task_ids": ["1", "2"], - "instruction": "Create the script.js file containing the JavaScript logic for the 2048 game.", - "assignee": "Alex" - } - }, - { - "command_name": "Plan.append_task", - "args": { - "task_id": "4", - "dependent_task_ids": ["1"], - "instruction": "Use ReviewAndRewriteCode to review the code in index.html to ensure it meets the design specifications.", - "assignee": "Alex" - } - }, - { - "command_name": "Plan.append_task", - "args": { - "task_id": "5", - "dependent_task_ids": ["2"], - "instruction": "Use ReviewAndRewriteCode to review the code in style.css to ensure it meets the design specifications.", - "assignee": "Alex" - } - }, - { - "command_name": "Plan.append_task", - "args": { - "task_id": "6", - "dependent_task_ids": ["3"], - "instruction": "Use ReviewAndRewriteCode to review the code in script.js to ensure it meets the design specifications.", - "assignee": "Alex" - } - } -] -``` -""" - -ENGINEER_EXPERIENCES = ((REQ1, RESP1), (REQ2, RESP2)) diff --git a/examples/exp_pool/init_exp_pool/main.py b/examples/exp_pool/init_exp_pool/main.py deleted file mode 100644 index 849834513..000000000 --- a/examples/exp_pool/init_exp_pool/main.py +++ /dev/null @@ -1,55 +0,0 @@ -"""Init experience pool. - -Put some useful experiences into the experience pool. -""" - -import asyncio - -from examples.exp_pool.init_exp_pool.engineer_exps import ENGINEER_EXPERIENCES -from examples.exp_pool.init_exp_pool.team_leader_exps import TEAM_LEADER_EXPERIENCES -from metagpt.exp_pool import exp_manager -from metagpt.exp_pool.schema import EntryType, Experience, Metric, Score -from metagpt.logs import logger - - -async def add_exp(req: str, resp: str, tag: str, metric: Metric = None): - exp = Experience( - req=req, - resp=resp, - entry_type=EntryType.MANUAL, - tag=tag, - metric=metric or Metric(score=Score(val=10, reason="Manual")), - ) - - exp_manager.config.exp_pool.enable_write = True - exp_manager.create_exp(exp) - logger.info(f"New experience created for the request `{req[:10]}`.") - - -async def add_teamleader_exps(): - tag = "TeamLeader.llm_cached_aask" - - for req, resp in TEAM_LEADER_EXPERIENCES: - await add_exp(req=req, resp=resp, tag=tag) - - -async def add_engineer_exps(): - tag = "Engineer2.llm_cached_aask" - - for req, resp in ENGINEER_EXPERIENCES: - await add_exp(req=req, resp=resp, tag=tag) - - -def query_exps_count(): - count = exp_manager.get_exps_count() - logger.info(f"Experiences Count: {count}") - - -async def main(): - await add_teamleader_exps() - await add_engineer_exps() - query_exps_count() - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/exp_pool/init_exp_pool/team_leader_exps.py b/examples/exp_pool/init_exp_pool/team_leader_exps.py deleted file mode 100644 index 347faac45..000000000 --- a/examples/exp_pool/init_exp_pool/team_leader_exps.py +++ /dev/null @@ -1,181 +0,0 @@ -import json - -OBJ1 = [ - { - "role": "user", - "content": "\n# Current Plan\n{'goal': \"from to {''}: Write a 1024 game using JavaScript and HTML code without using any frameworks, user can play with keyboard.\", 'tasks': []}\n\n# Current Task\n\n\n# Instruction\nYou are a team leader, and you are responsible for drafting tasks and routing tasks to your team members.\nYour team member:\nTim: Team Leader, \nAlice: Product Manager, efficiently create a successful product that meets market demands and user expectations\nBob: Architect, design a concise, usable, complete software system\nEve: Project Manager, break down tasks according to PRD/technical design, generate a task list, and analyze task dependencies to start with the prerequisite modules\nAlex: Engineer, Take on game, app, and web development\nDavid: DataAnalyst, Take on any data-related tasks, such as data analysis, machine learning, deep learning, web browsing, web scraping, web searching, web deployment, terminal operation, git and github operation, etc.\n\nYou should NOT assign consecutive tasks to the same team member, instead, assign an aggregated task (or the complete requirement) and let the team member to decompose it.\nWhen creating a new plan involving multiple members, create all tasks at once.\nIf plan is created, you should track the progress based on team member feedback message, and update plan accordingly, such as Plan.finish_current_task, Plan.reset_task, Plan.replace_task, etc.\nYou should use TeamLeader.publish_team_message to team members, asking them to start their task. DONT omit any necessary info such as path, link, environment, programming language, framework, requirement, constraint from original content to team members because you are their sole info source.\nPay close attention to new user message, review the conversation history, use RoleZero.reply_to_human to respond to the user directly, DON'T ask your team members.\n\nNote:\n1. If the requirement is a pure DATA-RELATED requirement, such as bug fixes, issue reporting, environment setup, terminal operations, pip install, web browsing, web scraping, web searching, web imitation, data science, data analysis, machine learning, deep learning, text-to-image etc. DON'T decompose it, assign a single task with the original user requirement as instruction directly to Data Analyst.\n2. If the requirement is developing a software, game, app, or website, excluding the above data-related tasks, you should decompose the requirement into multiple tasks and assign them to different team members based on their expertise, usually the sequence of Product Manager -> Architect -> Project Manager -> Engineer -> (optional: QaEngine if present) -> (optional: DataAnalyst if user requests deployment), each assigned ONE task. When publishing message to Product Manager, you should directly copy the full original user requirement.\n3. If the requirement contains both DATA-RELATED part mentioned in 1 and software development part mentioned in 2, you should decompose the software development part and assign them to different team members based on their expertise, and assign the DATA-RELATED part to Data Analyst David directly.\n4. If the requirement is a common-sense, logical, or math problem, you should respond directly without assigning any task to team members.\n5. If you think the requirement is not clear or ambiguous, you should ask the user for clarification immediately. Assign tasks only after all info is clear.\n6. It is helpful for Engineer to have both the system design and the project schedule for writing the code, so include paths of both files (if available) and remind Engineer to definitely read them when publishing message to Engineer.\n7. If the requirement is writing a TRD and software framework, you should assign it to Architect. When publishing message to Architect, you should directly copy the full original user requirement.\n", - } -] -OBJ2 = [ - { - "role": "user", - "content": "\n# Current Plan\n{'goal': \"from to {''}: Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy.\", 'tasks': []}\n\n# Current Task\n\n\n# Instruction\nYou are a team leader, and you are responsible for drafting tasks and routing tasks to your team members.\nYour team member:\nTim: Team Leader, \nAlice: Product Manager, efficiently create a successful product that meets market demands and user expectations\nBob: Architect, design a concise, usable, complete software system\nEve: Project Manager, break down tasks according to PRD/technical design, generate a task list, and analyze task dependencies to start with the prerequisite modules\nAlex: Engineer, Take on game, app, and web development\nDavid: DataAnalyst, Take on any data-related tasks, such as data analysis, machine learning, deep learning, web browsing, web scraping, web searching, web deployment, terminal operation, git and github operation, etc.\n\nYou should NOT assign consecutive tasks to the same team member, instead, assign an aggregated task (or the complete requirement) and let the team member to decompose it.\nWhen creating a new plan involving multiple members, create all tasks at once.\nIf plan is created, you should track the progress based on team member feedback message, and update plan accordingly, such as Plan.finish_current_task, Plan.reset_task, Plan.replace_task, etc.\nYou should use TeamLeader.publish_team_message to team members, asking them to start their task. DONT omit any necessary info such as path, link, environment, programming language, framework, requirement, constraint from original content to team members because you are their sole info source.\nPay close attention to new user message, review the conversation history, use RoleZero.reply_to_human to respond to the user directly, DON'T ask your team members.\n\nNote:\n1. If the requirement is a pure DATA-RELATED requirement, such as bug fixes, issue reporting, environment setup, terminal operations, pip install, web browsing, web scraping, web searching, web imitation, data science, data analysis, machine learning, deep learning, text-to-image etc. DON'T decompose it, assign a single task with the original user requirement as instruction directly to Data Analyst.\n2. If the requirement is developing a software, game, app, or website, excluding the above data-related tasks, you should decompose the requirement into multiple tasks and assign them to different team members based on their expertise, usually the sequence of Product Manager -> Architect -> Project Manager -> Engineer -> (optional: QaEngine if present) -> (optional: DataAnalyst if user requests deployment), each assigned ONE task. When publishing message to Product Manager, you should directly copy the full original user requirement.\n3. If the requirement contains both DATA-RELATED part mentioned in 1 and software development part mentioned in 2, you should decompose the software development part and assign them to different team members based on their expertise, and assign the DATA-RELATED part to Data Analyst David directly.\n4. If the requirement is a common-sense, logical, or math problem, you should respond directly without assigning any task to team members.\n5. If you think the requirement is not clear or ambiguous, you should ask the user for clarification immediately. Assign tasks only after all info is clear.\n6. It is helpful for Engineer to have both the system design and the project schedule for writing the code, so include paths of both files (if available) and remind Engineer to definitely read them when publishing message to Engineer.\n7. If the requirement is writing a TRD and software framework, you should assign it to Architect. When publishing message to Architect, you should directly copy the full original user requirement.\n", - } -] -OBJ4 = [ - { - "role": "user", - "content": "\n# Current Plan\n{'goal': \"from to {''}: how does the project go?\", 'tasks': []}\n\n# Current Task\n\n\n# Instruction\nYou are a team leader, and you are responsible for drafting tasks and routing tasks to your team members.\nYour team member:\nTim: Team Leader, \nAlice: Product Manager, efficiently create a successful product that meets market demands and user expectations\nBob: Architect, design a concise, usable, complete software system\nEve: Project Manager, break down tasks according to PRD/technical design, generate a task list, and analyze task dependencies to start with the prerequisite modules\nAlex: Engineer, Take on game, app, and web development\nDavid: DataAnalyst, Take on any data-related tasks, such as data analysis, machine learning, deep learning, web browsing, web scraping, web searching, web deployment, terminal operation, git and github operation, etc.\n\nYou should NOT assign consecutive tasks to the same team member, instead, assign an aggregated task (or the complete requirement) and let the team member to decompose it.\nWhen creating a new plan involving multiple members, create all tasks at once.\nIf plan is created, you should track the progress based on team member feedback message, and update plan accordingly, such as Plan.finish_current_task, Plan.reset_task, Plan.replace_task, etc.\nYou should use TeamLeader.publish_team_message to team members, asking them to start their task. DONT omit any necessary info such as path, link, environment, programming language, framework, requirement, constraint from original content to team members because you are their sole info source.\nPay close attention to new user message, review the conversation history, use RoleZero.reply_to_human to respond to the user directly, DON'T ask your team members.\n\nNote:\n1. If the requirement is a pure DATA-RELATED requirement, such as bug fixes, issue reporting, environment setup, terminal operations, pip install, web browsing, web scraping, web searching, web imitation, data science, data analysis, machine learning, deep learning, text-to-image etc. DON'T decompose it, assign a single task with the original user requirement as instruction directly to Data Analyst.\n2. If the requirement is developing a software, game, app, or website, excluding the above data-related tasks, you should decompose the requirement into multiple tasks and assign them to different team members based on their expertise, usually the sequence of Product Manager -> Architect -> Project Manager -> Engineer -> (optional: QaEngine if present) -> (optional: DataAnalyst if user requests deployment), each assigned ONE task. When publishing message to Product Manager, you should directly copy the full original user requirement.\n3. If the requirement contains both DATA-RELATED part mentioned in 1 and software development part mentioned in 2, you should decompose the software development part and assign them to different team members based on their expertise, and assign the DATA-RELATED part to Data Analyst David directly.\n4. If the requirement is a common-sense, logical, or math problem, you should respond directly without assigning any task to team members.\n5. If you think the requirement is not clear or ambiguous, you should ask the user for clarification immediately. Assign tasks only after all info is clear.\n6. It is helpful for Engineer to have both the system design and the project schedule for writing the code, so include paths of both files (if available) and remind Engineer to definitely read them when publishing message to Engineer.\n7. If the requirement is writing a TRD and software framework, you should assign it to Architect. When publishing message to Architect, you should directly copy the full original user requirement.\n", - } -] - -REQ1 = json.dumps(OBJ1) -REQ2 = json.dumps(OBJ2) -REQ3 = """Conversation History: -[ - ..., - {'role': 'assistant', 'content': 'from Alice(Product Manager) to {'Bob'}: {'docs': {'20240424153821.json': {'root_path': 'docs/prd', 'filename': '20240424153821.json', 'content': '{"Language":"en_us","Programming Language":"Python","Original Requirements":"create a cli snake game","Project Name":"snake_game","Product Goals":["Develop an intuitive and addictive snake game",...], ...}}}}}, -] -""" -REQ4 = json.dumps(OBJ4) - -RESP1 = """ -Explanation: The requirement is about software development. Assign each tasks to a different team member based on their expertise. When publishing message to Product Manager, we copy original user requirement directly to ensure no information loss. -```json -[ - { - "command_name": "Plan.append_task", - "args": { - "task_id": "1", - "dependent_task_ids": [], - "instruction": "Create a product requirement document (PRD) outlining the features, user interface, and user experience of the CLI python snake game.", - "assignee": "Alice" - } - }, - { - "command_name": "Plan.append_task", - "args": { - "task_id": "2", - "dependent_task_ids": ["1"], - "instruction": "Design the software architecture for the CLI snake game, including the choice of programming language, libraries, and data flow.", - "assignee": "Bob" - } - }, - { - "command_name": "Plan.append_task", - "args": { - "task_id": "3", - "dependent_task_ids": ["2"], - "instruction": "Break down the architecture into manageable tasks, identify task dependencies, and prepare a detailed task list for implementation.", - "assignee": "Eve" - } - }, - { - "command_name": "Plan.append_task", - "args": { - "task_id": "4", - "dependent_task_ids": ["3"], - "instruction": "Implement the core game logic for the CLI snake game, including snake movement, food generation, and score tracking.", - "assignee": "Alex" - } - }, - { - "command_name": "Plan.append_task", - "args": { - "task_id": "5", - "dependent_task_ids": ["4"], - "instruction": "Write comprehensive tests for the game logic and user interface to ensure functionality and reliability.", - "assignee": "Edward" - } - }, - { - "command_name": "TeamLeader.publish_message", - "args": { - "content": "Create a cli snake game using Python", - "send_to": "Alice" - } - }, - { - "command_name": "RoleZero.reply_to_human", - "args": { - "content": "I have assigned the tasks to the team members. Alice will create the PRD, Bob will design the software architecture, Eve will break down the architecture into tasks, Alex will implement the core game logic, and Edward will write comprehensive tests. The team will work on the project accordingly" - } - }, - { - "command_name": "end" - } -] -``` -""" -RESP2 = """ -Explanation: DON'T decompose requirement if it is a DATA-RELATED task, assign a single task directly to Data Analyst David. He will manage the decomposition and implementation. -```json -[ - { - "command_name": "Plan.append_task", - "args": { - "task_id": "1", - "dependent_task_ids": [], - "instruction": "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy.", - "assignee": "David" - } - }, - { - "command_name": "TeamLeader.publish_message", - "args": { - "content": "Run data analysis on sklearn Wine recognition dataset, include a plot, and train a model to predict wine class (20% as validation), and show validation accuracy.", - "send_to": "David" - } - }, - { - "command_name": "RoleZero.reply_to_human", - "args": { - "content": "I have assigned the task to David. He will break down the task further by himself and starts solving it." - } - }, - { - "command_name": "end" - } -] -``` -""" -RESP3 = """ -Explanation: You received a message from Alice, the Product Manager, that she has completed the PRD, use Plan.finish_current_task to mark her task as finished and moves the plan to the next task. Based on plan status, next task is for Bob (Architect), publish a message asking him to start. The message content should contain important path info. -```json -[ - { - "command_name": "Plan.finish_current_task", - "args": {} - }, - { - "command_name": "TeamLeader.publish_message", - "args": { - "content": "Please design the software architecture for the snake game based on the PRD created by Alice. The PRD is at 'docs/prd/20240424153821.json'. Include the choice of programming language, libraries, and data flow, etc.", - "send_to": "Bob" - } - }, - { - "command_name": "RoleZero.reply_to_human", - "args": { - "content": "Alice has completed the PRD. I have marked her task as finished and sent the PRD to Bob. Bob will work on the software architecture." - } - }, - { - "command_name": "end" - } -] -``` -""" -RESP4 = """ -Explanation: The user is asking for a general update on the project status. Give a straight answer about the current task the team is working on and provide a summary of the completed tasks. -```json -[ - { - "command_name": "RoleZero.reply_to_human", - "args": { - "content": "The team is currently working on ... We have completed ..." - } - }, - { - "command_name": "end" - } -] -``` -""" - -TEAM_LEADER_EXPERIENCES = ( - (REQ1, RESP1), - (REQ2, RESP2), - (REQ3, RESP3), - (REQ4, RESP4), -) diff --git a/examples/exp_pool/manager.py b/examples/exp_pool/manager.py index 3216e78b8..ae998214a 100644 --- a/examples/exp_pool/manager.py +++ b/examples/exp_pool/manager.py @@ -1,4 +1,8 @@ -"""Simple example of experience pool.""" +""" +Demonstrate the creation and querying of experiences. + +This script creates a new experience, logs its creation, and then queries for experiences matching the same request. +""" import asyncio @@ -8,12 +12,16 @@ from metagpt.logs import logger async def main(): - req = "Simple task." - exp = Experience(req=req, resp="echo", entry_type=EntryType.MANUAL) + # Define the simple request and response + req = "Simple req" + resp = "Simple resp" + # Add the new experience + exp = Experience(req=req, resp=resp, entry_type=EntryType.MANUAL) exp_manager.create_exp(exp) logger.info(f"New experience created for the request `{req}`.") + # Query for experiences matching the request exps = await exp_manager.query_exps(req) logger.info(f"Got experiences: {exps}") diff --git a/examples/exp_pool/scorer.py b/examples/exp_pool/scorer.py index c412feaf3..aafcee63f 100644 --- a/examples/exp_pool/scorer.py +++ b/examples/exp_pool/scorer.py @@ -2,13 +2,16 @@ import asyncio from metagpt.exp_pool.scorers import SimpleScorer +# Request to implement quicksort in Python REQ = "Write a program to implement quicksort in python." +# First response: Quicksort implementation without base case RESP1 = """ def quicksort(arr): return quicksort([x for x in arr[1:] if x <= arr[0]]) + [arr[0]] + quicksort([x for x in arr[1:] if x > arr[0]]) """ +# Second response: Quicksort implementation with base case RESP2 = """ def quicksort(arr): if len(arr) <= 1: @@ -18,6 +21,15 @@ def quicksort(arr): async def simple(): + """Evaluates two quicksort implementations using SimpleScorer. + + Example: + { + "val": 3, + "reason": "The response attempts to implement quicksort but contains a critical flaw: it lacks a base case to terminate the recursion, which will lead to a maximum recursion depth exceeded error for non-empty lists. Additionally, the function does not handle empty lists properly. A correct implementation should include a base case to handle lists of length 0 or 1." + } + """ + scorer = SimpleScorer() await scorer.evaluate(req=REQ, resp=RESP1) diff --git a/tests/metagpt/exp_pool/test_manager.py b/tests/metagpt/exp_pool/test_manager.py index 2f712ad44..3fb2aaa6d 100644 --- a/tests/metagpt/exp_pool/test_manager.py +++ b/tests/metagpt/exp_pool/test_manager.py @@ -3,9 +3,8 @@ import pytest from metagpt.config2 import Config from metagpt.configs.exp_pool_config import ExperiencePoolConfig from metagpt.configs.llm_config import LLMConfig -from metagpt.exp_pool.manager import ExperienceManager -from metagpt.exp_pool.schema import Experience -from metagpt.rag.engines import SimpleEngine +from metagpt.exp_pool.manager import Experience, ExperienceManager +from metagpt.exp_pool.schema import QueryType class TestExperienceManager: @@ -15,50 +14,65 @@ class TestExperienceManager: @pytest.fixture def mock_storage(self, mocker): - engine = mocker.MagicMock(spec=SimpleEngine) + engine = mocker.MagicMock() engine.add_objs = mocker.MagicMock() engine.aretrieve = mocker.AsyncMock(return_value=[]) engine._retriever = mocker.MagicMock() engine._retriever._vector_store = mocker.MagicMock() - engine._retriever._vector_store._get = mocker.MagicMock(return_value=mocker.MagicMock(ids=[])) + engine._retriever._vector_store._collection = mocker.MagicMock() + engine._retriever._vector_store._collection.count = mocker.MagicMock(return_value=10) return engine @pytest.fixture - def mock_experience_manager(self, mock_config, mock_storage): - return ExperienceManager(config=mock_config, storage=mock_storage) + def exp_manager(self, mock_config, mock_storage): + manager = ExperienceManager(config=mock_config) + manager._storage = mock_storage + return manager - @pytest.fixture - def mock_experience(self): - return Experience(req="req", resp="resp") - - def test_initialize_storage(self, mock_experience_manager, mock_storage): - assert mock_experience_manager.storage is mock_storage - - def test_create_exp(self, mock_experience_manager, mock_experience): - mock_experience_manager.create_exp(mock_experience) - mock_experience_manager.storage.add_objs.assert_called_with([mock_experience]) - - def test_create_exp_write_disabled(self, mock_experience_manager, mock_experience, mock_config): - mock_config.exp_pool.enable_write = False - mock_experience_manager.create_exp(mock_experience) - mock_experience_manager.storage.add_objs.assert_not_called() + def test_vector_store_property(self, exp_manager): + assert exp_manager.vector_store == exp_manager.storage._retriever._vector_store @pytest.mark.asyncio - async def test_query_exps(self, mock_experience_manager, mocker): - req = "req" - resp = "resp" - tag = "test" - experiences = [Experience(req=req, resp=resp, tag="test"), Experience(req=req, resp=resp, tag="other")] - mock_experience_manager.storage.aretrieve.return_value = [ - mocker.MagicMock(metadata={"obj": exp}) for exp in experiences - ] + async def test_query_exps_with_exact_match(self, exp_manager, mocker): + req = "exact query" + exp1 = Experience(req=req, resp="response1") + exp2 = Experience(req="different query", resp="response2") - result = await mock_experience_manager.query_exps(req, tag) + mock_node1 = mocker.MagicMock(metadata={"obj": exp1}) + mock_node2 = mocker.MagicMock(metadata={"obj": exp2}) + + exp_manager.storage.aretrieve.return_value = [mock_node1, mock_node2] + + result = await exp_manager.query_exps(req, query_type=QueryType.EXACT) assert len(result) == 1 - assert result[0].tag == "test" + assert result[0].req == req @pytest.mark.asyncio - async def test_query_exps_no_read_permission(self, mock_experience_manager, mock_config): + async def test_query_exps_with_tag_filter(self, exp_manager, mocker): + tag = "test_tag" + exp1 = Experience(req="query1", resp="response1", tag=tag) + exp2 = Experience(req="query2", resp="response2", tag="other_tag") + + mock_node1 = mocker.MagicMock(metadata={"obj": exp1}) + mock_node2 = mocker.MagicMock(metadata={"obj": exp2}) + + exp_manager.storage.aretrieve.return_value = [mock_node1, mock_node2] + + result = await exp_manager.query_exps("query", tag=tag) + assert len(result) == 1 + assert result[0].tag == tag + + def test_get_exps_count(self, exp_manager): + assert exp_manager.get_exps_count() == 10 + + def test_create_exp_write_disabled(self, exp_manager, mock_config): + mock_config.exp_pool.enable_write = False + exp = Experience(req="test", resp="response") + exp_manager.create_exp(exp) + exp_manager.storage.add_objs.assert_not_called() + + @pytest.mark.asyncio + async def test_query_exps_read_disabled(self, exp_manager, mock_config): mock_config.exp_pool.enable_read = False - result = await mock_experience_manager.query_exps("query") + result = await exp_manager.query_exps("query") assert result == [] diff --git a/tests/metagpt/exp_pool/test_serializers/test_role_zero.py b/tests/metagpt/exp_pool/test_serializers/test_role_zero.py index d4525d535..a168650fc 100644 --- a/tests/metagpt/exp_pool/test_serializers/test_role_zero.py +++ b/tests/metagpt/exp_pool/test_serializers/test_role_zero.py @@ -22,7 +22,7 @@ class TestRoleZeroSerializer: return [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}] def test_serialize_req_empty_input(self, serializer: RoleZeroSerializer): - assert serializer.serialize_req([]) == "" + assert serializer.serialize_req(req=[]) == "" def test_serialize_req_with_content(self, serializer: RoleZeroSerializer, last_item: dict): req = [ @@ -33,7 +33,7 @@ class TestRoleZeroSerializer: expected_output = json.dumps( [{"role": "user", "content": "Command Editor.read executed: file_path=test.py"}, last_item] ) - assert serializer.serialize_req(req) == expected_output + assert serializer.serialize_req(req=req) == expected_output def test_filter_req(self, serializer: RoleZeroSerializer): req = [ diff --git a/tests/metagpt/exp_pool/test_serializers/test_simple.py b/tests/metagpt/exp_pool/test_serializers/test_simple.py index 05ef1ca11..2a6bf96e3 100644 --- a/tests/metagpt/exp_pool/test_serializers/test_simple.py +++ b/tests/metagpt/exp_pool/test_serializers/test_simple.py @@ -8,28 +8,28 @@ class TestSimpleSerializer: def serializer(self): return SimpleSerializer() - def test_serialize_req(self, serializer): + def test_serialize_req(self, serializer: SimpleSerializer): # Test with different types of input - assert serializer.serialize_req(123) == "123" - assert serializer.serialize_req("test") == "test" - assert serializer.serialize_req([1, 2, 3]) == "[1, 2, 3]" - assert serializer.serialize_req({"a": 1}) == "{'a': 1}" + assert serializer.serialize_req(req=123) == "123" + assert serializer.serialize_req(req="test") == "test" + assert serializer.serialize_req(req=[1, 2, 3]) == "[1, 2, 3]" + assert serializer.serialize_req(req={"a": 1}) == "{'a': 1}" - def test_serialize_resp(self, serializer): + def test_serialize_resp(self, serializer: SimpleSerializer): # Test with different types of input assert serializer.serialize_resp(456) == "456" assert serializer.serialize_resp("response") == "response" assert serializer.serialize_resp([4, 5, 6]) == "[4, 5, 6]" assert serializer.serialize_resp({"b": 2}) == "{'b': 2}" - def test_deserialize_resp(self, serializer): + def test_deserialize_resp(self, serializer: SimpleSerializer): # Test with different types of input assert serializer.deserialize_resp("789") == "789" assert serializer.deserialize_resp("test_response") == "test_response" assert serializer.deserialize_resp("[7, 8, 9]") == "[7, 8, 9]" assert serializer.deserialize_resp("{'c': 3}") == "{'c': 3}" - def test_roundtrip(self, serializer): + def test_roundtrip(self, serializer: SimpleSerializer): # Test serialization and deserialization roundtrip original = "test_roundtrip" serialized = serializer.serialize_resp(original) @@ -37,8 +37,8 @@ class TestSimpleSerializer: assert deserialized == original @pytest.mark.parametrize("input_value", [123, "test", [1, 2, 3], {"a": 1}, None]) - def test_serialize_req_types(self, serializer, input_value): + def test_serialize_req_types(self, serializer: SimpleSerializer, input_value): # Test serialize_req with various input types - result = serializer.serialize_req(input_value) + result = serializer.serialize_req(req=input_value) assert isinstance(result, str) assert result == str(input_value)