mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-15 11:02:36 +02:00
Merge remote-tracking branch 'origin/dev_tool_selection' into dev_tool_selection
This commit is contained in:
commit
824e285cc7
8 changed files with 139 additions and 52 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -148,6 +148,9 @@ allure-results
|
|||
.DS_Store
|
||||
.vscode
|
||||
|
||||
# Config
|
||||
config/config.yaml
|
||||
|
||||
log.txt
|
||||
docs/scripts/set_env.sh
|
||||
key.yaml
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
## The official OPENAI_API_BASE is https://api.openai.com/v1
|
||||
## If the official OPENAI_API_BASE is not available, we recommend using the [openai-forward](https://github.com/beidongjiedeguang/openai-forward).
|
||||
## Or, you can configure OPENAI_PROXY to access official OPENAI_API_BASE.
|
||||
OPENAI_API_BASE: "https://api.openai.com/v1"
|
||||
#OPENAI_API_BASE: "https://api.openai.com/v1"
|
||||
#OPENAI_PROXY: "http://127.0.0.1:8118"
|
||||
#OPENAI_API_KEY: "YOUR_API_KEY" # set the value to sk-xxx if you host the openai interface for open llm model
|
||||
OPENAI_API_MODEL: "gpt-4"
|
||||
|
|
@ -24,12 +24,13 @@ RPM: 10
|
|||
|
||||
#### if AZURE, check https://github.com/openai/openai-cookbook/blob/main/examples/azure/chat.ipynb
|
||||
#### You can use ENGINE or DEPLOYMENT mode
|
||||
#OPENAI_API_TYPE: "azure"
|
||||
#OPENAI_API_BASE: "YOUR_AZURE_ENDPOINT"
|
||||
#OPENAI_API_KEY: "YOUR_AZURE_API_KEY"
|
||||
#OPENAI_API_VERSION: "YOUR_AZURE_API_VERSION"
|
||||
#DEPLOYMENT_NAME: "YOUR_DEPLOYMENT_NAME"
|
||||
#DEPLOYMENT_ID: "YOUR_DEPLOYMENT_ID"
|
||||
OPENAI_API_TYPE: "azure"
|
||||
OPENAI_API_BASE: "https://deepwisdom.openai.azure.com/"
|
||||
OPENAI_API_KEY: "02ae6058d09849c691176befeae2107c"
|
||||
#OPENAI_API_VERSION: "2023-05-15"
|
||||
OPENAI_API_VERSION: "2023-07-01-preview"
|
||||
DEPLOYMENT_ID: "GPT-4"
|
||||
OPENAI_API_ENGINE: "gpt-4"
|
||||
|
||||
#### If you use zhipuai (from `https://open.bigmodel.cn`), you can set the key here or export API_KEY="YOUR_API_KEY"
|
||||
# ZHIPUAI_API_KEY: "YOUR_API_KEY"
|
||||
|
|
|
|||
|
|
@ -16,10 +16,14 @@ from metagpt.prompts.ml_engineer import (
|
|||
ML_SPECIFIC_PROMPT,
|
||||
ML_MODULE_MAP,
|
||||
TOOL_OUTPUT_DESC,
|
||||
TOOL_USAGE_PROMPT,
|
||||
)
|
||||
from metagpt.schema import Message, Plan
|
||||
from metagpt.tools.functions import registry
|
||||
from metagpt.utils.common import create_func_config
|
||||
from metagpt.prompts.ml_engineer import GEN_DATA_DESC_PROMPT, GENERATE_CODE_PROMPT
|
||||
from metagpt.utils.common import CodeParser
|
||||
from metagpt.actions.execute_code import ExecutePyCode
|
||||
|
||||
|
||||
class BaseWriteAnalysisCode(Action):
|
||||
|
|
@ -47,13 +51,13 @@ class BaseWriteAnalysisCode(Action):
|
|||
|
||||
# Add the default system prompt
|
||||
if (
|
||||
default_system_msg not in messages[0]["content"]
|
||||
and messages[0]["role"] != "system"
|
||||
default_system_msg not in messages[0]["content"]
|
||||
and messages[0]["role"] != "system"
|
||||
):
|
||||
messages.insert(0, {"role": "system", "content": default_system_msg})
|
||||
elif (
|
||||
default_system_msg not in messages[0]["content"]
|
||||
and messages[0]["role"] == "system"
|
||||
default_system_msg not in messages[0]["content"]
|
||||
and messages[0]["role"] == "system"
|
||||
):
|
||||
messages[0] = {
|
||||
"role": "system",
|
||||
|
|
@ -62,7 +66,7 @@ class BaseWriteAnalysisCode(Action):
|
|||
return messages
|
||||
|
||||
async def run(
|
||||
self, context: List[Message], plan: Plan = None, code_steps: str = ""
|
||||
self, context: List[Message], plan: Plan = None, code_steps: str = ""
|
||||
) -> str:
|
||||
"""Run of a code writing action, used in data analysis or modeling
|
||||
|
||||
|
|
@ -83,12 +87,12 @@ class WriteCodeByGenerate(BaseWriteAnalysisCode):
|
|||
super().__init__(name, context, llm)
|
||||
|
||||
async def run(
|
||||
self,
|
||||
context: [List[Message]],
|
||||
plan: Plan = None,
|
||||
code_steps: str = "",
|
||||
system_msg: str = None,
|
||||
**kwargs,
|
||||
self,
|
||||
context: [List[Message]],
|
||||
plan: Plan = None,
|
||||
code_steps: str = "",
|
||||
system_msg: str = None,
|
||||
**kwargs,
|
||||
) -> str:
|
||||
context.append(Message(content=self.REUSE_CODE_INSTRUCTION, role="user"))
|
||||
prompt = self.process_msg(context, system_msg)
|
||||
|
|
@ -98,6 +102,7 @@ class WriteCodeByGenerate(BaseWriteAnalysisCode):
|
|||
|
||||
class WriteCodeWithTools(BaseWriteAnalysisCode):
|
||||
"""Write code with help of local available tools. Choose tools first, then generate code to use the tools"""
|
||||
execute_code = ExecutePyCode()
|
||||
|
||||
@staticmethod
|
||||
def _parse_recommend_tools(module: str, recommend_tools: list) -> List[Dict]:
|
||||
|
|
@ -121,10 +126,10 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
|
|||
return tool_catalog
|
||||
|
||||
async def _tool_recommendation(
|
||||
self,
|
||||
context: [List[Message]],
|
||||
code_steps: str,
|
||||
available_tools: list
|
||||
self,
|
||||
context: [List[Message]],
|
||||
code_steps: str,
|
||||
available_tools: list
|
||||
) -> list:
|
||||
"""
|
||||
Recommend tools for the specified task.
|
||||
|
|
@ -148,15 +153,28 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
|
|||
recommend_tools = rsp["recommend_tools"]
|
||||
return recommend_tools
|
||||
|
||||
|
||||
async def run(
|
||||
self,
|
||||
context: List[Message],
|
||||
plan: Plan = None,
|
||||
code_steps: str = "",
|
||||
self,
|
||||
context: List[Message],
|
||||
plan: Plan = None,
|
||||
code_steps: str = "",
|
||||
**kwargs,
|
||||
) -> str:
|
||||
task_type = plan.current_task.task_type
|
||||
logger.info(f"task_type is: {task_type}")
|
||||
available_tools = registry.get_all_schema_by_module(task_type)
|
||||
special_prompt = ML_SPECIFIC_PROMPT.get(task_type, "")
|
||||
|
||||
# special_prompt = ML_SPECIFIC_PROMPT.get(task_type, "")
|
||||
|
||||
finished_tasks = plan.get_finished_tasks()
|
||||
code_context = [task.code for task in finished_tasks]
|
||||
|
||||
code_context = "\n\n".join(code_context)
|
||||
|
||||
### add runtime info
|
||||
result, success = await self.execute_code.run(code_context)
|
||||
logger.info(result)
|
||||
|
||||
if len(available_tools) > 0:
|
||||
available_tools = [
|
||||
|
|
@ -164,25 +182,46 @@ class WriteCodeWithTools(BaseWriteAnalysisCode):
|
|||
for tool in available_tools
|
||||
]
|
||||
|
||||
final_code = code_context
|
||||
|
||||
recommend_tools = await self._tool_recommendation(context, code_steps, available_tools)
|
||||
tool_catalog = self._parse_recommend_tools(task_type, recommend_tools)
|
||||
logger.info(f"Recommended tools: \n{recommend_tools}")
|
||||
|
||||
module_name = ML_MODULE_MAP[task_type]
|
||||
output_desc = TOOL_OUTPUT_DESC.get(task_type, "")
|
||||
prompt = TOO_ORGANIZATION_PROMPT.format(
|
||||
special_prompt=special_prompt,
|
||||
|
||||
hist_info = f"Previous finished code is \n\n ```Python {final_code} ``` \n\n " \
|
||||
f"Runtime result is {result} \n\n"
|
||||
|
||||
prompt = TOOL_USAGE_PROMPT.format(
|
||||
goal=plan.current_task.instruction,
|
||||
context=hist_info,
|
||||
code_steps=code_steps,
|
||||
module_name=module_name,
|
||||
output_desc=output_desc,
|
||||
function_catalog=tool_catalog,
|
||||
)
|
||||
context.append(Message(content=prompt, role="user"))
|
||||
else:
|
||||
context.append(Message(content=self.REUSE_CODE_INSTRUCTION, role="user"))
|
||||
context.append(Message(content=special_prompt, role="user"))
|
||||
|
||||
prompt = self.process_msg(context)
|
||||
tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS)
|
||||
rsp = await self.llm.aask_code(prompt, **tool_config)
|
||||
return rsp["code"]
|
||||
tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS)
|
||||
|
||||
rsp = await self.llm.aask_code(prompt, **tool_config)
|
||||
logger.info(f"rsp is: {rsp}")
|
||||
final_code = final_code + "\n\n" + rsp["code"]
|
||||
|
||||
return final_code
|
||||
|
||||
else:
|
||||
hist_info = f"Previous finished code is \n\n ```Python {code_context} ``` \n\n " \
|
||||
f"runtime result is {result} \n\n"
|
||||
|
||||
prompt = GENERATE_CODE_PROMPT.format(
|
||||
goal=plan.current_task.instruction,
|
||||
context=hist_info,
|
||||
)
|
||||
|
||||
tool_config = create_func_config(CODE_GENERATOR_WITH_TOOLS)
|
||||
logger.info(f"prompt is: {prompt}")
|
||||
rsp = await self.llm.aask_code(prompt, **tool_config)
|
||||
logger.info(f"rsp is: {rsp}")
|
||||
return rsp["code"]
|
||||
|
|
|
|||
|
|
@ -23,7 +23,6 @@ Output the information in a JSON format, as shown in this example:
|
|||
```
|
||||
"""
|
||||
|
||||
|
||||
ASSIGN_TASK_TYPE_PROMPT = """
|
||||
Please assign a task type to each task in the list below from the given categories:
|
||||
{task_list}
|
||||
|
|
@ -53,7 +52,6 @@ ASSIGN_TASK_TYPE = {
|
|||
},
|
||||
}
|
||||
|
||||
|
||||
TOOL_RECOMMENDATION_PROMPT = """
|
||||
Your are a tool recommender, the main goal is to recommend suitable tools for current task before coding. A tool means a function that can be used to help you solve the task.
|
||||
|
||||
|
|
@ -88,7 +86,6 @@ SELECT_FUNCTION_TOOLS = {
|
|||
},
|
||||
}
|
||||
|
||||
|
||||
CODE_GENERATOR_WITH_TOOLS = {
|
||||
"name": "add_subtask_code",
|
||||
"description": "Add new code cell of current task to the end of an active Jupyter notebook.",
|
||||
|
|
@ -104,6 +101,54 @@ CODE_GENERATOR_WITH_TOOLS = {
|
|||
},
|
||||
}
|
||||
|
||||
TOOL_USAGE_PROMPT = """
|
||||
## Target
|
||||
{goal}
|
||||
|
||||
## History Info
|
||||
{context}
|
||||
|
||||
## Available Tools:
|
||||
Each function is described in JSON format, including the function name and parameters. {output_desc}
|
||||
{function_catalog}
|
||||
|
||||
When you call a function above, you should import the function from `{module_name}` first, e.g.:
|
||||
```python
|
||||
from metagpt.tools.functions.libs.data_preprocess import fill_missing_value
|
||||
```end
|
||||
|
||||
## Your Output Format:
|
||||
Generate the complete code for this task:
|
||||
```python
|
||||
# Tools used: [function names or 'none']
|
||||
<your code for the current task, without any comments>
|
||||
```end
|
||||
|
||||
## Attention:
|
||||
Make sure use the columns from the dataset columns
|
||||
Finish your coding tasks as a helpful programmer based on the tools.
|
||||
|
||||
"""
|
||||
GENERATE_CODE_PROMPT = """
|
||||
## Target
|
||||
{goal}
|
||||
|
||||
## History Info
|
||||
{context}
|
||||
|
||||
## Your Output Format:
|
||||
Generate the complete code for this task:
|
||||
```python
|
||||
# Tools used: [function names or 'none']
|
||||
<your code for the current task>
|
||||
```end
|
||||
|
||||
## Attention:
|
||||
Make sure use the columns from the dataset columns
|
||||
Finish your coding tasks as a helpful programmer based on the tools.
|
||||
|
||||
"""
|
||||
|
||||
TOO_ORGANIZATION_PROMPT = """
|
||||
The previous conversation has provided all tasks step-by-step for the use goal and their statuses.
|
||||
Now, begin writing code for the current task. This code should writen strictly on the basis of all previous completed tasks code, not a standalone code. And avoid writing duplicate code that has already been written in previous tasks, such as repeated import of packages, reading data, etc.
|
||||
|
|
@ -167,7 +212,6 @@ CLASSIFICATION_MODEL_OUTPUT_DESC = ""
|
|||
|
||||
REGRESSION_MODEL_OUTPUT_DESC = ""
|
||||
|
||||
|
||||
ML_SPECIFIC_PROMPT = {
|
||||
"data_preprocess": DATA_PREPROCESS_PROMPT,
|
||||
"feature_engineering": FEATURE_ENGINEERING_PROMPT,
|
||||
|
|
|
|||
|
|
@ -159,6 +159,7 @@ class MLEngineer(Role):
|
|||
if self.data_path:
|
||||
self.data_desc = await self._generate_data_desc()
|
||||
|
||||
|
||||
# create initial plan and update until confirmation
|
||||
await self._update_plan()
|
||||
|
||||
|
|
@ -200,12 +201,10 @@ class MLEngineer(Role):
|
|||
success = False
|
||||
while not success and counter < max_retry:
|
||||
context = self.get_useful_memories()
|
||||
|
||||
# print("*" * 10)
|
||||
# print(context)
|
||||
# print("*" * 10)
|
||||
# breakpoint()
|
||||
|
||||
column_names_dict = {key: value["column_info"] for key,value in self.data_desc.items()}
|
||||
|
||||
if not self.use_tools or self.plan.current_task.task_type == "other":
|
||||
logger.info("Write code with pure generation")
|
||||
# code = "print('abc')"
|
||||
|
|
@ -215,8 +214,9 @@ class MLEngineer(Role):
|
|||
cause_by = WriteCodeByGenerate
|
||||
else:
|
||||
logger.info("Write code with tools")
|
||||
|
||||
code = await WriteCodeWithTools().run(
|
||||
context=context, plan=self.plan, code_steps=code_steps,
|
||||
context=context, plan=self.plan, code_steps=code_steps, **{"column_names": column_names_dict}
|
||||
)
|
||||
cause_by = WriteCodeWithTools
|
||||
|
||||
|
|
@ -296,8 +296,10 @@ if __name__ == "__main__":
|
|||
# requirement = "Run data analysis on sklearn Wisconsin Breast Cancer dataset, include a plot, train a model to predict targets (20% as validation), and show validation accuracy"
|
||||
# requirement = "Run EDA and visualization on this dataset, train a model to predict survival, report metrics on validation set (20%), dataset: workspace/titanic/train.csv"
|
||||
|
||||
from metagpt.const import DATA_PATH
|
||||
|
||||
requirement = "Perform data analysis on the provided data. Train a model to predict the target variable Survived. Include data preprocessing, feature engineering, and modeling in your pipeline. The metric is accuracy."
|
||||
data_path = "/data/lidanyang/tabular_data/titanic"
|
||||
data_path = f"{DATA_PATH}/titanic"
|
||||
|
||||
async def main(requirement: str = requirement, auto_run: bool = True, data_path: str = data_path):
|
||||
role = MLEngineer(goal=requirement, auto_run=auto_run, data_path=data_path)
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ from typing import Any, Callable, Coroutine, Literal, overload
|
|||
|
||||
from metagpt.config import CONFIG
|
||||
from metagpt.tools import WebBrowserEngineType
|
||||
from metagpt.utils.parse_html import WebPage
|
||||
# from metagpt.utils.parse_html import WebPage
|
||||
|
||||
|
||||
class WebBrowserEngine:
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
@File : __init__.py
|
||||
"""
|
||||
|
||||
from metagpt.utils.read_document import read_docx
|
||||
# from metagpt.utils.read_document import read_docx
|
||||
from metagpt.utils.singleton import Singleton
|
||||
from metagpt.utils.token_counter import (
|
||||
TOKEN_COSTS,
|
||||
|
|
@ -16,7 +16,7 @@ from metagpt.utils.token_counter import (
|
|||
|
||||
|
||||
__all__ = [
|
||||
"read_docx",
|
||||
# "read_docx",
|
||||
"Singleton",
|
||||
"TOKEN_COSTS",
|
||||
"count_message_tokens",
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@ tqdm==4.64.0
|
|||
# webdriver_manager<3.9
|
||||
anthropic==0.3.6
|
||||
typing-inspect==0.8.0
|
||||
typing_extensions==4.5.0
|
||||
libcst==1.0.1
|
||||
qdrant-client==1.4.0
|
||||
pytest-mock==3.11.1
|
||||
|
|
@ -46,7 +45,6 @@ wrapt==1.15.0
|
|||
websocket-client==0.58.0
|
||||
zhipuai==1.0.7
|
||||
rich==13.6.0
|
||||
nbclient==0.9.0
|
||||
nbformat==5.9.2
|
||||
ipython==8.17.2
|
||||
ipykernel==6.27.0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue