Merge pull request #992 from garylin2099/di_fixes

update examples etc.
This commit is contained in:
garylin2099 2024-03-12 17:00:11 +08:00 committed by GitHub
commit c88d76a720
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 61 additions and 21 deletions

View file

@ -7,13 +7,31 @@
from metagpt.roles.di.data_interpreter import DataInterpreter
# Requirement prompt: pull the `paperlist` table from the ICLR 2024 statistics page,
# keep only papers whose title mentions multi-agent or LLM topics, and save them to CSV.
# NOTE: the literal must open with exactly three quotes; a fourth quote would become
# the first character of the prompt sent to the model.
PAPER_LIST_REQ = """
Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*
"""
# Requirement prompt: scrape the first page of products from scrapeme.live and save the
# product name, price, product URL and image URL to a CSV file.
ECOMMERCE_REQ = """
Get products data from website https://scrapeme.live/shop/ and save it as a csv file.
**Notice: Firstly parse the web page encoding and the text HTML structure;
The first page product name, price, product URL, and image URL must be saved in the csv;**
"""
# Requirement prompt (written in Chinese; the prompt itself notes the target is a Chinese
# website): collect startup financing news flashes from the 36kr page. The outlined steps are:
# 1. crawl the page and save the HTML locally;
# 2. print the 2000 characters of HTML after the 7th occurrence of the news-flash keyword
#    as a sample;
# 3. study that sample and design a regex extracting each flash's title, link and time;
# 4. filter the flashes from the last 3 days and print the first 5 as list[dict];
# 5. save all results to a local CSV.
NEWS_36KR_REQ = """从36kr创投平台https://pitchhub.36kr.com/financing-flash 所有初创企业融资的信息, **注意: 这是一个中文网站**;
下面是一个大致流程, 你会根据每一步的运行结果对当前计划中的任务做出适当调整:
1. 爬取并本地保存html结构;
2. 直接打印第7个*`快讯`*关键词后2000个字符的html内容, 作为*快讯的html内容示例*;
3. 反思*快讯的html内容示例*中的规律, 设计正则匹配表达式来获取*`快讯`*的标题链接时间;
4. 筛选最近3天的初创企业融资*`快讯`*, 以list[dict]形式打印前5个
5. 将全部结果存在本地csv中
"""
async def main():
prompt = """Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables*"""
di = DataInterpreter(use_tools=True)
di = DataInterpreter(tools=["scrape_web_playwright"])
await di.run(prompt)
await di.run(ECOMMERCE_REQ)
if __name__ == "__main__":

View file

@ -4,7 +4,7 @@ from metagpt.roles.di.data_interpreter import DataInterpreter
async def main(requirement: str = ""):
di = DataInterpreter(use_tools=False)
di = DataInterpreter()
await di.run(requirement)

View file

@ -22,7 +22,7 @@ async def main():
Firstly, Please help me fetch the latest 5 senders and full letter contents.
Then, summarize each of the 5 emails into one sentence (you can do this by yourself, no need to import other models to do this) and output them in a markdown format."""
di = DataInterpreter(use_tools=True)
di = DataInterpreter()
await di.run(prompt)

View file

@ -15,7 +15,7 @@ Firstly, utilize Selenium and WebDriver for rendering.
Secondly, convert image to a webpage including HTML, CSS and JS in one go.
Finally, save webpage in a text file.
Note: All required dependencies and environments have been fully installed and configured."""
di = DataInterpreter(use_tools=True)
di = DataInterpreter(tools=["GPTvGenerator"])
await di.run(prompt)

View file

@ -1,10 +1,10 @@
import asyncio
from metagpt.roles.di.ml_engineer import MLEngineer
from metagpt.roles.di.data_interpreter import DataInterpreter
async def main(requirement: str):
role = MLEngineer(auto_run=True, use_tools=True)
role = DataInterpreter(use_reflection=True, tools=["<all>"])
await role.run(requirement)

View file

@ -4,7 +4,7 @@ from metagpt.roles.di.data_interpreter import DataInterpreter
async def main(requirement: str = ""):
di = DataInterpreter(use_tools=False)
di = DataInterpreter()
await di.run(requirement)

View file

@ -8,7 +8,7 @@ from metagpt.roles.di.data_interpreter import DataInterpreter
async def main(requirement: str = ""):
di = DataInterpreter(use_tools=True, goal=requirement)
di = DataInterpreter(tools=["SDEngine"])
await di.run(requirement)

View file

@ -4,7 +4,7 @@ from metagpt.roles.di.data_interpreter import DataInterpreter
async def main(requirement: str = ""):
di = DataInterpreter(use_tools=False)
di = DataInterpreter()
await di.run(requirement)

View file

@ -9,7 +9,7 @@ from metagpt.actions.di.ask_review import ReviewConst
from metagpt.actions.di.execute_nb_code import ExecuteNbCode
from metagpt.actions.di.write_analysis_code import CheckData, WriteCodeWithTools
from metagpt.logs import logger
from metagpt.prompts.mi.write_analysis_code import DATA_INFO
from metagpt.prompts.di.write_analysis_code import DATA_INFO
from metagpt.roles import Role
from metagpt.schema import Message, Task, TaskResult
from metagpt.strategy.task_type import TaskType

View file

@ -19,29 +19,31 @@ class TaskTypeDef(BaseModel):
class TaskType(Enum):
"""By identifying specific types of tasks, we can inject human priors (guidance) to help task solving"""
EDA = TaskTypeDef(
name="eda",
desc="For performing exploratory data analysis",
guidance=EDA_PROMPT,
)
DATA_PREPROCESS = TaskTypeDef(
name="data_preprocess",
name="data preprocessing",
desc="For preprocessing dataset in a data analysis or machine learning task ONLY,"
"general data operation doesn't fall into this type",
guidance=DATA_PREPROCESS_PROMPT,
)
FEATURE_ENGINEERING = TaskTypeDef(
name="feature_engineering",
name="feature engineering",
desc="Only for creating new columns for input data.",
guidance=FEATURE_ENGINEERING_PROMPT,
)
MODEL_TRAIN = TaskTypeDef(
name="model_train",
name="model train",
desc="Only for training model.",
guidance=MODEL_TRAIN_PROMPT,
)
MODEL_EVALUATE = TaskTypeDef(
name="model_evaluate",
name="model evaluate",
desc="Only for evaluating model.",
guidance=MODEL_EVALUATE_PROMPT,
)

View file

@ -14,7 +14,6 @@ import requests
from aiohttp import ClientSession
from PIL import Image, PngImagePlugin
#
from metagpt.const import SD_OUTPUT_FILE_REPO, SOURCE_ROOT
from metagpt.logs import logger
from metagpt.tools.tool_registry import register_tool

View file

@ -57,13 +57,12 @@ class RecommendTool(Action):
class ToolRecommender(BaseModel):
"""
The default ToolRecommender:
1. Recall: If plan exists, use exact match between task type and tool type to recall tools;
If plan doesn't exist (e.g. we use ReAct), return all user-specified tools;
1. Recall: To be implemented in subclasses. Recall tools based on the given context and plan.
2. Rank: Use LLM to select final candidates from recalled set.
"""
tools: dict[str, Tool] = {}
force: bool = False
force: bool = False # whether to forcedly recommend the specified tools
@field_validator("tools", mode="before")
@classmethod
@ -146,6 +145,26 @@ class ToolRecommender(BaseModel):
return list(valid_tools.values())[:topk]
class TypeMatchToolRecommender(ToolRecommender):
    """
    A legacy ToolRecommender using task type matching at the recall stage:
    1. Recall: Find tools based on exact match between task type and tool tag;
    2. Rank: LLM rank, the same as the default ToolRecommender.
    """

    async def recall_tools(self, context: str = "", plan: Plan = None, topk: int = 20) -> list[Tool]:
        """Recall up to `topk` tools whose tag equals the current task's type.

        Args:
            context: Unused by this recommender; accepted so the signature matches
                other recommenders.
            plan: The current plan. When falsy, no task type is available and the
                user-specified tools are returned as-is (truncated to `topk`).
            topk: Maximum number of tools to return.

        Returns:
            The user-specified tools that are also registered under the current
            task type, in the order they appear in `self.tools`.
        """
        if not plan:
            return list(self.tools.values())[:topk]

        # find tools based on exact match between task type and tool tag
        task_type = plan.current_task.task_type
        # presumably a mapping of tool name -> Tool (it is indexed by name below)
        candidate_tools = TOOL_REGISTRY.get_tools_by_tag(task_type)

        # Filter in the insertion order of self.tools rather than via a set
        # intersection: set iteration order over strings is not stable across
        # interpreter runs, which would make the `[:topk]` cut nondeterministic.
        recalled_tools = [candidate_tools[tool_name] for tool_name in self.tools if tool_name in candidate_tools]

        return recalled_tools[:topk]
class BM25ToolRecommender(ToolRecommender):
"""
A ToolRecommender using BM25 at the recall stage:

View file

@ -71,3 +71,5 @@ Pillow
imap_tools==1.5.0 # Used by metagpt/tools/libs/email_login.py
qianfan==0.3.2
dashscope==1.14.1
rank-bm25==0.2.2 # for tool recommendation
jieba==0.42.1 # for tool recommendation