From 0b7d7bdf559976ef6f060204bd3ddcff78bc2533 Mon Sep 17 00:00:00 2001 From: lidanyang Date: Tue, 9 Jul 2024 17:00:08 +0800 Subject: [PATCH] add prompt for scraping task --- metagpt/prompts/task_type.py | 6 ++++++ metagpt/strategy/task_type.py | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/metagpt/prompts/task_type.py b/metagpt/prompts/task_type.py index 5b1ffc744..312421c21 100644 --- a/metagpt/prompts/task_type.py +++ b/metagpt/prompts/task_type.py @@ -53,3 +53,9 @@ The current task is about converting image into webpage code. please note the fo - Single-Step Code Generation: Execute the entire code generation process in a single step, encompassing HTML, CSS, and JavaScript. Avoid fragmenting the code generation into multiple separate steps to maintain consistency and simplify the development workflow. - Save webpages: Be sure to use the save method provided. """ + +# Prompt for taking on "web_scraping" tasks +WEB_SCRAPING_PROMPT = """ +- Remember to view and print the necessary HTML content in a separate task to understand the structure first before scraping data. +- Since the data required by the user may not correspond directly to the actual HTML element names, you should thoroughly analyze the HTML structure and meanings of all elements in the execution result first. Ensure the `class_` in your code is derived from the actual HTML structure directly, not based on your knowledge. To ensure it, analyze the most suitable location of the `class_` in the actual HTML content before writing code. 
+""" diff --git a/metagpt/strategy/task_type.py b/metagpt/strategy/task_type.py index 2bc53b964..b44cc3ac0 100644 --- a/metagpt/strategy/task_type.py +++ b/metagpt/strategy/task_type.py @@ -8,7 +8,7 @@ from metagpt.prompts.task_type import ( FEATURE_ENGINEERING_PROMPT, IMAGE2WEBPAGE_PROMPT, MODEL_EVALUATE_PROMPT, - MODEL_TRAIN_PROMPT, + MODEL_TRAIN_PROMPT, WEB_SCRAPING_PROMPT, ) @@ -62,6 +62,7 @@ class TaskType(Enum): WEBSCRAPING = TaskTypeDef( name="web scraping", desc="For scraping data from web pages.", + guidance=WEB_SCRAPING_PROMPT, ) EMAIL_LOGIN = TaskTypeDef( name="email login",