mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-21 14:05:17 +02:00
Optimize: Update certain prompt formats.
This commit is contained in:
parent
2d8a3f1296
commit
e43ae21d98
9 changed files with 43 additions and 49 deletions
|
|
@ -6,7 +6,7 @@ from metagpt.roles.researcher import RESEARCH_PATH, Researcher
|
|||
|
||||
|
||||
async def main():
|
||||
topic = "dataiku .vs datarobot"
|
||||
topic = "dataiku vs. datarobot"
|
||||
role = Researcher(language="en-us")
|
||||
await role.run(topic)
|
||||
print(f"save report to {RESEARCH_PATH / f'{topic}.md'}.")
|
||||
|
|
|
|||
|
|
@ -18,43 +18,34 @@ from metagpt.utils.text import generate_prompt_chunk, reduce_message_length
|
|||
LANG_PROMPT = "Please respond in {language}."
|
||||
|
||||
RESEARCH_BASE_SYSTEM = """You are an AI critical thinker research assistant. Your sole purpose is to write well \
|
||||
written, critically acclaimed, objective and structured reports on given text."""
|
||||
written, critically acclaimed, objective and structured reports on the given text."""
|
||||
|
||||
RESEARCH_TOPIC_SYSTEM = "You are an AI researcher assistant, and your research topic is \"{topic}\"."
|
||||
RESEARCH_TOPIC_SYSTEM = "You are an AI researcher assistant, and your research topic is:\n#TOPIC#\n{topic}"
|
||||
|
||||
SEARCH_TOPIC_PROMPT = """Please provide up to 2 necessary keywords related to your research topic that require Google search. \
|
||||
SEARCH_TOPIC_PROMPT = """Please provide up to 2 necessary keywords related to your research topic for Google search. \
|
||||
Your response must be in JSON format, for example: ["keyword1", "keyword2"]."""
|
||||
|
||||
SUMMARIZE_SEARCH_PROMPT = """### Requirements
|
||||
1. The keywords related to your research topic and the search results are shown in the "Reference Information" section.
|
||||
1. The keywords related to your research topic and the search results are shown in the "Search Result Information" section.
|
||||
2. Provide up to {decomposition_nums} queries related to your research topic base on the search results.
|
||||
3. Please respond in JSON format as follows: ["query1", "query2", "query3", ...].
|
||||
3. Please respond in the following JSON format: ["query1", "query2", "query3", ...].
|
||||
|
||||
### Reference Information
|
||||
{search}
|
||||
### Search Result Information
|
||||
{search_results}
|
||||
"""
|
||||
|
||||
DECOMPOSITION_PROMPT = """You are a researcher, and before delving into an topic, you break it down into several \
|
||||
sub-questions. These sub-questions can be researched through online searches to gather objective opinions about the given \
|
||||
topic.
|
||||
COLLECT_AND_RANKURLS_PROMPT = """### Topic
|
||||
{topic}
|
||||
### Query
|
||||
{query}
|
||||
|
||||
---
|
||||
The topic is: {topic}
|
||||
### The online search results
|
||||
{results}
|
||||
|
||||
---
|
||||
Now, please break down the provided topic into {decomposition_nums} search questions. You should respond with an array of \
|
||||
strings in JSON format like ["question1", "question2", ...].
|
||||
"""
|
||||
|
||||
COLLECT_AND_RANKURLS_PROMPT = """### Reference Information
|
||||
1. Topic: "{topic}"
|
||||
2. Query: "{query}"
|
||||
3. The online search results: {results}
|
||||
|
||||
---
|
||||
### Requirements
|
||||
Please remove irrelevant search results that are not related to the query or topic. Then, sort the remaining search results \
|
||||
based on link credibility. If two results have equal credibility, prioritize them based on relevance. Provide the ranked \
|
||||
results' indices in JSON format, like [0, 1, 3, 4, ...], without including other words.
|
||||
based on the link credibility. If two results have equal credibility, prioritize them based on the relevance. Provide the
|
||||
ranked results' indices in JSON format, like [0, 1, 3, 4, ...], without including other words.
|
||||
"""
|
||||
|
||||
WEB_BROWSE_AND_SUMMARIZE_PROMPT = '''### Requirements
|
||||
|
|
@ -74,7 +65,7 @@ CONDUCT_RESEARCH_PROMPT = '''### Reference Information
|
|||
|
||||
### Requirements
|
||||
Please provide a detailed research report in response to the following topic: "{topic}", using the information provided \
|
||||
above. The report must adhere to the following requirements:
|
||||
above. The report must meet the following requirements:
|
||||
|
||||
- Focus on directly addressing the chosen topic.
|
||||
- Ensure a well-structured and in-depth presentation, incorporating relevant facts and figures where available.
|
||||
|
|
@ -117,9 +108,7 @@ class CollectLinks(Action):
|
|||
A dictionary containing the search questions as keys and the collected URLs as values.
|
||||
"""
|
||||
system_text = system_text if system_text else RESEARCH_TOPIC_SYSTEM.format(topic=topic)
|
||||
search_topic_prompt = SEARCH_TOPIC_PROMPT.format(topic=topic)
|
||||
logger.debug(search_topic_prompt)
|
||||
keywords = await self._aask(search_topic_prompt, [system_text])
|
||||
keywords = await self._aask(SEARCH_TOPIC_PROMPT, [system_text])
|
||||
try:
|
||||
keywords = json.loads(keywords)
|
||||
keywords = parse_obj_as(list[str], keywords)
|
||||
|
|
@ -130,8 +119,8 @@ class CollectLinks(Action):
|
|||
|
||||
def gen_msg():
|
||||
while True:
|
||||
search = "\n".join(f"#### Keyword: {i}\n Search Result: {j}\n" for (i, j) in zip(keywords, results))
|
||||
prompt = SUMMARIZE_SEARCH_PROMPT.format(decomposition_nums=decomposition_nums, search=search)
|
||||
search_results = "\n".join(f"#### Keyword: {i}\n Search Result: {j}\n" for (i, j) in zip(keywords, results))
|
||||
prompt = SUMMARIZE_SEARCH_PROMPT.format(decomposition_nums=decomposition_nums, search_results=search_results)
|
||||
yield prompt
|
||||
remove = max(results, key=len)
|
||||
remove.pop()
|
||||
|
|
@ -144,7 +133,7 @@ class CollectLinks(Action):
|
|||
queries = json.loads(queries)
|
||||
queries = parse_obj_as(list[str], queries)
|
||||
except Exception as e:
|
||||
logger.exception(f"fail to break down the research question for {e}")
|
||||
logger.exception(f"fail to break down the research question due to {e}")
|
||||
queries = keywords
|
||||
ret = {}
|
||||
for query in queries:
|
||||
|
|
|
|||
|
|
@ -270,4 +270,4 @@ class OpenAIGPTAPI(BaseGPTAPI, RateLimiter):
|
|||
def get_max_tokens(self, messages: list[dict]):
|
||||
if not self.auto_max_tokens:
|
||||
return CONFIG.max_tokens_rsp
|
||||
return get_max_completion_tokens(messages, self.model)
|
||||
return get_max_completion_tokens(messages, self.model, CONFIG.max_tokens_rsp)
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ class Report(BaseModel):
|
|||
class Researcher(Role):
|
||||
def __init__(
|
||||
self,
|
||||
name: str = "Bob",
|
||||
name: str = "David",
|
||||
profile: str = "Researcher",
|
||||
goal: str = "Gather information and conduct research",
|
||||
constraints: str = "Ensure accuracy and relevance of information",
|
||||
|
|
@ -88,4 +88,4 @@ class Researcher(Role):
|
|||
|
||||
if __name__ == "__main__":
|
||||
role = Researcher(language="en-us")
|
||||
asyncio.run(role.run("dataiku .vs datarobot"))
|
||||
asyncio.run(role.run("dataiku vs. datarobot"))
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import Generator, Optional
|
||||
from urllib.parse import urljoin
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from pydantic import BaseModel
|
||||
|
|
@ -35,11 +35,11 @@ class WebPage(BaseModel):
|
|||
def get_links(self) -> Generator[str, None, None]:
|
||||
for i in self.soup.find_all("a", href=True):
|
||||
url = i["href"]
|
||||
if url.startswith("data:"):
|
||||
continue
|
||||
if not url.startswith(("http://", "https://")):
|
||||
url = urljoin(self.url, url)
|
||||
yield url
|
||||
result = urlparse(url)
|
||||
if not result.scheme and result.path:
|
||||
yield urljoin(self.url, url)
|
||||
elif url.startswith(("http://", "https://")):
|
||||
yield urljoin(self.url, url)
|
||||
|
||||
|
||||
def get_html_content(page: str, base: str):
|
||||
|
|
|
|||
|
|
@ -4,19 +4,19 @@ from metagpt.utils.token_counter import TOKEN_MAX, count_string_tokens
|
|||
|
||||
|
||||
def reduce_message_length(msgs: Generator[str, None, None], model_name: str, system_text: str, reserved: int = 0,) -> str:
|
||||
"""Reduce the length of messages to fit within the maximum token size.
|
||||
"""Reduce the length of concatenated message segments to fit within the maximum token size.
|
||||
|
||||
Args:
|
||||
msgs: A generator of strings representing the messages.
|
||||
msgs: A generator of strings representing progressively shorter valid prompts.
|
||||
model_name: The name of the encoding to use. (e.g., "gpt-3.5-turbo")
|
||||
system_text: The system prompts.
|
||||
reserved: The number of reserved tokens.
|
||||
|
||||
Returns:
|
||||
The reduced message.
|
||||
The concatenated message segments reduced to fit within the maximum token size.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If it fails to reduce the message length.
|
||||
RuntimeError: If it fails to reduce the concatenated message length.
|
||||
"""
|
||||
max_token = TOKEN_MAX.get(model_name, 2048) - count_string_tokens(system_text, model_name) - reserved
|
||||
for msg in msgs:
|
||||
|
|
|
|||
|
|
@ -96,7 +96,7 @@ def count_string_tokens(string: str, model_name: str) -> int:
|
|||
return len(encoding.encode(string))
|
||||
|
||||
|
||||
def get_max_completion_tokens(messages: list[dict], model: str):
|
||||
def get_max_completion_tokens(messages: list[dict], model: str, default: int) -> int:
|
||||
"""Calculate the maximum number of completion tokens for a given model and list of messages.
|
||||
|
||||
Args:
|
||||
|
|
@ -106,4 +106,6 @@ def get_max_completion_tokens(messages: list[dict], model: str):
|
|||
Returns:
|
||||
The maximum number of completion tokens.
|
||||
"""
|
||||
return TOKEN_MAX.get(model, 4096) - count_message_tokens(messages)
|
||||
if model not in TOKEN_MAX:
|
||||
return default
|
||||
return TOKEN_MAX[model] - count_message_tokens(messages)
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ async def mock_llm_ask(self, prompt: str, system_msgs):
|
|||
@pytest.mark.asyncio
|
||||
async def test_researcher(mocker):
|
||||
with TemporaryDirectory() as dirname:
|
||||
topic = "dataiku .vs datarobot"
|
||||
topic = "dataiku vs. datarobot"
|
||||
mocker.patch("metagpt.provider.base_gpt_api.BaseGPTAPI.aask", mock_llm_ask)
|
||||
researcher.RESEARCH_PATH = Path(dirname)
|
||||
await researcher.Researcher().run(topic)
|
||||
|
|
|
|||
|
|
@ -44,6 +44,9 @@ PAGE = """
|
|||
<div class="box">
|
||||
<p>This is a div with a class "box".</p>
|
||||
<p><a href="https://metagpt.com">a link</a></p>
|
||||
<p><a href="#section2"></a></p>
|
||||
<p><a href="ftp://192.168.1.1:8080"></a></p>
|
||||
<p><a href="javascript:alert('Hello');"></a></p>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue