diff --git a/config/vault.example.yaml b/config/vault.example.yaml new file mode 100644 index 000000000..0e197d2a8 --- /dev/null +++ b/config/vault.example.yaml @@ -0,0 +1,48 @@ +# Usage: +# 1. Get value. +# >>> from metagpt.tools.libs.env import get_env +# >>> access_token = await get_env(key="access_token", app_name="github") +# >>> print(access_token) +# YOUR_ACCESS_TOKEN +# +# 2. Get description for LLM understanding. +# >>> from metagpt.tools.libs.env import get_env_description +# >>> descriptions = await get_env_description() +# >>> for k, desc in descriptions.items(): +# >>> print(f"{k}:{desc}") +# await get_env(key="access_token", app_name="github"):Get github access token +# await get_env(key="access_token", app_name="gitlab"):Get gitlab access token +# ... + +vault: + github: + values: + access_token: "YOUR_ACCESS_TOKEN" + descriptions: + access_token: "Get github access token" + gitlab: + values: + access_token: "YOUR_ACCESS_TOKEN" + descriptions: + access_token: "Get gitlab access token" + iflytek_tts: + values: + api_id: "YOUR_APP_ID" + api_key: "YOUR_API_KEY" + api_secret: "YOUR_API_SECRET" + descriptions: + api_id: "Get the API ID of IFlyTek Text to Speech" + api_key: "Get the API KEY of IFlyTek Text to Speech" + api_secret: "Get the API SECRET of IFlyTek Text to Speech" + azure_tts: + values: + subscription_key: "YOUR_SUBSCRIPTION_KEY" + region: "YOUR_REGION" + descriptions: + subscription_key: "Get the subscription key of Azure Text to Speech." + region: "Get the region of Azure Text to Speech." + default: # All key-value pairs whose app name is an empty string are placed below + values: + proxy: "YOUR_PROXY" + descriptions: + proxy: "Get proxy for tools like requests, playwright, selenium, etc." 
\ No newline at end of file diff --git a/examples/di/crawl_webpage.py b/examples/di/crawl_webpage.py index b8226f4f4..10b230f2b 100644 --- a/examples/di/crawl_webpage.py +++ b/examples/di/crawl_webpage.py @@ -6,16 +6,19 @@ """ from metagpt.roles.di.data_interpreter import DataInterpreter +from metagpt.tools.libs.browser import Browser as _ + PAPER_LIST_REQ = """" Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/, -and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key variables* +and save it to a csv file. paper title must include `multiagent` or `large language model`. +**Notice: view the page element before writing scraping code** """ ECOMMERCE_REQ = """ Get products data from website https://scrapeme.live/shop/ and save it as a csv file. -**Notice: Firstly parse the web page encoding and the text HTML structure; -The first page product name, price, product URL, and image URL must be saved in the csv;** +The first page product name, price, product URL, and image URL must be saved in the csv. +**Notice: view the page element before writing scraping code** """ NEWS_36KR_REQ = """从36kr创投平台https://pitchhub.36kr.com/financing-flash 所有初创企业融资的信息, **注意: 这是一个中文网站**; @@ -25,11 +28,12 @@ NEWS_36KR_REQ = """从36kr创投平台https://pitchhub.36kr.com/financing-flash 3. 反思*快讯的html内容示例*中的规律, 设计正则匹配表达式来获取*`快讯`*的标题、链接、时间; 4. 筛选最近3天的初创企业融资*`快讯`*, 以list[dict]形式打印前5个。 5. 
将全部结果存在本地csv中 +**Notice: view the page element before writing scraping code** """ async def main(): - di = DataInterpreter(tools=["scrape_web_playwright"]) + di = DataInterpreter(tools=["Browser"]) await di.run(ECOMMERCE_REQ) diff --git a/metagpt/actions/di/execute_nb_code.py b/metagpt/actions/di/execute_nb_code.py index b4fe949fe..64620d9cc 100644 --- a/metagpt/actions/di/execute_nb_code.py +++ b/metagpt/actions/di/execute_nb_code.py @@ -65,7 +65,7 @@ class ExecuteNbCode(Action): """execute notebook code block, return result to llm, and display it.""" nb: NotebookNode - nb_client: NotebookClient = None + nb_client: RealtimeOutputNotebookClient = None console: Console interaction: str timeout: int = 600 @@ -78,11 +78,15 @@ class ExecuteNbCode(Action): interaction=("ipython" if self.is_ipython() else "terminal"), ) self.reporter = NotebookReporter() + self.set_nb_client() + + def set_nb_client(self): self.nb_client = RealtimeOutputNotebookClient( - nb, - timeout=timeout, + self.nb, + timeout=self.timeout, resources={"metadata": {"path": DEFAULT_WORKSPACE_ROOT}}, notebook_reporter=self.reporter, + coalesce_streams=True, ) async def build(self): @@ -118,7 +122,7 @@ class ExecuteNbCode(Action): # sleep 1s to wait for the kernel to be cleaned up completely await asyncio.sleep(1) await self.build() - self.nb_client = NotebookClient(self.nb, timeout=self.timeout) + self.set_nb_client() def add_code_cell(self, code: str): self.nb.cells.append(new_code_cell(source=code)) diff --git a/metagpt/actions/write_code.py b/metagpt/actions/write_code.py index 7f225d469..da25fe621 100644 --- a/metagpt/actions/write_code.py +++ b/metagpt/actions/write_code.py @@ -161,7 +161,7 @@ class WriteCode(Action): filename=coding_context.filename, root_path=str(self.repo.src_relative_path) ) coding_context.code_doc.content = code - await reporter.async_report(self.repo.workdir / coding_context.code_doc.root_relative_path, "path") + await reporter.async_report(coding_context.code_doc, "document") 
return coding_context @staticmethod diff --git a/metagpt/actions/write_code_review.py b/metagpt/actions/write_code_review.py index 3912095df..ad99de2dd 100644 --- a/metagpt/actions/write_code_review.py +++ b/metagpt/actions/write_code_review.py @@ -18,6 +18,7 @@ from metagpt.logs import logger from metagpt.schema import CodingContext, Document from metagpt.utils.common import CodeParser from metagpt.utils.project_repo import ProjectRepo +from metagpt.utils.report import EditorReporter PROMPT_TEMPLATE = """ # System @@ -131,16 +132,23 @@ class WriteCodeReview(Action): input_args: Optional[BaseModel] = Field(default=None, exclude=True) @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) - async def write_code_review_and_rewrite(self, context_prompt, cr_prompt, filename): + async def write_code_review_and_rewrite(self, context_prompt, cr_prompt, doc): + filename = doc.filename cr_rsp = await self._aask(context_prompt + cr_prompt) result = CodeParser.parse_block("Code Review Result", cr_rsp) if "LGTM" in result: return result, None # if LBTM, rewrite code - rewrite_prompt = f"{context_prompt}\n{cr_rsp}\n{REWRITE_CODE_TEMPLATE.format(filename=filename)}" - code_rsp = await self._aask(rewrite_prompt) - code = CodeParser.parse_code(text=code_rsp) + async with EditorReporter(enable_llm_stream=True) as reporter: + await reporter.async_report( + {"type": "code", "filename": filename, "src_path": doc.root_relative_path}, "meta" + ) + rewrite_prompt = f"{context_prompt}\n{cr_rsp}\n{REWRITE_CODE_TEMPLATE.format(filename=filename)}" + code_rsp = await self._aask(rewrite_prompt) + code = CodeParser.parse_code(text=code_rsp) + doc.content = code + await reporter.async_report(doc, "document") return result, code async def run(self, *args, **kwargs) -> CodingContext: @@ -185,7 +193,7 @@ class WriteCodeReview(Action): f"len(self.i_context.code_doc.content)={len2}" ) result, rewrited_code = await self.write_code_review_and_rewrite( - context_prompt, 
cr_prompt, self.i_context.code_doc.filename + context_prompt, cr_prompt, self.i_context.code_doc ) if "LBTM" in result: iterative_code = rewrited_code diff --git a/metagpt/rag/engines/simple.py b/metagpt/rag/engines/simple.py index 5c5810308..623b3f350 100644 --- a/metagpt/rag/engines/simple.py +++ b/metagpt/rag/engines/simple.py @@ -4,6 +4,7 @@ import json import os from typing import Any, Optional, Union +from fsspec import AbstractFileSystem from llama_index.core import SimpleDirectoryReader, VectorStoreIndex from llama_index.core.callbacks.base import CallbackManager from llama_index.core.embeddings import BaseEmbedding @@ -83,6 +84,7 @@ class SimpleEngine(RetrieverQueryEngine): llm: LLM = None, retriever_configs: list[BaseRetrieverConfig] = None, ranker_configs: list[BaseRankerConfig] = None, + fs: Optional[AbstractFileSystem] = None, ) -> "SimpleEngine": """From docs. @@ -100,7 +102,7 @@ class SimpleEngine(RetrieverQueryEngine): if not input_dir and not input_files: raise ValueError("Must provide either `input_dir` or `input_files`.") - documents = SimpleDirectoryReader(input_dir=input_dir, input_files=input_files).load_data() + documents = SimpleDirectoryReader(input_dir=input_dir, input_files=input_files, fs=fs).load_data() cls._fix_document_metadata(documents) index = VectorStoreIndex.from_documents( diff --git a/metagpt/roles/di/data_analyst.py b/metagpt/roles/di/data_analyst.py index 0fc95b9d6..fc298ea4c 100644 --- a/metagpt/roles/di/data_analyst.py +++ b/metagpt/roles/di/data_analyst.py @@ -20,6 +20,7 @@ from metagpt.strategy.thinking_command import ( ) from metagpt.tools.tool_recommend import BM25ToolRecommender from metagpt.utils.common import CodeParser +from metagpt.utils.report import ThoughtReporter class DataAnalyst(DataInterpreter): @@ -82,8 +83,8 @@ class DataAnalyst(DataInterpreter): available_commands=prepare_command_prompt(self.available_commands), ) context = self.llm.format_msg(self.working_memory.get() + [Message(content=prompt, 
role="user")]) - - rsp = await self.llm.aask(context) + async with ThoughtReporter(): + rsp = await self.llm.aask(context) self.commands = json.loads(CodeParser.parse_code(block=None, text=rsp)) self.rc.memory.add(Message(content=rsp, role="assistant")) diff --git a/metagpt/roles/di/data_interpreter.py b/metagpt/roles/di/data_interpreter.py index e147cbbe3..bdfc0e294 100644 --- a/metagpt/roles/di/data_interpreter.py +++ b/metagpt/roles/di/data_interpreter.py @@ -15,6 +15,7 @@ from metagpt.schema import Message, Task, TaskResult from metagpt.strategy.task_type import TaskType from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender from metagpt.utils.common import CodeParser +from metagpt.utils.report import ThoughtReporter REACT_THINK_PROMPT = """ # User Requirement @@ -73,7 +74,8 @@ class DataInterpreter(Role): return True prompt = REACT_THINK_PROMPT.format(user_requirement=self.user_requirement, context=context) - rsp = await self.llm.aask(prompt) + async with ThoughtReporter(): + rsp = await self.llm.aask(prompt) rsp_dict = json.loads(CodeParser.parse_code(text=rsp)) self.working_memory.add(Message(content=rsp_dict["thoughts"], role="assistant")) need_action = rsp_dict["state"] diff --git a/metagpt/roles/di/team_leader.py b/metagpt/roles/di/team_leader.py index a1ef11fa6..2fa782ade 100644 --- a/metagpt/roles/di/team_leader.py +++ b/metagpt/roles/di/team_leader.py @@ -20,6 +20,7 @@ from metagpt.strategy.thinking_command import ( run_commands, ) from metagpt.utils.common import CodeParser +from metagpt.utils.report import ThoughtReporter class TeamLeader(Role): @@ -69,7 +70,8 @@ class TeamLeader(Role): ) context = self.llm.format_msg(self.get_memories(k=10) + [Message(content=prompt, role="user")]) - rsp = await self.llm.aask(context, system_msgs=[SYSTEM_PROMPT]) + async with ThoughtReporter(): + rsp = await self.llm.aask(context, system_msgs=[SYSTEM_PROMPT]) self.commands = json.loads(CodeParser.parse_code(text=rsp)) 
self.rc.memory.add(Message(content=rsp, role="assistant")) diff --git a/metagpt/tools/libs/browser.py b/metagpt/tools/libs/browser.py index 7fde804fe..8d6daec11 100644 --- a/metagpt/tools/libs/browser.py +++ b/metagpt/tools/libs/browser.py @@ -1,9 +1,12 @@ from __future__ import annotations +import contextlib from playwright.async_api import async_playwright - +from metagpt.utils.file import MemoryFileSystem +from uuid import uuid4 from metagpt.const import DEFAULT_WORKSPACE_ROOT from metagpt.tools.tool_registry import register_tool +from metagpt.utils.parse_html import simplify_html from metagpt.utils.report import BrowserReporter @@ -35,16 +38,49 @@ class Browser: print("Now on page ", url) await self._view() - async def open_new_page(self, url: str): + async def open_new_page(self, url: str, timeout: float = 30000): """open a new page in the browser and view the page""" async with self.reporter as reporter: page = await self.browser.new_page() await reporter.async_report(url, "url") - await page.goto(url) + await page.goto(url, timeout=timeout) self.pages[url] = page await self._set_current_page(page, url) await reporter.async_report(page, "page") + async def view_page_element_to_scrape(self, requirement: str, keep_links: bool = False) -> None: + """view the HTML content of current page to understand the structure. When executed, the content will be printed out + + Args: + requirement (str): Providing a clear and detailed requirement helps in focusing the inspection on the desired elements. + keep_links (bool): Whether to keep the hyperlinks in the HTML content. Set to True if links are required + """ + html = await self.current_page.content() + html = simplify_html(html, url=self.current_page.url, keep_links=keep_links) + mem_fs = MemoryFileSystem() + filename = f"{uuid4().hex}.html" + with mem_fs.open(filename, "w") as f: + f.write(html) + + # Since RAG is an optional optimization, if it fails, the simplified HTML can be used as a fallback. 
+ with contextlib.suppress(Exception): + + from metagpt.rag.engines import SimpleEngine # avoid circular import + + # TODO make `from_docs` asynchronous + engine = SimpleEngine.from_docs(input_files=[filename], fs=mem_fs) + nodes = await engine.aretrieve(requirement) + html = "\n".join(i.text for i in nodes) + + mem_fs.rm_file(filename) + print(html) + + async def get_page_content(self) -> str: + """Get the HTML content of current page.""" + html = await self.current_page.content() + html_content = html.strip() + return html_content + async def switch_page(self, url: str): """switch to an opened page in the browser and view the page""" if url in self.pages: @@ -152,8 +188,8 @@ class Browser: async def _view(self, keep_len: int = 5000) -> str: """simulate human viewing the current page, return the visible text with links""" - visible_text_with_links = await self.current_page.evaluate(VIEW_CONTENT_JS) - print("The visible text and their links (if any): ", visible_text_with_links[:keep_len]) + # visible_text_with_links = await self.current_page.evaluate(VIEW_CONTENT_JS) + # print("The visible text and their links (if any): ", visible_text_with_links[:keep_len]) # html_content = await self._view_page_html(keep_len=keep_len) # print("The html content: ", html_content) diff --git a/metagpt/tools/libs/editor.py b/metagpt/tools/libs/editor.py index e032dcef5..78560e375 100644 --- a/metagpt/tools/libs/editor.py +++ b/metagpt/tools/libs/editor.py @@ -100,7 +100,7 @@ class Editor: file_path=file_path, block_content=block_content, ) - self.resource.report(result.file_path, "path") + self.resource.report(result.file_path, "path", extra={"type": "search", "line": i, "symbol": symbol}) return result return None diff --git a/metagpt/tools/libs/git.py b/metagpt/tools/libs/git.py index eb3fd6822..b4d759bf4 100644 --- a/metagpt/tools/libs/git.py +++ b/metagpt/tools/libs/git.py @@ -9,7 +9,6 @@ from github.Issue import Issue from github.PullRequest import PullRequest from 
metagpt.tools.tool_registry import register_tool -from metagpt.utils.git_repository import GitBranch, GitRepository @register_tool(tags=["software development", "git", "Commit the changes and push to remote git repository."]) @@ -18,7 +17,7 @@ async def git_push( access_token: str, comments: str = "Commit", new_branch: str = "", -) -> GitBranch: +) -> "GitBranch": """ Pushes changes from a local Git repository to its remote counterpart. @@ -49,6 +48,8 @@ async def git_push( base branch:'master', head branch:'feature/new', repo_name:'iorisa/snake-game' """ + from metagpt.utils.git_repository import GitRepository + if not GitRepository.is_git_dir(local_path): raise ValueError("Invalid local git repository") diff --git a/metagpt/utils/common.py b/metagpt/utils/common.py index 9e9bb034c..7303d1f47 100644 --- a/metagpt/utils/common.py +++ b/metagpt/utils/common.py @@ -646,7 +646,7 @@ def role_raise_decorator(func): raise Exception(format_trackback_info(limit=None)) except Exception as e: if self.latest_observed_msg: - logger.warning( + logger.exception( "There is a exception in role's execution, in order to resume, " "we delete the newest role communication message in the role's memory." 
) diff --git a/metagpt/utils/file.py b/metagpt/utils/file.py index f62b44eb8..a8ed482d9 100644 --- a/metagpt/utils/file.py +++ b/metagpt/utils/file.py @@ -9,6 +9,7 @@ from pathlib import Path import aiofiles +from fsspec.implementations.memory import MemoryFileSystem as _MemoryFileSystem from metagpt.logs import logger from metagpt.utils.exceptions import handle_exception @@ -68,3 +69,10 @@ class File: content = b"".join(chunks) logger.debug(f"Successfully read file, the path of file: {file_path}") return content + + +class MemoryFileSystem(_MemoryFileSystem): + + @classmethod + def _strip_protocol(cls, path): + return super()._strip_protocol(str(path)) diff --git a/metagpt/utils/parse_html.py b/metagpt/utils/parse_html.py index 65aa3f236..3aac8ca6c 100644 --- a/metagpt/utils/parse_html.py +++ b/metagpt/utils/parse_html.py @@ -7,6 +7,8 @@ from urllib.parse import urljoin, urlparse from bs4 import BeautifulSoup from pydantic import BaseModel, PrivateAttr +import htmlmin + class WebPage(BaseModel): inner_text: str @@ -38,6 +40,22 @@ class WebPage(BaseModel): elif url.startswith(("http://", "https://")): yield urljoin(self.url, url) + def get_slim_soup(self, keep_links: bool = False): + soup = _get_soup(self.html) + keep_attrs = ["class"] + if keep_links: + keep_attrs.append("href") + + for i in soup.find_all(True): + for name in list(i.attrs): + if i[name] and name not in keep_attrs: + del i[name] + + for i in soup.find_all(["svg", "img", "video", "audio"]): + i.decompose() + + return soup + def get_html_content(page: str, base: str): soup = _get_soup(page) @@ -48,7 +66,12 @@ def get_html_content(page: str, base: str): def _get_soup(page: str): soup = BeautifulSoup(page, "html.parser") # https://stackoverflow.com/questions/1936466/how-to-scrape-only-visible-webpage-text-with-beautifulsoup - for s in soup(["style", "script", "[document]", "head", "title"]): + for s in soup(["style", "script", "[document]", "head", "title", "footer"]): s.extract() return soup + + +def 
simplify_html(html: str, url: str, keep_links: bool = False): + html = WebPage(inner_text="", html=html, url=url).get_slim_soup(keep_links).decode() + return htmlmin.minify(html, remove_comments=True, remove_empty_space=True) diff --git a/metagpt/utils/report.py b/metagpt/utils/report.py index a61c77381..2d72af111 100644 --- a/metagpt/utils/report.py +++ b/metagpt/utils/report.py @@ -39,6 +39,7 @@ class BlockType(str, Enum): GALLERY = "Gallery" NOTEBOOK = "Notebook" DOCS = "Docs" + THOUGHT = "Thought" END_MARKER_NAME = "end_marker" @@ -55,23 +56,23 @@ class ResourceReporter(BaseModel): callback_url: str = Field(METAGPT_REPORTER_DEFAULT_URL, description="The URL to which the report should be sent") _llm_task: Optional[asyncio.Task] = PrivateAttr(None) - def report(self, value: Any, name: str): + def report(self, value: Any, name: str, extra: Optional[dict] = None): """Synchronously report resource observation data. Args: value: The data to report. name: The type name of the data. """ - return self._report(value, name) + return self._report(value, name, extra) - async def async_report(self, value: Any, name: str): + async def async_report(self, value: Any, name: str, extra: Optional[dict] = None): """Asynchronously report resource observation data. Args: value: The data to report. name: The type name of the data. 
""" - return await self._async_report(value, name) + return await self._async_report(value, name, extra) @classmethod def set_report_fn(cls, fn: Callable): @@ -100,20 +101,20 @@ class ResourceReporter(BaseModel): """ cls._async_report = fn - def _report(self, value: Any, name: str): + def _report(self, value: Any, name: str, extra: Optional[dict] = None): if not self.callback_url: return - data = self._format_data(value, name) + data = self._format_data(value, name, extra) resp = requests.post(self.callback_url, json=data) resp.raise_for_status() return resp.text - async def _async_report(self, value: Any, name: str): + async def _async_report(self, value: Any, name: str, extra: Optional[dict] = None): if not self.callback_url: return - data = self._format_data(value, name) + data = self._format_data(value, name, extra) url = self.callback_url _result = urlparse(url) sessiion_kwargs = {} @@ -129,9 +130,16 @@ class ResourceReporter(BaseModel): resp.raise_for_status() return await resp.text() - def _format_data(self, value, name): + def _format_data(self, value, name, extra): data = self.model_dump(mode="json", exclude=("callback_url", "llm_stream")) - data["value"] = str(value) if isinstance(value, Path) else value + if isinstance(value, BaseModel): + value = value.model_dump(mode="json") + elif isinstance(value, Path): + value = str(value) + + if name == "path": + value = os.path.abspath(value) + data["value"] = value data["name"] = name role = CURRENT_ROLE.get(None) if role: @@ -139,6 +147,8 @@ class ResourceReporter(BaseModel): else: role_name = os.environ.get("METAGPT_ROLE") data["role"] = role_name + if extra: + data["extra"] = extra return data def __enter__(self): @@ -252,6 +262,16 @@ class TaskReporter(ObjectReporter): block: Literal[BlockType.TASK] = BlockType.TASK +class ThoughtReporter(ObjectReporter): + """Reporter for object resources to Task Block.""" + + block: Literal[BlockType.THOUGHT] = BlockType.THOUGHT + + async def __aenter__(self): + await 
self.async_report({}) + return await super().__aenter__() + + class FileReporter(ResourceReporter): """File resource callback for reporting complete file paths. @@ -259,13 +279,23 @@ class FileReporter(ResourceReporter): if the file can be partially output for display first, use streaming callback. """ - def report(self, value: Union[Path, dict, Any], name: Literal["path", "meta", "content"] = "path"): + def report( + self, + value: Union[Path, dict, Any], + name: Literal["path", "meta", "content"] = "path", + extra: Optional[dict] = None, + ): """Report file resource synchronously.""" - return super().report(value, name) + return super().report(value, name, extra) - async def async_report(self, value: Path, name: Literal["path", "meta", "content"] = "path"): + async def async_report( + self, + value: Union[Path, dict, Any], + name: Literal["path", "meta", "content"] = "path", + extra: Optional[dict] = None, + ): """Report file resource asynchronously.""" - return await super().async_report(value, name) + return await super().async_report(value, name, extra) class NotebookReporter(FileReporter): diff --git a/requirements.txt b/requirements.txt index b40c69c9f..83a904156 100644 --- a/requirements.txt +++ b/requirements.txt @@ -71,4 +71,6 @@ dashscope==1.14.1 rank-bm25==0.2.2 # for tool recommendation gymnasium==0.29.1 pylint~=3.0.3 -pygithub~=2.3 \ No newline at end of file +pygithub~=2.3 +htmlmin +fsspec