diff --git a/metagpt/config.py b/metagpt/config.py index faeffd777..d47d5325b 100644 --- a/metagpt/config.py +++ b/metagpt/config.py @@ -76,10 +76,10 @@ class Config(metaclass=Singleton): logger.warning("LONG_TERM_MEMORY is True") self.max_budget = self._get("MAX_BUDGET", 10.0) self.total_cost = 0.0 - self.puppeteer_config = self._get("PUPPETEER_CONFIG", "") - self.mmdc = self._get("MMDC", "mmdc") - self.update_costs = self._get("UPDATE_COSTS", True) - self.calc_usage = self._get("CALC_USAGE", True) + + self.puppeteer_config = self._get("PUPPETEER_CONFIG", "") + self.mmdc = self._get("MMDC", "mmdc") + self.calc_usage = self._get("CALC_USAGE", True) self.model_for_researcher_summary = self._get("MODEL_FOR_RESEARCHER_SUMMARY") self.model_for_researcher_report = self._get("MODEL_FOR_RESEARCHER_REPORT") diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py index 86b63770c..551810546 100644 --- a/metagpt/provider/openai_api.py +++ b/metagpt/provider/openai_api.py @@ -23,27 +23,6 @@ from metagpt.utils.token_counter import ( get_max_completion_tokens, ) -<<<<<<< main -def retry(max_retries): - def decorator(f): - @wraps(f) - async def wrapper(*args, **kwargs): - for i in range(max_retries): - try: - return await f(*args, **kwargs) - except Exception: - if i == max_retries - 1: - raise - await asyncio.sleep(2 ** i) - return wrapper - return decorator - -class RateLimiter: - """Rate limiter class, each call goes through wait_if_needed, sleep if rate limiting is required""" - def __init__(self, rpm): - self.last_call_time = 0 - self.interval = 1.1 * 60 / rpm # Here 1.1 is used because even if the calls are made strictly on time, they will still be QOS'd; consider switching to simple error retry later -======= class RateLimiter: """Rate control class, each call goes through wait_if_needed, sleep if rate control is needed""" @@ -53,7 +32,6 @@ class RateLimiter: # Here 1.1 is used because even if the calls are made strictly according to time, # they will
still be QOS'd; consider switching to simple error retry later self.interval = 1.1 * 60 / rpm ->>>>>>> main self.rpm = rpm def split_batches(self, batch): diff --git a/metagpt/tools/web_browser_engine_playwright.py b/metagpt/tools/web_browser_engine_playwright.py index 94539e9a3..030e7701b 100644 --- a/metagpt/tools/web_browser_engine_playwright.py +++ b/metagpt/tools/web_browser_engine_playwright.py @@ -2,12 +2,15 @@ from __future__ import annotations import asyncio -from pathlib import Path import sys +from pathlib import Path from typing import Literal + from playwright.async_api import async_playwright + from metagpt.config import CONFIG from metagpt.logs import logger +from metagpt.utils.parse_html import WebPage class PlaywrightWrapper: @@ -16,7 +19,7 @@ class PlaywrightWrapper: To use this module, you should have the `playwright` Python package installed and ensure that the required browsers are also installed. You can install playwright by running the command `pip install metagpt[playwright]` and download the necessary browser binaries by running the - command `playwright install` for the first time." + command `playwright install` for the first time. 
""" def __init__( @@ -40,27 +43,30 @@ class PlaywrightWrapper: self._context_kwargs = context_kwargs self._has_run_precheck = False - async def run(self, url: str, *urls: str) -> str | list[str]: + async def run(self, url: str, *urls: str) -> WebPage | list[WebPage]: async with async_playwright() as ap: browser_type = getattr(ap, self.browser_type) await self._run_precheck(browser_type) browser = await browser_type.launch(**self.launch_kwargs) - - async def _scrape(url): - context = await browser.new_context(**self._context_kwargs) - page = await context.new_page() - async with page: - try: - await page.goto(url) - await page.evaluate("window.scrollTo(0, document.body.scrollHeight)") - content = await page.content() - return content - except Exception as e: - return f"Fail to load page content for {e}" + _scrape = self._scrape if urls: - return await asyncio.gather(_scrape(url), *(_scrape(i) for i in urls)) - return await _scrape(url) + return await asyncio.gather(_scrape(browser, url), *(_scrape(browser, i) for i in urls)) + return await _scrape(browser, url) + + async def _scrape(self, browser, url): + context = await browser.new_context(**self._context_kwargs) + page = await context.new_page() + async with page: + try: + await page.goto(url) + await page.evaluate("window.scrollTo(0, document.body.scrollHeight)") + html = await page.content() + inner_text = await page.evaluate("() => document.body.innerText") + except Exception as e: + inner_text = f"Fail to load page content for {e}" + html = "" + return WebPage(inner_text=inner_text, html=html, url=url) async def _run_precheck(self, browser_type): if self._has_run_precheck: @@ -72,6 +78,10 @@ class PlaywrightWrapper: if CONFIG.global_proxy: kwargs["env"] = {"ALL_PROXY": CONFIG.global_proxy} await _install_browsers(self.browser_type, **kwargs) + + if self._has_run_precheck: + return + if not executable_path.exists(): parts = executable_path.parts available_paths = 
list(Path(*parts[:-3]).glob(f"{self.browser_type}-*")) @@ -85,25 +95,37 @@ class PlaywrightWrapper: self._has_run_precheck = True +def _get_install_lock(): + global _install_lock + if _install_lock is None: + _install_lock = asyncio.Lock() + return _install_lock + + async def _install_browsers(*browsers, **kwargs) -> None: - process = await asyncio.create_subprocess_exec( - sys.executable, - "-m", - "playwright", - "install", - *browsers, - "--with-deps", - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - **kwargs, - ) + async with _get_install_lock(): + browsers = [i for i in browsers if i not in _install_cache] + if not browsers: + return + process = await asyncio.create_subprocess_exec( + sys.executable, + "-m", + "playwright", + "install", + *browsers, + # "--with-deps", + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + **kwargs, + ) - await asyncio.gather(_log_stream(process.stdout, logger.info), _log_stream(process.stderr, logger.warning)) + await asyncio.gather(_log_stream(process.stdout, logger.info), _log_stream(process.stderr, logger.warning)) - if await process.wait() == 0: - logger.info(f"Install browser for playwright successfully.") - else: - logger.warning(f"Fail to install browser for playwright.") + if await process.wait() == 0: + logger.info("Install browser for playwright successfully.") + else: + logger.warning("Fail to install browser for playwright.") + _install_cache.update(browsers) async def _log_stream(sr, log_func): @@ -114,8 +136,14 @@ async def _log_stream(sr, log_func): log_func(f"[playwright install browser]: {line.decode().strip()}") +_install_lock: asyncio.Lock = None +_install_cache = set() + + if __name__ == "__main__": - for i in ("chromium", "firefox", "webkit"): - text = asyncio.run(PlaywrightWrapper(i).run("https://httpbin.org/ip")) - print(text) - print(i) \ No newline at end of file + import fire + + async def main(url: str, *urls: str, browser_type: str = "chromium", **kwargs): + 
return await PlaywrightWrapper(browser_type, **kwargs).run(url, *urls) + + fire.Fire(main)