diff --git a/examples/di/crawl_webpage.py b/examples/di/crawl_webpage.py
index 92e3c32b0..c4e1b6599 100644
--- a/examples/di/crawl_webpage.py
+++ b/examples/di/crawl_webpage.py
@@ -6,9 +6,7 @@
 """
 from metagpt.roles.di.data_interpreter import DataInterpreter
-
-__import__("metagpt.tools.libs.browser", fromlist=["Browser"])  # To skip pre-commit check
-
+from metagpt.tools.libs.web_scraping import view_page_element_to_scrape
 
 PAPER_LIST_REQ = """"
 Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,
@@ -34,7 +32,7 @@ NEWS_36KR_REQ = """从36kr创投平台https://pitchhub.36kr.com/financing-flash
 
 
 async def main():
-    di = DataInterpreter(tools=["Browser"])
+    di = DataInterpreter(tools=[view_page_element_to_scrape.__name__])
     await di.run(ECOMMERCE_REQ)
 
 
diff --git a/metagpt/rag/engines/simple.py b/metagpt/rag/engines/simple.py
index c237dcf69..8a9ccaffd 100644
--- a/metagpt/rag/engines/simple.py
+++ b/metagpt/rag/engines/simple.py
@@ -4,6 +4,7 @@
 import json
 import os
 from typing import Any, Optional, Union
+import fsspec
 from llama_index.core import SimpleDirectoryReader
 from llama_index.core.callbacks.base import CallbackManager
 from llama_index.core.embeddings import BaseEmbedding
@@ -83,6 +84,7 @@ class SimpleEngine(RetrieverQueryEngine):
         llm: LLM = None,
         retriever_configs: list[BaseRetrieverConfig] = None,
         ranker_configs: list[BaseRankerConfig] = None,
+        fs: Optional[fsspec.AbstractFileSystem] = None,
     ) -> "SimpleEngine":
         """From docs.
@@ -96,11 +98,12 @@
             llm: Must be supported by llama index. Default OpenAI.
             retriever_configs: Configuration for retrievers. If more than one config, will use SimpleHybridRetriever.
             ranker_configs: Configuration for rankers.
+            fs: File system to read input files from; defaults to the local file system.
         """
         if not input_dir and not input_files:
             raise ValueError("Must provide either `input_dir` or `input_files`.")
-        documents = SimpleDirectoryReader(input_dir=input_dir, input_files=input_files).load_data()
+        documents = SimpleDirectoryReader(input_dir=input_dir, input_files=input_files, fs=fs).load_data()
         cls._fix_document_metadata(documents)
 
         transformations = transformations or cls._default_transformations()
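Threading `fs` through `from_docs` lets the engine index documents living on any fsspec-compatible filesystem, which is what the new `view_page_element_to_scrape` tool below relies on. A minimal sketch of the new parameter, assuming MetaGPT's default LLM/embedding configuration is available; the file name and query are illustrative:

```python
import asyncio

from fsspec.implementations.memory import MemoryFileSystem

from metagpt.rag.engines import SimpleEngine


async def demo():
    # Write a document to an in-memory filesystem instead of local disk.
    mem_fs = MemoryFileSystem()
    with mem_fs.open("notes.txt", "w") as f:
        f.write("MetaGPT is a multi-agent framework.")

    # `fs` is forwarded to SimpleDirectoryReader, so the input file
    # never has to exist on the local disk.
    engine = SimpleEngine.from_docs(input_files=["notes.txt"], fs=mem_fs)
    nodes = await engine.aretrieve("What is MetaGPT?")
    print("\n".join(node.text for node in nodes))


asyncio.run(demo())
```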
""" if not input_dir and not input_files: raise ValueError("Must provide either `input_dir` or `input_files`.") - documents = SimpleDirectoryReader(input_dir=input_dir, input_files=input_files).load_data() + documents = SimpleDirectoryReader(input_dir=input_dir, input_files=input_files, fs=fs).load_data() cls._fix_document_metadata(documents) transformations = transformations or cls._default_transformations() diff --git a/metagpt/tools/libs/browser.py b/metagpt/tools/libs/browser.py index df9d43135..9d24d4baf 100644 --- a/metagpt/tools/libs/browser.py +++ b/metagpt/tools/libs/browser.py @@ -28,7 +28,7 @@ from metagpt.utils.report import BrowserReporter @register_tool( - tags=["web", "browse", "scrape"], + tags=["web", "browse"], include_functions=[ "click", "close_tab", @@ -197,3 +197,10 @@ class Browser: async def view(self): observation = parse_accessibility_tree(self.accessibility_tree) return f"Current _Browser Viewer\n URL: {self.page.url}\nOBSERVATION:\n{observation[0]}\n" + + async def __aenter__(self): + await self.start() + return self + + async def __aexit__(self, *args, **kwargs): + await self.stop() diff --git a/metagpt/tools/libs/web_scraping.py b/metagpt/tools/libs/web_scraping.py index bc34b1306..489c3a472 100644 --- a/metagpt/tools/libs/web_scraping.py +++ b/metagpt/tools/libs/web_scraping.py @@ -1,20 +1,50 @@ +import contextlib +from uuid import uuid4 + +from metagpt.tools.libs.browser import Browser from metagpt.tools.tool_registry import register_tool -from metagpt.tools.web_browser_engine_playwright import PlaywrightWrapper +from metagpt.utils.file import MemoryFileSystem +from metagpt.utils.parse_html import simplify_html -@register_tool(tags=["web scraping", "web"]) -async def scrape_web_playwright(url): - """ - Asynchronously Scrape and save the HTML structure and inner text content of a web page using Playwright. +@register_tool(tags=["web scraping"]) +async def view_page_element_to_scrape(url: str, requirement: str, keep_links: bool = False) -> None: + """view the HTML content of current page to understand the structure. When executed, the content will be printed out Args: - url (str): The main URL to fetch inner text from. - - Returns: - dict: The inner text content and html structure of the web page, keys are 'inner_text', 'html'. + url (str): The URL of the web page to scrape. + requirement (str): Providing a clear and detailed requirement helps in focusing the inspection on the desired elements. + keep_links (bool): Whether to keep the hyperlinks in the HTML content. Set to True if links are required """ - # Create a PlaywrightWrapper instance for the Chromium browser - web = await PlaywrightWrapper().run(url) + async with Browser() as browser: + await browser.goto(url) + page = browser.page + html = await page.content() + html = simplify_html(html, url=page.url, keep_links=keep_links) + mem_fs = MemoryFileSystem() + filename = f"{uuid4().hex}.html" + with mem_fs.open(filename, "w") as f: + f.write(html) - # Return the inner text content of the web page - return {"inner_text": web.inner_text.strip(), "html": web.html.strip()} + # Since RAG is an optional optimization, if it fails, the simplified HTML can be used as a fallback. 
diff --git a/metagpt/tools/libs/web_scraping.py b/metagpt/tools/libs/web_scraping.py
index bc34b1306..489c3a472 100644
--- a/metagpt/tools/libs/web_scraping.py
+++ b/metagpt/tools/libs/web_scraping.py
@@ -1,20 +1,50 @@
+import contextlib
+from uuid import uuid4
+
+from metagpt.tools.libs.browser import Browser
 from metagpt.tools.tool_registry import register_tool
-from metagpt.tools.web_browser_engine_playwright import PlaywrightWrapper
+from metagpt.utils.file import MemoryFileSystem
+from metagpt.utils.parse_html import simplify_html
 
 
-@register_tool(tags=["web scraping", "web"])
-async def scrape_web_playwright(url):
-    """
-    Asynchronously Scrape and save the HTML structure and inner text content of a web page using Playwright.
+@register_tool(tags=["web scraping"])
+async def view_page_element_to_scrape(url: str, requirement: str, keep_links: bool = False) -> None:
+    """View the HTML content of a web page to understand its structure. When executed, the content will be printed out.
 
     Args:
-        url (str): The main URL to fetch inner text from.
-
-    Returns:
-        dict: The inner text content and html structure of the web page, keys are 'inner_text', 'html'.
+        url (str): The URL of the web page to scrape.
+        requirement (str): A clear and detailed requirement that focuses the inspection on the desired elements.
+        keep_links (bool): Whether to keep the hyperlinks in the HTML content. Set to True if links are required.
     """
-    # Create a PlaywrightWrapper instance for the Chromium browser
-    web = await PlaywrightWrapper().run(url)
+    async with Browser() as browser:
+        await browser.goto(url)
+        page = browser.page
+        html = await page.content()
+        html = simplify_html(html, url=page.url, keep_links=keep_links)
+        mem_fs = MemoryFileSystem()
+        filename = f"{uuid4().hex}.html"
+        with mem_fs.open(filename, "w") as f:
+            f.write(html)
 
-    # Return the inner text content of the web page
-    return {"inner_text": web.inner_text.strip(), "html": web.html.strip()}
+        # RAG is an optional optimization; if it fails, fall back to the simplified HTML.
+        with contextlib.suppress(Exception):
+            from metagpt.rag.engines import SimpleEngine  # avoid circular import
+
+            # TODO: make `from_docs` asynchronous
+            engine = SimpleEngine.from_docs(input_files=[filename], fs=mem_fs)
+            nodes = await engine.aretrieve(requirement)
+            html = "\n".join(i.text for i in nodes)
+
+        mem_fs.rm_file(filename)
+        print(html)
+
+
+# async def get_elements_outerhtml(self, element_ids: list[int]):
+#     """Inspect the outer HTML of the elements in Current Browser Viewer.
+#     """
+#     page = self.page
+#     data = []
+#     for element_id in element_ids:
+#         html = await get_element_outer_html(page, get_backend_node_id(element_id, self.accessibility_tree))
+#         data.append(html)
+#     return "\n".join(f"[{element_id}]. {html}" for element_id, html in zip(element_ids, data))
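For a quick smoke test outside the `DataInterpreter` flow, the tool can be awaited directly; note that it prints the (possibly RAG-filtered) simplified HTML rather than returning it. A sketch with illustrative arguments:

```python
import asyncio

from metagpt.tools.libs.web_scraping import view_page_element_to_scrape

# Prints the simplified HTML, filtered by RAG retrieval when available.
asyncio.run(
    view_page_element_to_scrape(
        url="https://example.com",
        requirement="Locate the table listing product names and prices.",
        keep_links=False,
    )
)
```

Within `DataInterpreter`, the tool is registered by function name, which is why `crawl_webpage.py` now passes `view_page_element_to_scrape.__name__` instead of the `"Browser"` class name.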