diff --git a/metagpt/tools/libs/web_scraping.py b/metagpt/tools/libs/web_scraping.py
index 489c3a472..9e7a8041c 100644
--- a/metagpt/tools/libs/web_scraping.py
+++ b/metagpt/tools/libs/web_scraping.py
@@ -8,13 +8,15 @@ from metagpt.utils.parse_html import simplify_html
 
 
 @register_tool(tags=["web scraping"])
-async def view_page_element_to_scrape(url: str, requirement: str, keep_links: bool = False) -> None:
-    """view the HTML content of current page to understand the structure. When executed, the content will be printed out
+async def view_page_element_to_scrape(url: str, requirement: str, keep_links: bool = False) -> str:
+    """View the HTML content of the current page to understand its structure.
 
     Args:
         url (str): The URL of the web page to scrape.
         requirement (str): Providing a clear and detailed requirement helps in focusing the inspection on the desired elements.
         keep_links (bool): Whether to keep the hyperlinks in the HTML content. Set to True if links are required
+    Returns:
+        str: The HTML content of the page.
     """
     async with Browser() as browser:
         await browser.goto(url)
@@ -36,7 +38,7 @@ async def view_page_element_to_scrape(url: str, requirement: str, keep_links: bo
         html = "\n".join(i.text for i in nodes)
 
     mem_fs.rm_file(filename)
-    print(html)
+    return html
 
 
 # async def get_elements_outerhtml(self, element_ids: list[int]):
diff --git a/metagpt/utils/parse_html.py b/metagpt/utils/parse_html.py
index 1ed3a620c..031393501 100644
--- a/metagpt/utils/parse_html.py
+++ b/metagpt/utils/parse_html.py
@@ -41,13 +41,13 @@ class WebPage(BaseModel):
 
     def get_slim_soup(self, keep_links: bool = False):
         soup = _get_soup(self.html)
-        keep_attrs = ["class"]
+        keep_attrs = ["class", "id"]
         if keep_links:
-            keep_attrs.append("href")
+            keep_attrs.extend(["href", "title"])
 
         for i in soup.find_all(True):
             for name in list(i.attrs):
-                if i[name] and name not in keep_attrs:
+                if i[name] and name not in keep_attrs and not name.startswith("data-"):
                     del i[name]
 
         for i in soup.find_all(["svg", "img", "video", "audio"]):