From 27e8fdf32065f44f98f72ae2718a7923be387bc7 Mon Sep 17 00:00:00 2001 From: lidanyang Date: Fri, 12 Jul 2024 18:40:43 +0800 Subject: [PATCH] recover --- metagpt/utils/parse_html.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metagpt/utils/parse_html.py b/metagpt/utils/parse_html.py index 031393501..985e54d96 100644 --- a/metagpt/utils/parse_html.py +++ b/metagpt/utils/parse_html.py @@ -43,11 +43,11 @@ class WebPage(BaseModel): soup = _get_soup(self.html) keep_attrs = ["class", "id"] if keep_links: - keep_attrs.extend(["href", "title"]) + keep_attrs.append("href") for i in soup.find_all(True): for name in list(i.attrs): - if i[name] and name not in keep_attrs and not name.startswith("data-"): + if i[name] and name not in keep_attrs: del i[name] for i in soup.find_all(["svg", "img", "video", "audio"]):