mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-08 15:05:17 +02:00
Merge branch 'add-web-scraping-test' into 'code_intepreter'
Add web scraping test. See merge request agents/data_agents_opt!60
This commit is contained in:
commit
1fb77121f4
5 changed files with 51 additions and 3 deletions
22
examples/crawl_webpage.py
Normal file
22
examples/crawl_webpage.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
# -*- encoding: utf-8 -*-
|
||||
"""
|
||||
@Date : 2024/01/24 15:11:27
|
||||
@Author : orange-crow
|
||||
@File : crawl_webpage.py
|
||||
"""
|
||||
|
||||
from metagpt.roles.code_interpreter import CodeInterpreter
|
||||
|
||||
|
||||
async def main():
    """Ask a tool-enabled CodeInterpreter to export the ICLR 2024 `paperlist` table to CSV."""
    # The same text serves as the role's goal and as the message handed to run().
    prompt = (
        "Get data from `paperlist` table in https://papercopilot.com/statistics/iclr-statistics/iclr-2024-statistics/,\n"
        "and save it to a csv file. paper title must include `multiagent` or `large language model`. *notice: print key data*"
    )
    interpreter = CodeInterpreter(goal=prompt, use_tools=True)
    await interpreter.run(prompt)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the example coroutine to completion.
    from asyncio import run

    run(main())
|
||||
|
|
@ -123,7 +123,10 @@ class ExecutePyCode(ExecuteCode, Action):
|
|||
return parsed_output
|
||||
|
||||
for i, output in enumerate(outputs):
|
||||
if output["output_type"] == "stream":
|
||||
if output["output_type"] == "stream" and not any(
|
||||
tag in output["text"]
|
||||
for tag in ["| INFO | metagpt", "| ERROR | metagpt", "| WARNING | metagpt"]
|
||||
):
|
||||
parsed_output += output["text"]
|
||||
elif output["output_type"] == "display_data":
|
||||
if "image/png" in output["data"]:
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ class CodeInterpreter(Role):
|
|||
if ReviewConst.CHANGE_WORD[0] in review:
|
||||
counter = 0 # redo the task again with help of human suggestions
|
||||
|
||||
return code["code"] if code["language"] != "markdown" else "", result, success
|
||||
return code["code"] if code.get("language") != "markdown" else "", result, success
|
||||
|
||||
async def _write_code(self):
|
||||
todo = WriteCodeByGenerate() if not self.use_tools else WriteCodeWithTools(selected_tools=self.tools)
|
||||
|
|
|
|||
|
|
@ -19,4 +19,4 @@ async def scrape_web_playwright(url, *urls):
|
|||
web = await PlaywrightWrapper("chromium").run(url, *urls)
|
||||
|
||||
# Return the inner text content of the web page
|
||||
return {"inner_text": web.inner_text, "html": web.html}
|
||||
return {"inner_text": web.inner_text.strip(), "html": web.html.strip()}
|
||||
|
|
|
|||
23
tests/metagpt/tools/libs/test_web_scraping.py
Normal file
23
tests/metagpt/tools/libs/test_web_scraping.py
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
import pytest
|
||||
|
||||
from metagpt.tools.libs.web_scraping import scrape_web_playwright
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_scrape_web_playwright():
    """Check that scrape_web_playwright returns a dict of stripped page content.

    The result must be a dict containing the 'inner_text' and 'html' keys,
    and neither value may carry any leading or trailing whitespace.
    """
    test_url = "https://www.deepwisdom.ai"

    result = await scrape_web_playwright(test_url)

    # Assert that the result is a dictionary
    assert isinstance(result, dict)

    for key in ("inner_text", "html"):
        # Assert that the result contains the expected key
        assert key in result

        # Compare against str.strip() rather than probing for a literal " ":
        # stray newlines or tabs at either end must fail the test as well.
        assert result[key] == result[key].strip()
|
||||
Loading…
Add table
Add a link
Reference in a new issue