+添加运营小姐姐,拉你入群
-如果群已满,请添加负责人微信,会邀请进群
-
-
\ No newline at end of file
+
diff --git a/docs/README_JA.md b/docs/README_JA.md
index a5e5f6552..57f6487a7 100644
--- a/docs/README_JA.md
+++ b/docs/README_JA.md
@@ -75,25 +75,25 @@ ### Docker によるインストール
```bash
# ステップ 1: metagpt 公式イメージをダウンロードし、config.yaml を準備する
-docker pull metagpt/metagpt:v0.3
+docker pull metagpt/metagpt:v0.3.1
mkdir -p /opt/metagpt/{config,workspace}
-docker run --rm metagpt/metagpt:v0.3 cat /app/metagpt/config/config.yaml > /opt/metagpt/config/config.yaml
-vim /opt/metagpt/config/config.yaml # 設定を変更する
+docker run --rm metagpt/metagpt:v0.3.1 cat /app/metagpt/config/config.yaml > /opt/metagpt/config/key.yaml
+vim /opt/metagpt/config/key.yaml # 設定を変更する
# ステップ 2: コンテナで metagpt デモを実行する
docker run --rm \
--privileged \
- -v /opt/metagpt/config:/app/metagpt/config \
+ -v /opt/metagpt/config/key.yaml:/app/metagpt/config/key.yaml \
-v /opt/metagpt/workspace:/app/metagpt/workspace \
- metagpt/metagpt:v0.3 \
+ metagpt/metagpt:v0.3.1 \
python startup.py "Write a cli snake game"
# コンテナを起動し、その中でコマンドを実行することもできます
docker run --name metagpt -d \
--privileged \
- -v /opt/metagpt/config:/app/metagpt/config \
+ -v /opt/metagpt/config/key.yaml:/app/metagpt/config/key.yaml \
-v /opt/metagpt/workspace:/app/metagpt/workspace \
- metagpt/metagpt:v0.3
+ metagpt/metagpt:v0.3.1
docker exec -it metagpt /bin/bash
$ python startup.py "Write a cli snake game"
@@ -111,7 +111,7 @@ ### 自分でイメージをビルドする
```bash
# また、自分で metagpt イメージを構築することもできます。
git clone https://github.com/geekan/MetaGPT.git
-cd MetaGPT && docker build -t metagpt:v0.3 .
+cd MetaGPT && docker build -t metagpt:custom .
```
## 設定
@@ -142,37 +142,36 @@ ### プラットフォームまたはツールの設定
要件を述べるときに、どのプラットフォームまたはツールを使用するかを指定できます。
```shell
-python startup.py "Write a cli snake game based on pygame"
+python startup.py "pygame をベースとした cli ヘビゲームを書く"
```
-
### 使用方法
```
-NAME
- startup.py - We are a software startup comprised of AI. By investing in us, you are empowering a future filled with limitless possibilities.
+名前
+ startup.py - 私たちは AI で構成されたソフトウェア・スタートアップです。私たちに投資することは、無限の可能性に満ちた未来に力を与えることです。
-SYNOPSIS
+シノプシス
+        r'(.*?```python.*?\s+)?(?P<code>.*)(```.*?)',
+        r'(.*?```python.*?\s+)?(?P<code>.*)',
+ ):
+ match = re.search(pattern, text, re.DOTALL)
+ if not match:
+ continue
+ code = match.group("code")
+ if not code:
+ continue
+ with contextlib.suppress(Exception):
+ ast.parse(code)
+ return code
+ raise ValueError("Invalid python code")
@classmethod
def parse_data(cls, data):
@@ -231,7 +249,8 @@ def print_members(module, indent=0):
elif inspect.ismethod(obj):
print(f'{prefix}Method: {name}')
+
def parse_recipient(text):
- pattern = "## Send To:\s*([A-Za-z]+)\s*?" # hard code for now
+ pattern = r"## Send To:\s*([A-Za-z]+)\s*?" # hard code for now
recipient = re.search(pattern, text)
return recipient.group(1) if recipient else ""
diff --git a/metagpt/utils/parse_html.py b/metagpt/utils/parse_html.py
new file mode 100644
index 000000000..62de26541
--- /dev/null
+++ b/metagpt/utils/parse_html.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python
+from __future__ import annotations
+
+from typing import Generator, Optional
+from urllib.parse import urljoin, urlparse
+
+from bs4 import BeautifulSoup
+from pydantic import BaseModel
+
+
+class WebPage(BaseModel):
+ inner_text: str
+ html: str
+ url: str
+
+ class Config:
+ underscore_attrs_are_private = True
+
+    _soup: Optional[BeautifulSoup] = None
+ _title: Optional[str] = None
+
+ @property
+ def soup(self) -> BeautifulSoup:
+ if self._soup is None:
+ self._soup = BeautifulSoup(self.html, "html.parser")
+ return self._soup
+
+ @property
+ def title(self):
+ if self._title is None:
+ title_tag = self.soup.find("title")
+ self._title = title_tag.text.strip() if title_tag is not None else ""
+ return self._title
+
+ def get_links(self) -> Generator[str, None, None]:
+ for i in self.soup.find_all("a", href=True):
+ url = i["href"]
+ result = urlparse(url)
+ if not result.scheme and result.path:
+ yield urljoin(self.url, url)
+ elif url.startswith(("http://", "https://")):
+ yield urljoin(self.url, url)
+
+
+def get_html_content(page: str, base: str):
+ soup = _get_soup(page)
+
+ return soup.get_text(strip=True)
+
+
+def _get_soup(page: str):
+ soup = BeautifulSoup(page, "html.parser")
+ # https://stackoverflow.com/questions/1936466/how-to-scrape-only-visible-webpage-text-with-beautifulsoup
+ for s in soup(["style", "script", "[document]", "head", "title"]):
+ s.extract()
+
+ return soup
diff --git a/metagpt/utils/pycst.py b/metagpt/utils/pycst.py
new file mode 100644
index 000000000..afd85a547
--- /dev/null
+++ b/metagpt/utils/pycst.py
@@ -0,0 +1,166 @@
+from __future__ import annotations
+
+from typing import Union
+
+import libcst as cst
+from libcst._nodes.module import Module
+
+DocstringNode = Union[cst.Module, cst.ClassDef, cst.FunctionDef]
+
+
+def get_docstring_statement(body: DocstringNode) -> cst.SimpleStatementLine:
+ """Extracts the docstring from the body of a node.
+
+ Args:
+ body: The body of a node.
+
+ Returns:
+ The docstring statement if it exists, None otherwise.
+ """
+ if isinstance(body, cst.Module):
+ body = body.body
+ else:
+ body = body.body.body
+
+ if not body:
+ return
+
+ statement = body[0]
+ if not isinstance(statement, cst.SimpleStatementLine):
+ return
+
+ expr = statement
+ while isinstance(expr, (cst.BaseSuite, cst.SimpleStatementLine)):
+ if len(expr.body) == 0:
+ return None
+ expr = expr.body[0]
+
+ if not isinstance(expr, cst.Expr):
+ return None
+
+ val = expr.value
+ if not isinstance(val, (cst.SimpleString, cst.ConcatenatedString)):
+ return None
+
+ evaluated_value = val.evaluated_value
+ if isinstance(evaluated_value, bytes):
+ return None
+
+ return statement
+
+
+class DocstringCollector(cst.CSTVisitor):
+ """A visitor class for collecting docstrings from a CST.
+
+ Attributes:
+ stack: A list to keep track of the current path in the CST.
+ docstrings: A dictionary mapping paths in the CST to their corresponding docstrings.
+ """
+ def __init__(self):
+ self.stack: list[str] = []
+ self.docstrings: dict[tuple[str, ...], cst.SimpleStatementLine] = {}
+
+ def visit_Module(self, node: cst.Module) -> bool | None:
+ self.stack.append("")
+
+ def leave_Module(self, node: cst.Module) -> None:
+ return self._leave(node)
+
+ def visit_ClassDef(self, node: cst.ClassDef) -> bool | None:
+ self.stack.append(node.name.value)
+
+ def leave_ClassDef(self, node: cst.ClassDef) -> None:
+ return self._leave(node)
+
+ def visit_FunctionDef(self, node: cst.FunctionDef) -> bool | None:
+ self.stack.append(node.name.value)
+
+ def leave_FunctionDef(self, node: cst.FunctionDef) -> None:
+ return self._leave(node)
+
+ def _leave(self, node: DocstringNode) -> None:
+ key = tuple(self.stack)
+ self.stack.pop()
+ if hasattr(node, "decorators") and any(i.decorator.value == "overload" for i in node.decorators):
+ return
+
+ statement = get_docstring_statement(node)
+ if statement:
+ self.docstrings[key] = statement
+
+
+class DocstringTransformer(cst.CSTTransformer):
+ """A transformer class for replacing docstrings in a CST.
+
+ Attributes:
+ stack: A list to keep track of the current path in the CST.
+ docstrings: A dictionary mapping paths in the CST to their corresponding docstrings.
+ """
+ def __init__(
+ self,
+ docstrings: dict[tuple[str, ...], cst.SimpleStatementLine],
+ ):
+ self.stack: list[str] = []
+ self.docstrings = docstrings
+
+ def visit_Module(self, node: cst.Module) -> bool | None:
+ self.stack.append("")
+
+ def leave_Module(self, original_node: Module, updated_node: Module) -> Module:
+ return self._leave(original_node, updated_node)
+
+ def visit_ClassDef(self, node: cst.ClassDef) -> bool | None:
+ self.stack.append(node.name.value)
+
+ def leave_ClassDef(self, original_node: cst.ClassDef, updated_node: cst.ClassDef) -> cst.CSTNode:
+ return self._leave(original_node, updated_node)
+
+ def visit_FunctionDef(self, node: cst.FunctionDef) -> bool | None:
+ self.stack.append(node.name.value)
+
+ def leave_FunctionDef(self, original_node: cst.FunctionDef, updated_node: cst.FunctionDef) -> cst.CSTNode:
+ return self._leave(original_node, updated_node)
+
+ def _leave(self, original_node: DocstringNode, updated_node: DocstringNode) -> DocstringNode:
+ key = tuple(self.stack)
+ self.stack.pop()
+
+ if hasattr(updated_node, "decorators") and any((i.decorator.value == "overload") for i in updated_node.decorators):
+ return updated_node
+
+ statement = self.docstrings.get(key)
+ if not statement:
+ return updated_node
+
+ original_statement = get_docstring_statement(original_node)
+
+ if isinstance(updated_node, cst.Module):
+ body = updated_node.body
+ if original_statement:
+ return updated_node.with_changes(body=(statement, *body[1:]))
+ else:
+ updated_node = updated_node.with_changes(body=(statement, cst.EmptyLine(), *body))
+ return updated_node
+
+ body = updated_node.body.body[1:] if original_statement else updated_node.body.body
+ return updated_node.with_changes(body=updated_node.body.with_changes(body=(statement, *body)))
+
+
+def merge_docstring(code: str, documented_code: str) -> str:
+ """Merges the docstrings from the documented code into the original code.
+
+ Args:
+ code: The original code.
+ documented_code: The documented code.
+
+ Returns:
+ The original code with the docstrings from the documented code.
+ """
+ code_tree = cst.parse_module(code)
+ documented_code_tree = cst.parse_module(documented_code)
+
+ visitor = DocstringCollector()
+ documented_code_tree.visit(visitor)
+ transformer = DocstringTransformer(visitor.docstrings)
+ modified_tree = code_tree.visit(transformer)
+ return modified_tree.code
diff --git a/metagpt/utils/text.py b/metagpt/utils/text.py
new file mode 100644
index 000000000..be3c52edd
--- /dev/null
+++ b/metagpt/utils/text.py
@@ -0,0 +1,124 @@
+from typing import Generator, Sequence
+
+from metagpt.utils.token_counter import TOKEN_MAX, count_string_tokens
+
+
+def reduce_message_length(msgs: Generator[str, None, None], model_name: str, system_text: str, reserved: int = 0) -> str:
+ """Reduce the length of concatenated message segments to fit within the maximum token size.
+
+ Args:
+ msgs: A generator of strings representing progressively shorter valid prompts.
+ model_name: The name of the encoding to use. (e.g., "gpt-3.5-turbo")
+ system_text: The system prompts.
+ reserved: The number of reserved tokens.
+
+ Returns:
+ The concatenated message segments reduced to fit within the maximum token size.
+
+ Raises:
+ RuntimeError: If it fails to reduce the concatenated message length.
+ """
+ max_token = TOKEN_MAX.get(model_name, 2048) - count_string_tokens(system_text, model_name) - reserved
+ for msg in msgs:
+ if count_string_tokens(msg, model_name) < max_token:
+ return msg
+
+ raise RuntimeError("fail to reduce message length")
+
+
+def generate_prompt_chunk(
+ text: str,
+ prompt_template: str,
+ model_name: str,
+ system_text: str,
+ reserved: int = 0,
+) -> Generator[str, None, None]:
+ """Split the text into chunks of a maximum token size.
+
+ Args:
+ text: The text to split.
+ prompt_template: The template for the prompt, containing a single `{}` placeholder. For example, "### Reference\n{}".
+ model_name: The name of the encoding to use. (e.g., "gpt-3.5-turbo")
+ system_text: The system prompts.
+ reserved: The number of reserved tokens.
+
+ Yields:
+ The chunk of text.
+ """
+ paragraphs = text.splitlines(keepends=True)
+ current_token = 0
+ current_lines = []
+
+ reserved = reserved + count_string_tokens(prompt_template+system_text, model_name)
+ # 100 is a magic number to ensure the maximum context length is not exceeded
+ max_token = TOKEN_MAX.get(model_name, 2048) - reserved - 100
+
+ while paragraphs:
+ paragraph = paragraphs.pop(0)
+ token = count_string_tokens(paragraph, model_name)
+ if current_token + token <= max_token:
+ current_lines.append(paragraph)
+ current_token += token
+ elif token > max_token:
+ paragraphs = split_paragraph(paragraph) + paragraphs
+ continue
+ else:
+ yield prompt_template.format("".join(current_lines))
+ current_lines = [paragraph]
+ current_token = token
+
+ if current_lines:
+ yield prompt_template.format("".join(current_lines))
+
+
+def split_paragraph(paragraph: str, sep: str = ".,", count: int = 2) -> list[str]:
+ """Split a paragraph into multiple parts.
+
+ Args:
+ paragraph: The paragraph to split.
+ sep: The separator character.
+ count: The number of parts to split the paragraph into.
+
+ Returns:
+ A list of split parts of the paragraph.
+ """
+ for i in sep:
+ sentences = list(_split_text_with_ends(paragraph, i))
+ if len(sentences) <= 1:
+ continue
+ ret = ["".join(j) for j in _split_by_count(sentences, count)]
+ return ret
+    return list(_split_by_count(paragraph, count))
+
+
+def decode_unicode_escape(text: str) -> str:
+ """Decode a text with unicode escape sequences.
+
+ Args:
+ text: The text to decode.
+
+ Returns:
+ The decoded text.
+ """
+ return text.encode("utf-8").decode("unicode_escape", "ignore")
+
+
+def _split_by_count(lst: Sequence, count: int):
+ avg = len(lst) // count
+ remainder = len(lst) % count
+ start = 0
+ for i in range(count):
+ end = start + avg + (1 if i < remainder else 0)
+ yield lst[start:end]
+ start = end
+
+
+def _split_text_with_ends(text: str, sep: str = "."):
+ parts = []
+ for i in text:
+ parts.append(i)
+ if i == sep:
+ yield "".join(parts)
+ parts = []
+ if parts:
+ yield "".join(parts)
diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py
index 99ae5e176..591bb60f0 100644
--- a/metagpt/utils/token_counter.py
+++ b/metagpt/utils/token_counter.py
@@ -25,6 +25,21 @@ TOKEN_COSTS = {
}
+TOKEN_MAX = {
+ "gpt-3.5-turbo": 4096,
+ "gpt-3.5-turbo-0301": 4096,
+ "gpt-3.5-turbo-0613": 4096,
+ "gpt-3.5-turbo-16k": 16384,
+ "gpt-3.5-turbo-16k-0613": 16384,
+ "gpt-4-0314": 8192,
+ "gpt-4": 8192,
+ "gpt-4-32k": 32768,
+ "gpt-4-32k-0314": 32768,
+ "gpt-4-0613": 8192,
+ "text-embedding-ada-002": 8192,
+}
+
+
def count_message_tokens(messages, model="gpt-3.5-turbo-0613"):
"""Return the number of tokens used by a list of messages."""
try:
@@ -39,7 +54,7 @@ def count_message_tokens(messages, model="gpt-3.5-turbo-0613"):
"gpt-4-32k-0314",
"gpt-4-0613",
"gpt-4-32k-0613",
- }:
+ }:
tokens_per_message = 3
tokens_per_name = 1
elif model == "gpt-3.5-turbo-0301":
@@ -79,3 +94,18 @@ def count_string_tokens(string: str, model_name: str) -> int:
"""
encoding = tiktoken.encoding_for_model(model_name)
return len(encoding.encode(string))
+
+
+def get_max_completion_tokens(messages: list[dict], model: str, default: int) -> int:
+ """Calculate the maximum number of completion tokens for a given model and list of messages.
+
+ Args:
+ messages: A list of messages.
+ model: The model name.
+
+ Returns:
+ The maximum number of completion tokens.
+ """
+ if model not in TOKEN_MAX:
+ return default
+ return TOKEN_MAX[model] - count_message_tokens(messages)
diff --git a/requirements.txt b/requirements.txt
index 32a436962..452e2d092 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -35,3 +35,4 @@ tqdm==4.64.0
anthropic==0.3.6
typing-inspect==0.8.0
typing_extensions==4.5.0
+libcst==1.0.1
diff --git a/setup.py b/setup.py
index e65696901..2a8edaae7 100644
--- a/setup.py
+++ b/setup.py
@@ -44,7 +44,7 @@ setup(
install_requires=requirements,
extras_require={
"playwright": ["playwright>=1.26", "beautifulsoup4"],
- "selenium": ["selenium>4", "webdriver_manager<3.9", "beautifulsoup4"],
+ "selenium": ["selenium>4", "webdriver_manager", "beautifulsoup4"],
},
cmdclass={
"install_mermaid": InstallMermaidCLI,
diff --git a/tests/metagpt/actions/test_write_docstring.py b/tests/metagpt/actions/test_write_docstring.py
new file mode 100644
index 000000000..82d96e1a6
--- /dev/null
+++ b/tests/metagpt/actions/test_write_docstring.py
@@ -0,0 +1,32 @@
+import pytest
+
+from metagpt.actions.write_docstring import WriteDocstring
+
+code = '''
+def add_numbers(a: int, b: int):
+ return a + b
+
+
+class Person:
+ def __init__(self, name: str, age: int):
+ self.name = name
+ self.age = age
+
+ def greet(self):
+ return f"Hello, my name is {self.name} and I am {self.age} years old."
+'''
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+ ("style", "part"),
+ [
+ ("google", "Args:"),
+ ("numpy", "Parameters"),
+ ("sphinx", ":param name:"),
+ ],
+ ids=["google", "numpy", "sphinx"]
+)
+async def test_write_docstring(style: str, part: str):
+ ret = await WriteDocstring().run(code, style=style)
+ assert part in ret
diff --git a/tests/metagpt/roles/test_researcher.py b/tests/metagpt/roles/test_researcher.py
new file mode 100644
index 000000000..01b5dae3b
--- /dev/null
+++ b/tests/metagpt/roles/test_researcher.py
@@ -0,0 +1,32 @@
+from pathlib import Path
+from random import random
+from tempfile import TemporaryDirectory
+
+import pytest
+
+from metagpt.roles import researcher
+
+
+async def mock_llm_ask(self, prompt: str, system_msgs):
+ if "Please provide up to 2 necessary keywords" in prompt:
+ return '["dataiku", "datarobot"]'
+ elif "Provide up to 4 queries related to your research topic" in prompt:
+ return '["Dataiku machine learning platform", "DataRobot AI platform comparison", ' \
+ '"Dataiku vs DataRobot features", "Dataiku and DataRobot use cases"]'
+ elif "sort the remaining search results" in prompt:
+ return '[1,2]'
+ elif "Not relevant." in prompt:
+ return "Not relevant" if random() > 0.5 else prompt[-100:]
+ elif "provide a detailed research report" in prompt:
+ return f"# Research Report\n## Introduction\n{prompt}"
+ return ""
+
+
+@pytest.mark.asyncio
+async def test_researcher(mocker):
+ with TemporaryDirectory() as dirname:
+ topic = "dataiku vs. datarobot"
+ mocker.patch("metagpt.provider.base_gpt_api.BaseGPTAPI.aask", mock_llm_ask)
+ researcher.RESEARCH_PATH = Path(dirname)
+ await researcher.Researcher().run(topic)
+ assert (researcher.RESEARCH_PATH / f"{topic}.md").read_text().startswith("# Research Report")
diff --git a/tests/metagpt/tools/test_search_engine.py b/tests/metagpt/tools/test_search_engine.py
index 2418c7b26..a7fe063a6 100644
--- a/tests/metagpt/tools/test_search_engine.py
+++ b/tests/metagpt/tools/test_search_engine.py
@@ -5,24 +5,44 @@
@Author : alexanderwu
@File : test_search_engine.py
"""
+from __future__ import annotations
import pytest
from metagpt.logs import logger
+from metagpt.tools import SearchEngineType
from metagpt.tools.search_engine import SearchEngine
+class MockSearchEnine:
+ async def run(self, query: str, max_results: int = 8, as_string: bool = True) -> str | list[dict[str, str]]:
+        rets = [{"url": f"https://metagpt.com/mock/{i}", "title": query, "snippet": query * i} for i in range(max_results)]
+        return "\n".join(str(ret) for ret in rets) if as_string else rets
+
+
@pytest.mark.asyncio
-@pytest.mark.usefixtures("llm_api")
-async def test_search_engine(llm_api):
- search_engine = SearchEngine()
- poetries = [
- # ("北京美食", "北京"),
- ("屈臣氏", "屈臣氏")
- ]
- for i, j in poetries:
- rsp = await search_engine.run(i)
- # rsp = context.llm.ask_batch([prompt])
- logger.info(rsp)
- # assert any(j in k['body'] for k in rsp)
- assert len(rsp) > 0
+@pytest.mark.parametrize(
+    ("search_engine_type", "run_func", "max_results", "as_string"),
+    [
+        (SearchEngineType.SERPAPI_GOOGLE, None, 8, True),
+        (SearchEngineType.SERPAPI_GOOGLE, None, 4, False),
+        (SearchEngineType.DIRECT_GOOGLE, None, 8, True),
+        (SearchEngineType.DIRECT_GOOGLE, None, 6, False),
+        (SearchEngineType.SERPER_GOOGLE, None, 8, True),
+        (SearchEngineType.SERPER_GOOGLE, None, 6, False),
+        (SearchEngineType.DUCK_DUCK_GO, None, 8, True),
+        (SearchEngineType.DUCK_DUCK_GO, None, 6, False),
+        (SearchEngineType.CUSTOM_ENGINE, MockSearchEnine().run, 8, False),
+        (SearchEngineType.CUSTOM_ENGINE, MockSearchEnine().run, 6, False),
+
+    ],
+)
+async def test_search_engine(search_engine_type, run_func, max_results, as_string):
+    search_engine = SearchEngine(search_engine_type, run_func)
+ rsp = await search_engine.run("metagpt", max_results=max_results, as_string=as_string)
+ logger.info(rsp)
+ if as_string:
+ assert isinstance(rsp, str)
+ else:
+ assert isinstance(rsp, list)
+ assert len(rsp) == max_results
diff --git a/tests/metagpt/tools/test_web_browser_engine_playwright.py b/tests/metagpt/tools/test_web_browser_engine_playwright.py
index 908f92112..69e1339e7 100644
--- a/tests/metagpt/tools/test_web_browser_engine_playwright.py
+++ b/tests/metagpt/tools/test_web_browser_engine_playwright.py
@@ -1,4 +1,5 @@
import pytest
+
from metagpt.config import CONFIG
from metagpt.tools import web_browser_engine_playwright
@@ -20,6 +21,7 @@ async def test_scrape_web_page(browser_type, use_proxy, kwagrs, url, urls, proxy
CONFIG.global_proxy = proxy
browser = web_browser_engine_playwright.PlaywrightWrapper(browser_type, **kwagrs)
result = await browser.run(url)
+ result = result.inner_text
assert isinstance(result, str)
assert "Deepwisdom" in result
diff --git a/tests/metagpt/tools/test_web_browser_engine_selenium.py b/tests/metagpt/tools/test_web_browser_engine_selenium.py
index 5ea1e3083..ce322f7bd 100644
--- a/tests/metagpt/tools/test_web_browser_engine_selenium.py
+++ b/tests/metagpt/tools/test_web_browser_engine_selenium.py
@@ -1,4 +1,5 @@
import pytest
+
from metagpt.config import CONFIG
from metagpt.tools import web_browser_engine_selenium
@@ -20,6 +21,7 @@ async def test_scrape_web_page(browser_type, use_proxy, url, urls, proxy, capfd)
CONFIG.global_proxy = proxy
browser = web_browser_engine_selenium.SeleniumWrapper(browser_type)
result = await browser.run(url)
+ result = result.inner_text
assert isinstance(result, str)
assert "Deepwisdom" in result
@@ -27,7 +29,7 @@ async def test_scrape_web_page(browser_type, use_proxy, url, urls, proxy, capfd)
results = await browser.run(url, *urls)
assert isinstance(results, list)
assert len(results) == len(urls) + 1
- assert all(("Deepwisdom" in i) for i in results)
+ assert all(("Deepwisdom" in i.inner_text) for i in results)
if use_proxy:
assert "Proxy:" in capfd.readouterr().out
finally:
diff --git a/tests/metagpt/utils/test_output_parser.py b/tests/metagpt/utils/test_output_parser.py
index 155297860..c56cff6fa 100644
--- a/tests/metagpt/utils/test_output_parser.py
+++ b/tests/metagpt/utils/test_output_parser.py
@@ -19,7 +19,7 @@ def test_parse_blocks():
def test_parse_code():
- test_text = "```python\nprint('Hello, world!')\n```"
+ test_text = "```python\nprint('Hello, world!')```"
expected_result = "print('Hello, world!')"
assert OutputParser.parse_code(test_text, 'python') == expected_result
@@ -27,6 +27,22 @@ def test_parse_code():
OutputParser.parse_code(test_text, 'java')
+def test_parse_python_code():
+ expected_result = "print('Hello, world!')"
+ assert OutputParser.parse_python_code("```python\nprint('Hello, world!')```") == expected_result
+ assert OutputParser.parse_python_code("```python\nprint('Hello, world!')") == expected_result
+ assert OutputParser.parse_python_code("print('Hello, world!')") == expected_result
+ assert OutputParser.parse_python_code("print('Hello, world!')```") == expected_result
+ assert OutputParser.parse_python_code("print('Hello, world!')```") == expected_result
+ expected_result = "print('```Hello, world!```')"
+ assert OutputParser.parse_python_code("```python\nprint('```Hello, world!```')```") == expected_result
+ assert OutputParser.parse_python_code("The code is: ```python\nprint('```Hello, world!```')```") == expected_result
+ assert OutputParser.parse_python_code("xxx.\n```python\nprint('```Hello, world!```')```\nxxx") == expected_result
+
+ with pytest.raises(ValueError):
+ OutputParser.parse_python_code("xxx =")
+
+
def test_parse_str():
test_text = "name = 'Alice'"
expected_result = 'Alice'
diff --git a/tests/metagpt/utils/test_parse_html.py b/tests/metagpt/utils/test_parse_html.py
new file mode 100644
index 000000000..42be416a6
--- /dev/null
+++ b/tests/metagpt/utils/test_parse_html.py
@@ -0,0 +1,68 @@
+from metagpt.utils import parse_html
+
+PAGE = """
+
+
+
+ Random HTML Example
+
+
+ This is a Heading
+ This is a paragraph with a link and some emphasized text.
+
+ - Item 1
+ - Item 2
+ - Item 3
+
+
+ - Numbered Item 1
+ - Numbered Item 2
+ - Numbered Item 3
+
+
+
+ Header 1
+ Header 2
+
+
+ Row 1, Cell 1
+ Row 1, Cell 2
+
+
+ Row 2, Cell 1
+ Row 2, Cell 2
+
+
+
+
+
+
+
+"""
+
+CONTENT = 'This is a HeadingThis is a paragraph witha linkand someemphasizedtext.Item 1Item 2Item 3Numbered Item 1Numbered '\
+'Item 2Numbered Item 3Header 1Header 2Row 1, Cell 1Row 1, Cell 2Row 2, Cell 1Row 2, Cell 2Name:Email:SubmitThis is a div '\
+'with a class "box".a link'
+
+
+def test_web_page():
+ page = parse_html.WebPage(inner_text=CONTENT, html=PAGE, url="http://example.com")
+ assert page.title == "Random HTML Example"
+ assert list(page.get_links()) == ["http://example.com/test", "https://metagpt.com"]
+
+
+def test_get_page_content():
+ ret = parse_html.get_html_content(PAGE, "http://example.com")
+ assert ret == CONTENT
diff --git a/tests/metagpt/utils/test_pycst.py b/tests/metagpt/utils/test_pycst.py
new file mode 100644
index 000000000..07352eac2
--- /dev/null
+++ b/tests/metagpt/utils/test_pycst.py
@@ -0,0 +1,136 @@
+from metagpt.utils import pycst
+
+code = '''
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+from typing import overload
+
+@overload
+def add_numbers(a: int, b: int):
+ ...
+
+@overload
+def add_numbers(a: float, b: float):
+ ...
+
+def add_numbers(a: int, b: int):
+ return a + b
+
+
+class Person:
+ def __init__(self, name: str, age: int):
+ self.name = name
+ self.age = age
+
+ def greet(self):
+ return f"Hello, my name is {self.name} and I am {self.age} years old."
+'''
+
+documented_code = '''
+"""
+This is an example module containing a function and a class definition.
+"""
+
+
+def add_numbers(a: int, b: int):
+ """This function is used to add two numbers and return the result.
+
+ Parameters:
+ a: The first integer.
+ b: The second integer.
+
+ Returns:
+ int: The sum of the two numbers.
+ """
+ return a + b
+
+class Person:
+ """This class represents a person's information, including name and age.
+
+ Attributes:
+ name: The person's name.
+ age: The person's age.
+ """
+
+ def __init__(self, name: str, age: int):
+ """Creates a new instance of the Person class.
+
+ Parameters:
+ name: The person's name.
+ age: The person's age.
+ """
+ ...
+
+ def greet(self):
+ """
+ Returns a greeting message including the name and age.
+
+ Returns:
+ str: The greeting message.
+ """
+ ...
+'''
+
+
+merged_code = '''
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+This is an example module containing a function and a class definition.
+"""
+
+from typing import overload
+
+@overload
+def add_numbers(a: int, b: int):
+ ...
+
+@overload
+def add_numbers(a: float, b: float):
+ ...
+
+def add_numbers(a: int, b: int):
+ """This function is used to add two numbers and return the result.
+
+ Parameters:
+ a: The first integer.
+ b: The second integer.
+
+ Returns:
+ int: The sum of the two numbers.
+ """
+ return a + b
+
+
+class Person:
+ """This class represents a person's information, including name and age.
+
+ Attributes:
+ name: The person's name.
+ age: The person's age.
+ """
+ def __init__(self, name: str, age: int):
+ """Creates a new instance of the Person class.
+
+ Parameters:
+ name: The person's name.
+ age: The person's age.
+ """
+ self.name = name
+ self.age = age
+
+ def greet(self):
+ """
+ Returns a greeting message including the name and age.
+
+ Returns:
+ str: The greeting message.
+ """
+ return f"Hello, my name is {self.name} and I am {self.age} years old."
+'''
+
+
+def test_merge_docstring():
+ data = pycst.merge_docstring(code, documented_code)
+ print(data)
+ assert data == merged_code
diff --git a/tests/metagpt/utils/test_text.py b/tests/metagpt/utils/test_text.py
new file mode 100644
index 000000000..0caf8abaa
--- /dev/null
+++ b/tests/metagpt/utils/test_text.py
@@ -0,0 +1,77 @@
+import pytest
+
+from metagpt.utils.text import (
+ decode_unicode_escape,
+ generate_prompt_chunk,
+ reduce_message_length,
+ split_paragraph,
+)
+
+
+def _msgs():
+ length = 20
+ while length:
+ yield "Hello," * 1000 * length
+ length -= 1
+
+
+def _paragraphs(n):
+ return " ".join("Hello World." for _ in range(n))
+
+
+@pytest.mark.parametrize(
+ "msgs, model_name, system_text, reserved, expected",
+ [
+ (_msgs(), "gpt-3.5-turbo", "System", 1500, 1),
+ (_msgs(), "gpt-3.5-turbo-16k", "System", 3000, 6),
+ (_msgs(), "gpt-3.5-turbo-16k", "Hello," * 1000, 3000, 5),
+ (_msgs(), "gpt-4", "System", 2000, 3),
+ (_msgs(), "gpt-4", "Hello," * 1000, 2000, 2),
+ (_msgs(), "gpt-4-32k", "System", 4000, 14),
+ (_msgs(), "gpt-4-32k", "Hello," * 2000, 4000, 12),
+ ]
+)
+def test_reduce_message_length(msgs, model_name, system_text, reserved, expected):
+ assert len(reduce_message_length(msgs, model_name, system_text, reserved)) / (len("Hello,")) / 1000 == expected
+
+
+@pytest.mark.parametrize(
+ "text, prompt_template, model_name, system_text, reserved, expected",
+ [
+ (" ".join("Hello World." for _ in range(1000)), "Prompt: {}", "gpt-3.5-turbo", "System", 1500, 2),
+ (" ".join("Hello World." for _ in range(1000)), "Prompt: {}", "gpt-3.5-turbo-16k", "System", 3000, 1),
+ (" ".join("Hello World." for _ in range(4000)), "Prompt: {}", "gpt-4", "System", 2000, 2),
+ (" ".join("Hello World." for _ in range(8000)), "Prompt: {}", "gpt-4-32k", "System", 4000, 1),
+ ]
+)
+def test_generate_prompt_chunk(text, prompt_template, model_name, system_text, reserved, expected):
+ ret = list(generate_prompt_chunk(text, prompt_template, model_name, system_text, reserved))
+ assert len(ret) == expected
+
+
+@pytest.mark.parametrize(
+ "paragraph, sep, count, expected",
+ [
+ (_paragraphs(10), ".", 2, [_paragraphs(5), f" {_paragraphs(5)}"]),
+ (_paragraphs(10), ".", 3, [_paragraphs(4), f" {_paragraphs(3)}", f" {_paragraphs(3)}"]),
+ (f"{_paragraphs(5)}\n{_paragraphs(3)}", "\n.", 2, [f"{_paragraphs(5)}\n", _paragraphs(3)]),
+ ("......", ".", 2, ["...", "..."]),
+ ("......", ".", 3, ["..", "..", ".."]),
+ (".......", ".", 2, ["....", "..."]),
+ ]
+)
+def test_split_paragraph(paragraph, sep, count, expected):
+ ret = split_paragraph(paragraph, sep, count)
+ assert ret == expected
+
+
+@pytest.mark.parametrize(
+ "text, expected",
+ [
+ ("Hello\\nWorld", "Hello\nWorld"),
+ ("Hello\\tWorld", "Hello\tWorld"),
+ ("Hello\\u0020World", "Hello World"),
+ ]
+)
+def test_decode_unicode_escape(text, expected):
+ assert decode_unicode_escape(text) == expected