Merge branch 'mgx_ops' into di_mgx

This commit is contained in:
yzlin 2024-03-30 16:45:21 +08:00
commit 3e10d34468
304 changed files with 10747 additions and 662 deletions

View file

@ -0,0 +1,22 @@
import asyncio
import threading
from typing import Any
def run_coroutine_in_new_loop(coroutine) -> Any:
    """Execute *coroutine* on a dedicated event loop running in a helper thread.

    Useful for calling async code from synchronous code that is itself running
    inside an active event loop, where a plain ``loop.run_until_complete`` would
    fail with ``RuntimeError: This event loop is already running``.
    """
    loop = asyncio.new_event_loop()
    worker = threading.Thread(target=loop.run_forever)
    worker.start()
    pending = asyncio.run_coroutine_threadsafe(coroutine, loop)
    try:
        # Block until the coroutine finishes and propagate its result/exception.
        return pending.result()
    finally:
        # Always tear down the helper loop and thread, even on failure.
        loop.call_soon_threadsafe(loop.stop)
        worker.join()
        loop.close()

View file

@ -18,6 +18,7 @@ import csv
import importlib
import inspect
import json
import mimetypes
import os
import platform
import re
@ -29,6 +30,7 @@ from typing import Any, Callable, List, Literal, Tuple, Union
from urllib.parse import quote, unquote
import aiofiles
import chardet
import loguru
import requests
from PIL import Image
@ -663,14 +665,21 @@ def role_raise_decorator(func):
@handle_exception
async def aread(filename: str | Path, encoding="utf-8") -> str:
    """Read a text file asynchronously.

    Tries the given encoding first; on a ``UnicodeDecodeError`` re-reads the
    raw bytes and decodes with the charset detected by ``chardet``.

    Args:
        filename: Path of the file to read.
        encoding: Encoding tried first. Defaults to "utf-8".

    Returns:
        The decoded file content.
    """
    try:
        async with aiofiles.open(str(filename), mode="r", encoding=encoding) as reader:
            content = await reader.read()
    except UnicodeDecodeError:
        # Fallback: read raw bytes and guess the real encoding instead of failing.
        async with aiofiles.open(str(filename), mode="rb") as reader:
            raw = await reader.read()
        result = chardet.detect(raw)
        detected_encoding = result["encoding"]
        content = raw.decode(detected_encoding)
    return content
async def awrite(filename: str | Path, data: str, encoding=None):
async def awrite(filename: str | Path, data: str, encoding="utf-8"):
"""Write file asynchronously."""
pathname = Path(filename)
pathname.parent.mkdir(parents=True, exist_ok=True)
@ -765,7 +774,7 @@ def is_coroutine_func(func: Callable) -> bool:
def load_mc_skills_code(skill_names: list[str] = None, skills_dir: Path = None) -> list[str]:
"""load mincraft skill from js files"""
"""load minecraft skill from js files"""
if not skills_dir:
skills_dir = Path(__file__).parent.absolute()
if skill_names is None:
@ -802,29 +811,6 @@ def decode_image(img_url_or_b64: str) -> Image:
return img
def process_message(messages: Union[str, Message, list[dict], list[Message], list[str]]) -> list[dict]:
    """Convert supported message inputs into a list of ``{"role", "content"}`` dicts.

    Args:
        messages: A single message (str, dict, or Message) or a list of them.

    Returns:
        A list of dicts with exactly the keys "role" and "content".

    Raises:
        ValueError: If an element is not a str, dict, or Message, or if a dict
            does not have exactly the keys "role" and "content".
    """
    from metagpt.schema import Message

    # Normalize a single message into a list.
    if not isinstance(messages, list):
        messages = [messages]

    processed_messages = []
    for msg in messages:
        if isinstance(msg, str):
            processed_messages.append({"role": "user", "content": msg})
        elif isinstance(msg, dict):
            # Explicit check instead of `assert`: asserts vanish under `python -O`.
            if set(msg.keys()) != {"role", "content"}:
                raise ValueError(f"Dict message must have exactly 'role' and 'content' keys, got {sorted(msg.keys())}!")
            processed_messages.append(msg)
        elif isinstance(msg, Message):
            processed_messages.append(msg.to_dict())
        else:
            # Bug fix: report the offending element's type, not the container's.
            raise ValueError(f"Only support message type are: str, Message, dict, but got {type(msg).__name__}!")
    return processed_messages
def log_and_reraise(retry_state: RetryCallState):
logger.error(f"Retry attempts exhausted. Last exception: {retry_state.outcome.exception()}")
logger.warning(
@ -834,3 +820,21 @@ See FAQ 5.8
"""
)
raise retry_state.outcome.exception()
def get_markdown_codeblock_type(filename: str) -> str:
    """Map *filename*'s extension (via its guessed MIME type) to a markdown code-fence language.

    Unknown or unmapped MIME types fall back to "text".
    """
    mime_to_language = {
        "text/x-shellscript": "bash",
        "text/x-c++src": "cpp",
        "text/css": "css",
        "text/html": "html",
        "text/x-java": "java",
        "application/javascript": "javascript",
        "application/json": "json",
        "text/x-python": "python",
        "text/x-ruby": "ruby",
        "application/sql": "sql",
    }
    guessed, _encoding = mimetypes.guess_type(filename)
    return mime_to_language.get(guessed, "text")

View file

@ -13,9 +13,7 @@ import re
from pathlib import Path
from typing import Set
import aiofiles
from metagpt.utils.common import aread
from metagpt.utils.common import aread, awrite
from metagpt.utils.exceptions import handle_exception
@ -45,8 +43,7 @@ class DependencyFile:
async def save(self):
    """Serialize the in-memory dependency mapping to ``self._filename`` asynchronously."""
    data = json.dumps(self._dependencies)
    # Delegate to the shared async writer so file handling stays consistent project-wide.
    await awrite(filename=self._filename, data=data)
async def update(self, filename: Path | str, dependencies: Set[Path | str], persist=True):
"""Update dependencies for a file asynchronously.

View file

@ -5,12 +5,15 @@
@Author : alexanderwu
@File : embedding.py
"""
from langchain_community.embeddings import OpenAIEmbeddings
from llama_index.embeddings.openai import OpenAIEmbedding
from metagpt.config2 import config
def get_embedding() -> OpenAIEmbedding:
    """Build an ``OpenAIEmbedding`` from the configured OpenAI-compatible LLM settings.

    Returns:
        An OpenAIEmbedding wired with the configured api_key and base_url.

    Raises:
        ValueError: If no OpenAI LLM is configured (``config.llm.api_type`` is not "openai").
    """
    llm = config.get_openai_llm()
    if llm is None:
        raise ValueError("To use OpenAIEmbedding, please ensure that config.llm.api_type is correctly set to 'openai'.")
    embedding = OpenAIEmbedding(api_key=llm.api_key, api_base=llm.base_url)
    return embedding

View file

@ -14,11 +14,9 @@ from datetime import datetime
from pathlib import Path
from typing import Dict, List, Set
import aiofiles
from metagpt.logs import logger
from metagpt.schema import Document
from metagpt.utils.common import aread
from metagpt.utils.common import aread, awrite
from metagpt.utils.json_to_markdown import json_to_markdown
@ -55,8 +53,7 @@ class FileRepository:
pathname = self.workdir / filename
pathname.parent.mkdir(parents=True, exist_ok=True)
content = content if content else "" # avoid `argument must be str, not None` to make it continue
async with aiofiles.open(str(pathname), mode="w") as writer:
await writer.write(content)
await awrite(filename=str(pathname), data=content)
logger.info(f"save to: {str(pathname)}")
if dependencies is not None:

View file

@ -9,11 +9,9 @@ import asyncio
import os
from pathlib import Path
import aiofiles
from metagpt.config2 import config
from metagpt.logs import logger
from metagpt.utils.common import check_cmd_exists
from metagpt.utils.common import awrite, check_cmd_exists
async def mermaid_to_file(engine, mermaid_code, output_file_without_suffix, width=2048, height=2048) -> int:
@ -30,9 +28,7 @@ async def mermaid_to_file(engine, mermaid_code, output_file_without_suffix, widt
if dir_name and not os.path.exists(dir_name):
os.makedirs(dir_name)
tmp = Path(f"{output_file_without_suffix}.mmd")
async with aiofiles.open(tmp, "w", encoding="utf-8") as f:
await f.write(mermaid_code)
# tmp.write_text(mermaid_code, encoding="utf-8")
await awrite(filename=tmp, data=mermaid_code)
if engine == "nodejs":
if check_cmd_exists(config.mermaid.path) != 0:

View file

@ -54,5 +54,5 @@ def save_history(role: Role, save_dir: str = ""):
with open(save_path / "plan.json", "w", encoding="utf-8") as plan_file:
json.dump(plan, plan_file, indent=4, ensure_ascii=False)
save_code_file(name=Path(record_time) / "history_nb", code_context=role.execute_code.nb, file_format="ipynb")
save_code_file(name=Path(record_time), code_context=role.execute_code.nb, file_format="ipynb")
return save_path

View file

@ -0,0 +1,18 @@
"""class tools, including method inspection, class attributes, inheritance relationships, etc."""
def check_methods(C, *methods):
    """Verify that class *C* (or an ancestor in its MRO) defines every named method.

    Borrowed from ``_collections_abc``. Useful for implicit interfaces: an
    abstract class can answer ``isinstance`` checks without requiring inheritance.
    Returns True when every method is found; NotImplemented when a method is
    missing or explicitly set to None.
    """
    for name in methods:
        for klass in C.__mro__:
            if name not in klass.__dict__:
                continue
            # Explicit None means "deliberately unsupported" (e.g. __hash__ = None).
            if klass.__dict__[name] is None:
                return NotImplemented
            break
        else:
            # No class in the MRO defines this method.
            return NotImplemented
    return True

View file

@ -340,7 +340,9 @@ def extract_state_value_from_output(content: str) -> str:
content (str): llm's output from `Role._think`
"""
content = content.strip() # deal the output cases like " 0", "0\n" and so on.
pattern = r"([0-9])" # TODO find the number using a more proper method not just extract from content using pattern
pattern = (
r"(?<!-)[0-9]" # TODO find the number using a more proper method not just extract from content using pattern
)
matches = re.findall(pattern, content, re.DOTALL)
matches = list(set(matches))
state = matches[0] if len(matches) > 0 else "-1"

View file

@ -0,0 +1,80 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
This file provides functionality to convert a local repository into a markdown representation.
"""
from __future__ import annotations
import mimetypes
from pathlib import Path
from gitignore_parser import parse_gitignore
from metagpt.logs import logger
from metagpt.utils.common import aread, awrite, get_markdown_codeblock_type, list_files
from metagpt.utils.tree import tree
async def repo_to_markdown(repo_path: str | Path, output: str | Path = None, gitignore: str | Path = None) -> str:
    """Render a local repository as markdown: a directory tree plus per-file listings.

    Args:
        repo_path (str | Path): Root of the repository to render.
        output (str | Path, optional): Where to also write the markdown. Defaults to None.
        gitignore (str | Path, optional): Path to a .gitignore; defaults to the project's own.

    Returns:
        str: The generated markdown text.
    """
    repo_path = Path(repo_path)
    gitignore_path = Path(gitignore or Path(__file__).parent / "../../.gitignore").resolve()

    sections = [await _write_dir_tree(repo_path=repo_path, gitignore=gitignore_path)]
    ignore = parse_gitignore(full_path=str(gitignore_path))
    sections.append(await _write_files(repo_path=repo_path, gitignore_rules=ignore))
    markdown = "".join(sections)

    if output:
        await awrite(filename=str(output), data=markdown, encoding="utf-8")
    return markdown
async def _write_dir_tree(repo_path: Path, gitignore: Path) -> str:
    """Render the repository's directory tree as a fenced markdown section."""
    # Prefer the external `tree` command; fall back to the pure-python walk on any failure.
    try:
        listing = tree(repo_path, gitignore, run_command=True)
    except Exception as exc:
        logger.info(f"{exc}, using safe mode.")
        listing = tree(repo_path, gitignore, run_command=False)
    return f"## Directory Tree\n```text\n{listing}\n```\n---\n\n"
async def _write_files(repo_path, gitignore_rules) -> str:
    """Concatenate a markdown section for every non-ignored file under *repo_path*."""
    sections = []
    for path in list_files(repo_path):
        if gitignore_rules(str(path)):
            continue
        sections.append(await _write_file(filename=path, repo_path=repo_path))
    return "".join(sections)
async def _write_file(filename: Path, repo_path: Path) -> str:
    """Render one file as a markdown section; non-text files get a placeholder.

    Args:
        filename: Absolute path of the file to render.
        repo_path: Repository root, used to compute the relative heading.

    Returns:
        A markdown section for the file.
    """
    relative_path = filename.relative_to(repo_path)
    markdown = f"## {relative_path}\n"

    mime_type, _ = mimetypes.guess_type(filename.name)
    # guess_type returns None for unknown extensions; treat those as binary too.
    # (The original `"text/" not in mime_type` raised TypeError when mime_type was None.)
    if mime_type is None or "text/" not in mime_type:
        logger.info(f"Ignore content: {filename}")
        markdown += "<binary file>\n---\n\n"
        return markdown

    content = await aread(filename, encoding="utf-8")
    # Escape sequences that would break the enclosing markdown structure.
    content = content.replace("```", "\\`\\`\\`").replace("---", "\\-\\-\\-")
    code_block_type = get_markdown_codeblock_type(filename.name)
    markdown += f"```{code_block_type}\n{content}\n```\n---\n\n"
    return markdown

View file

@ -21,6 +21,7 @@ TOKEN_COSTS = {
"gpt-35-turbo": {"prompt": 0.0015, "completion": 0.002},
"gpt-35-turbo-16k": {"prompt": 0.003, "completion": 0.004},
"gpt-3.5-turbo-1106": {"prompt": 0.001, "completion": 0.002},
"gpt-3.5-turbo-0125": {"prompt": 0.001, "completion": 0.002},
"gpt-4-0314": {"prompt": 0.03, "completion": 0.06},
"gpt-4": {"prompt": 0.03, "completion": 0.06},
"gpt-4-32k": {"prompt": 0.06, "completion": 0.12},
@ -48,6 +49,8 @@ TOKEN_COSTS = {
"claude-2.1": {"prompt": 0.008, "completion": 0.024},
"claude-3-sonnet-20240229": {"prompt": 0.003, "completion": 0.015},
"claude-3-opus-20240229": {"prompt": 0.015, "completion": 0.075},
"yi-34b-chat-0205": {"prompt": 0.0003, "completion": 0.0003},
"yi-34b-chat-200k": {"prompt": 0.0017, "completion": 0.0017},
}
@ -140,25 +143,24 @@ FIREWORKS_GRADE_TOKEN_COSTS = {
"mixtral-8x7b": {"prompt": 0.4, "completion": 1.6},
}
# https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
TOKEN_MAX = {
"gpt-3.5-turbo": 4096,
"gpt-3.5-turbo-0301": 4096,
"gpt-3.5-turbo-0613": 4096,
"gpt-3.5-turbo-16k": 16384,
"gpt-3.5-turbo-16k-0613": 16384,
"gpt-35-turbo": 4096,
"gpt-35-turbo-16k": 16384,
"gpt-3.5-turbo-1106": 16384,
"gpt-4-0314": 8192,
"gpt-4": 8192,
"gpt-4-32k": 32768,
"gpt-4-32k-0314": 32768,
"gpt-4-0613": 8192,
"gpt-4-turbo-preview": 128000,
"gpt-4-0125-preview": 128000,
"gpt-4-turbo-preview": 128000,
"gpt-4-1106-preview": 128000,
"gpt-4-vision-preview": 128000,
"gpt-4-1106-vision-preview": 128000,
"gpt-4": 8192,
"gpt-4-0613": 8192,
"gpt-4-32k": 32768,
"gpt-4-32k-0613": 32768,
"gpt-3.5-turbo-0125": 16385,
"gpt-3.5-turbo": 16385,
"gpt-3.5-turbo-1106": 16385,
"gpt-3.5-turbo-instruct": 4096,
"gpt-3.5-turbo-16k": 16385,
"gpt-3.5-turbo-0613": 4096,
"gpt-3.5-turbo-16k-0613": 16385,
"text-embedding-ada-002": 8192,
"glm-3-turbo": 128000,
"glm-4": 128000,
@ -176,10 +178,12 @@ TOKEN_MAX = {
"claude-2.1": 200000,
"claude-3-sonnet-20240229": 200000,
"claude-3-opus-20240229": 200000,
"yi-34b-chat-0205": 4000,
"yi-34b-chat-200k": 200000,
}
def count_message_tokens(messages, model="gpt-3.5-turbo-0613"):
def count_message_tokens(messages, model="gpt-3.5-turbo-0125"):
"""Return the number of tokens used by a list of messages."""
try:
encoding = tiktoken.encoding_for_model(model)
@ -193,6 +197,7 @@ def count_message_tokens(messages, model="gpt-3.5-turbo-0613"):
"gpt-35-turbo-16k",
"gpt-3.5-turbo-16k",
"gpt-3.5-turbo-1106",
"gpt-3.5-turbo-0125",
"gpt-4-0314",
"gpt-4-32k-0314",
"gpt-4-0613",
@ -209,8 +214,8 @@ def count_message_tokens(messages, model="gpt-3.5-turbo-0613"):
tokens_per_message = 4 # every message follows <|start|>{role/name}\n{content}<|end|>\n
tokens_per_name = -1 # if there's a name, the role is omitted
elif "gpt-3.5-turbo" == model:
print("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
return count_message_tokens(messages, model="gpt-3.5-turbo-0613")
print("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0125.")
return count_message_tokens(messages, model="gpt-3.5-turbo-0125")
elif "gpt-4" == model:
print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
return count_message_tokens(messages, model="gpt-4-0613")
@ -224,7 +229,7 @@ def count_message_tokens(messages, model="gpt-3.5-turbo-0613"):
else:
raise NotImplementedError(
f"num_tokens_from_messages() is not implemented for model {model}. "
f"See https://github.com/openai/openai-python/blob/main/chatml.md "
f"See https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken "
f"for information on how messages are converted to tokens."
)
num_tokens = 0

140
metagpt/utils/tree.py Normal file
View file

@ -0,0 +1,140 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2024/3/11
@Author : mashenquan
@File : tree.py
@Desc : Implement the same functionality as the `tree` command.
Example:
>>> print_tree(".")
utils
+-- serialize.py
+-- project_repo.py
+-- tree.py
+-- mmdc_playwright.py
+-- cost_manager.py
+-- __pycache__
| +-- __init__.cpython-39.pyc
| +-- redis.cpython-39.pyc
| +-- singleton.cpython-39.pyc
| +-- embedding.cpython-39.pyc
| +-- make_sk_kernel.cpython-39.pyc
| +-- file_repository.cpython-39.pyc
+-- file.py
+-- save_code.py
+-- common.py
+-- redis.py
"""
from __future__ import annotations
import subprocess
from pathlib import Path
from typing import Callable, Dict, List
from gitignore_parser import parse_gitignore
def tree(root: str | Path, gitignore: str | Path = None, run_command: bool = False) -> str:
    """Render the directory structure under *root* in a `tree`-like format.

    Args:
        root (str or Path): Directory from which to start traversing.
        gitignore (str or Path): Optional .gitignore file used to filter entries.
        run_command (bool): When True, shell out to the external ``tree`` command
            and return its output; otherwise build the listing in pure python.

    Returns:
        str: A string representation of the directory tree, e.g.::

            utils
            +-- serialize.py
            +-- tree.py
            +-- __pycache__
            |   +-- __init__.cpython-39.pyc
            +-- parse_docstring.py
    """
    root_path = Path(root).resolve()
    if run_command:
        return _execute_tree(root_path, gitignore)

    ignore_rules = parse_gitignore(gitignore) if gitignore else None
    hierarchy = {root_path.name: _list_children(root=root_path, git_ignore_rules=ignore_rules)}
    return "\n".join(_print_tree(hierarchy))
def _list_children(root: Path, git_ignore_rules: Callable) -> Dict[str, Dict]:
dir_ = {}
for i in root.iterdir():
if git_ignore_rules and git_ignore_rules(str(i)):
continue
try:
if i.is_file():
dir_[i.name] = {}
else:
dir_[i.name] = _list_children(root=i, git_ignore_rules=git_ignore_rules)
except (FileNotFoundError, PermissionError, OSError):
dir_[i.name] = {}
return dir_
def _print_tree(dir_: Dict[str, Dict]) -> List[str]:
    """Format a nested ``{name: children}`` mapping as `tree`-style output lines.

    Fixes the annotation ``Dict[str:Dict]`` (a slice expression) to ``Dict[str, Dict]``.

    Args:
        dir_: Mapping of entry name to its (possibly empty) children mapping.

    Returns:
        The formatted lines, one per entry, with "+--"/"|" connectors.
    """
    ret = []
    for name, children in dir_.items():
        ret.append(name)
        if not children:
            continue
        lines = _print_tree(children)
        for v in lines:
            if v[0] not in ["+", " ", "|"]:
                # First line of a child subtree: extend the vertical bars above,
                # then attach the branch connector.
                ret = _add_line(ret)
                row = f"+-- {v}"
            else:
                # Continuation line of an already-connected subtree: indent it.
                row = f"    {v}"
            ret.append(row)
    return ret
def _add_line(rows: List[str]) -> List[str]:
for i in range(len(rows) - 1, -1, -1):
v = rows[i]
if v[0] != " ":
return rows
rows[i] = "|" + v[1:]
return rows
def _execute_tree(root: Path, gitignore: str | Path) -> str:
    """Run the external ``tree`` command on *root* and return its stdout.

    Args:
        root: Directory to list.
        gitignore: Optional gitignore file forwarded via ``--gitfile``.

    Raises:
        subprocess.CalledProcessError: If ``tree`` exits non-zero (check=True).
        FileNotFoundError: If the ``tree`` executable is not installed.
    """
    # Forward the gitignore so the command-line mode filters like the python mode.
    args = ["--gitfile", str(gitignore)] if gitignore else []
    # check=True already raises on a non-zero exit, so the original manual
    # returncode check was dead code and `except ... raise e` was a no-op re-raise.
    result = subprocess.run(["tree"] + args + [str(root)], capture_output=True, text=True, check=True)
    return result.stdout