Merge branch 'main' into fix_typo

This commit is contained in:
Alexander Wu 2024-03-22 11:53:54 +08:00 committed by GitHub
commit 334149bb5d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
34 changed files with 491 additions and 128 deletions

View file

@ -18,6 +18,7 @@ import csv
import importlib
import inspect
import json
import mimetypes
import os
import platform
import re
@ -29,6 +30,7 @@ from typing import Any, Callable, List, Literal, Tuple, Union
from urllib.parse import quote, unquote
import aiofiles
import chardet
import loguru
import requests
from PIL import Image
@ -663,14 +665,21 @@ def role_raise_decorator(func):
@handle_exception
async def aread(filename: str | Path, encoding=None) -> str:
async def aread(filename: str | Path, encoding="utf-8") -> str:
"""Read file asynchronously."""
async with aiofiles.open(str(filename), mode="r", encoding=encoding) as reader:
content = await reader.read()
try:
async with aiofiles.open(str(filename), mode="r", encoding=encoding) as reader:
content = await reader.read()
except UnicodeDecodeError:
async with aiofiles.open(str(filename), mode="rb") as reader:
raw = await reader.read()
result = chardet.detect(raw)
detected_encoding = result["encoding"]
content = raw.decode(detected_encoding)
return content
async def awrite(filename: str | Path, data: str, encoding=None):
async def awrite(filename: str | Path, data: str, encoding="utf-8"):
"""Write file asynchronously."""
pathname = Path(filename)
pathname.parent.mkdir(parents=True, exist_ok=True)
@ -811,3 +820,21 @@ See FAQ 5.8
"""
)
raise retry_state.outcome.exception()
def get_markdown_codeblock_type(filename: str) -> str:
"""Return the markdown code-block type corresponding to the file extension."""
mime_type, _ = mimetypes.guess_type(filename)
mappings = {
"text/x-shellscript": "bash",
"text/x-c++src": "cpp",
"text/css": "css",
"text/html": "html",
"text/x-java": "java",
"application/javascript": "javascript",
"application/json": "json",
"text/x-python": "python",
"text/x-ruby": "ruby",
"application/sql": "sql",
}
return mappings.get(mime_type, "text")

View file

@ -13,9 +13,7 @@ import re
from pathlib import Path
from typing import Set
import aiofiles
from metagpt.utils.common import aread
from metagpt.utils.common import aread, awrite
from metagpt.utils.exceptions import handle_exception
@ -45,8 +43,7 @@ class DependencyFile:
async def save(self):
"""Save dependencies to the file asynchronously."""
data = json.dumps(self._dependencies)
async with aiofiles.open(str(self._filename), mode="w") as writer:
await writer.write(data)
await awrite(filename=self._filename, data=data)
async def update(self, filename: Path | str, dependencies: Set[Path | str], persist=True):
"""Update dependencies for a file asynchronously.

View file

@ -14,11 +14,9 @@ from datetime import datetime
from pathlib import Path
from typing import Dict, List, Set
import aiofiles
from metagpt.logs import logger
from metagpt.schema import Document
from metagpt.utils.common import aread
from metagpt.utils.common import aread, awrite
from metagpt.utils.json_to_markdown import json_to_markdown
@ -55,8 +53,7 @@ class FileRepository:
pathname = self.workdir / filename
pathname.parent.mkdir(parents=True, exist_ok=True)
content = content if content else "" # avoid `argument must be str, not None` to make it continue
async with aiofiles.open(str(pathname), mode="w") as writer:
await writer.write(content)
await awrite(filename=str(pathname), data=content)
logger.info(f"save to: {str(pathname)}")
if dependencies is not None:

View file

@ -9,11 +9,9 @@ import asyncio
import os
from pathlib import Path
import aiofiles
from metagpt.config2 import config
from metagpt.logs import logger
from metagpt.utils.common import check_cmd_exists
from metagpt.utils.common import awrite, check_cmd_exists
async def mermaid_to_file(engine, mermaid_code, output_file_without_suffix, width=2048, height=2048) -> int:
@ -30,9 +28,7 @@ async def mermaid_to_file(engine, mermaid_code, output_file_without_suffix, widt
if dir_name and not os.path.exists(dir_name):
os.makedirs(dir_name)
tmp = Path(f"{output_file_without_suffix}.mmd")
async with aiofiles.open(tmp, "w", encoding="utf-8") as f:
await f.write(mermaid_code)
# tmp.write_text(mermaid_code, encoding="utf-8")
await awrite(filename=tmp, data=mermaid_code)
if engine == "nodejs":
if check_cmd_exists(config.mermaid.path) != 0:

View file

@ -340,7 +340,9 @@ def extract_state_value_from_output(content: str) -> str:
content (str): llm's output from `Role._think`
"""
content = content.strip() # deal the output cases like " 0", "0\n" and so on.
pattern = r"([0-9])" # TODO find the number using a more proper method not just extract from content using pattern
pattern = (
r"(?<!-)[0-9]" # TODO find the number using a more proper method not just extract from content using pattern
)
matches = re.findall(pattern, content, re.DOTALL)
matches = list(set(matches))
state = matches[0] if len(matches) > 0 else "-1"

View file

@ -0,0 +1,80 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
This file provides functionality to convert a local repository into a markdown representation.
"""
from __future__ import annotations
import mimetypes
from pathlib import Path
from gitignore_parser import parse_gitignore
from metagpt.logs import logger
from metagpt.utils.common import aread, awrite, get_markdown_codeblock_type, list_files
from metagpt.utils.tree import tree
async def repo_to_markdown(repo_path: str | Path, output: str | Path = None, gitignore: str | Path = None) -> str:
"""
Convert a local repository into a markdown representation.
This function takes a path to a local repository and generates a markdown representation of the repository structure,
including directory trees and file listings.
Args:
repo_path (str | Path): The path to the local repository.
output (str | Path, optional): The path to save the generated markdown file. Defaults to None.
gitignore (str | Path, optional): The path to the .gitignore file. Defaults to None.
Returns:
str: The markdown representation of the repository.
"""
repo_path = Path(repo_path)
gitignore = Path(gitignore or Path(__file__).parent / "../../.gitignore").resolve()
markdown = await _write_dir_tree(repo_path=repo_path, gitignore=gitignore)
gitignore_rules = parse_gitignore(full_path=str(gitignore))
markdown += await _write_files(repo_path=repo_path, gitignore_rules=gitignore_rules)
if output:
await awrite(filename=str(output), data=markdown, encoding="utf-8")
return markdown
async def _write_dir_tree(repo_path: Path, gitignore: Path) -> str:
try:
content = tree(repo_path, gitignore, run_command=True)
except Exception as e:
logger.info(f"{e}, using safe mode.")
content = tree(repo_path, gitignore, run_command=False)
doc = f"## Directory Tree\n```text\n{content}\n```\n---\n\n"
return doc
async def _write_files(repo_path, gitignore_rules) -> str:
filenames = list_files(repo_path)
markdown = ""
for filename in filenames:
if gitignore_rules(str(filename)):
continue
markdown += await _write_file(filename=filename, repo_path=repo_path)
return markdown
async def _write_file(filename: Path, repo_path: Path) -> str:
relative_path = filename.relative_to(repo_path)
markdown = f"## {relative_path}\n"
mime_type, _ = mimetypes.guess_type(filename.name)
if "text/" not in mime_type:
logger.info(f"Ignore content: {filename}")
markdown += "<binary file>\n---\n\n"
return markdown
content = await aread(filename, encoding="utf-8")
content = content.replace("```", "\\`\\`\\`").replace("---", "\\-\\-\\-")
code_block_type = get_markdown_codeblock_type(filename.name)
markdown += f"```{code_block_type}\n{content}\n```\n---\n\n"
return markdown

140
metagpt/utils/tree.py Normal file
View file

@ -0,0 +1,140 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Time : 2024/3/11
@Author : mashenquan
@File : tree.py
@Desc : Implement the same functionality as the `tree` command.
Example:
>>> print_tree(".")
utils
+-- serialize.py
+-- project_repo.py
+-- tree.py
+-- mmdc_playwright.py
+-- cost_manager.py
+-- __pycache__
| +-- __init__.cpython-39.pyc
| +-- redis.cpython-39.pyc
| +-- singleton.cpython-39.pyc
| +-- embedding.cpython-39.pyc
| +-- make_sk_kernel.cpython-39.pyc
| +-- file_repository.cpython-39.pyc
+-- file.py
+-- save_code.py
+-- common.py
+-- redis.py
"""
from __future__ import annotations
import subprocess
from pathlib import Path
from typing import Callable, Dict, List
from gitignore_parser import parse_gitignore
def tree(root: str | Path, gitignore: str | Path = None, run_command: bool = False) -> str:
"""
Recursively traverses the directory structure and prints it out in a tree-like format.
Args:
root (str or Path): The root directory from which to start traversing.
gitignore (str or Path): The filename of gitignore file.
run_command (bool): Whether to execute `tree` command. Execute the `tree` command and return the result if True,
otherwise execute python code instead.
Returns:
str: A string representation of the directory tree.
Example:
>>> tree(".")
utils
+-- serialize.py
+-- project_repo.py
+-- tree.py
+-- mmdc_playwright.py
+-- __pycache__
| +-- __init__.cpython-39.pyc
| +-- redis.cpython-39.pyc
| +-- singleton.cpython-39.pyc
+-- parse_docstring.py
>>> tree(".", gitignore="../../.gitignore")
utils
+-- serialize.py
+-- project_repo.py
+-- tree.py
+-- mmdc_playwright.py
+-- parse_docstring.py
>>> tree(".", gitignore="../../.gitignore", run_command=True)
utils
serialize.py
project_repo.py
tree.py
mmdc_playwright.py
parse_docstring.py
"""
root = Path(root).resolve()
if run_command:
return _execute_tree(root, gitignore)
git_ignore_rules = parse_gitignore(gitignore) if gitignore else None
dir_ = {root.name: _list_children(root=root, git_ignore_rules=git_ignore_rules)}
v = _print_tree(dir_)
return "\n".join(v)
def _list_children(root: Path, git_ignore_rules: Callable) -> Dict[str, Dict]:
dir_ = {}
for i in root.iterdir():
if git_ignore_rules and git_ignore_rules(str(i)):
continue
try:
if i.is_file():
dir_[i.name] = {}
else:
dir_[i.name] = _list_children(root=i, git_ignore_rules=git_ignore_rules)
except (FileNotFoundError, PermissionError, OSError):
dir_[i.name] = {}
return dir_
def _print_tree(dir_: Dict[str:Dict]) -> List[str]:
ret = []
for name, children in dir_.items():
ret.append(name)
if not children:
continue
lines = _print_tree(children)
for j, v in enumerate(lines):
if v[0] not in ["+", " ", "|"]:
ret = _add_line(ret)
row = f"+-- {v}"
else:
row = f" {v}"
ret.append(row)
return ret
def _add_line(rows: List[str]) -> List[str]:
for i in range(len(rows) - 1, -1, -1):
v = rows[i]
if v[0] != " ":
return rows
rows[i] = "|" + v[1:]
return rows
def _execute_tree(root: Path, gitignore: str | Path) -> str:
args = ["--gitfile", str(gitignore)] if gitignore else []
try:
result = subprocess.run(["tree"] + args + [str(root)], capture_output=True, text=True, check=True)
if result.returncode != 0:
raise ValueError(f"tree exits with code {result.returncode}")
return result.stdout
except subprocess.CalledProcessError as e:
raise e