def get_markdown_codeblock_type(filename: str) -> str:
    """Return the markdown code-block type corresponding to the file extension.

    The MIME type reported by ``mimetypes.guess_type`` is consulted first. The
    strings it returns differ between Python versions and platform MIME
    databases (for example a ``.js`` file may be reported as either
    ``application/javascript`` or ``text/javascript``, and many source
    extensions are not registered at all), so the lower-cased file extension
    is used as a fallback.

    Args:
        filename: A file name or path; only its extension matters.

    Returns:
        The fence language (e.g. ``"python"``), or ``"text"`` when neither the
        MIME type nor the extension is recognised.
    """
    mime_mappings = {
        "text/x-shellscript": "bash",
        "application/x-sh": "bash",
        "text/x-sh": "bash",
        "text/x-c++src": "cpp",
        "text/css": "css",
        "text/html": "html",
        "text/x-java": "java",
        "application/javascript": "javascript",
        "text/javascript": "javascript",
        "application/x-javascript": "javascript",
        "application/json": "json",
        "text/x-python": "python",
        "text/x-ruby": "ruby",
        "application/sql": "sql",
    }
    # Fallback for hosts whose MIME database lacks (or names differently)
    # the entries above.
    extension_mappings = {
        ".sh": "bash",
        ".bash": "bash",
        ".cpp": "cpp",
        ".cc": "cpp",
        ".cxx": "cpp",
        ".hpp": "cpp",
        ".css": "css",
        ".html": "html",
        ".htm": "html",
        ".java": "java",
        ".js": "javascript",
        ".mjs": "javascript",
        ".json": "json",
        ".py": "python",
        ".rb": "ruby",
        ".sql": "sql",
    }

    mime_type, _ = mimetypes.guess_type(filename)
    by_mime = mime_mappings.get(mime_type)  # mime_type may be None
    if by_mime is not None:
        return by_mime

    extension = os.path.splitext(filename)[1].lower()
    return extension_mappings.get(extension, "text")
async def _write_dir_tree(repo_path: Path, gitignore: Path) -> str:
    """Render the repository's directory tree as a markdown section.

    ``tree`` is first called with ``run_command=True``; if that raises for any
    reason, the error is logged and ``tree`` is called again with
    ``run_command=False``.

    Args:
        repo_path: Root directory of the repository.
        gitignore: Path to the .gitignore file handed to ``tree``.

    Returns:
        A "## Directory Tree" section containing the tree in a ``text`` fence.
    """
    try:
        content = tree(repo_path, gitignore, run_command=True)
    except Exception as e:
        logger.info(f"{e}, using safe mode.")
        content = tree(repo_path, gitignore, run_command=False)

    doc = f"## Directory Tree\n```text\n{content}\n```\n---\n\n"
    return doc


async def _write_files(repo_path, gitignore_rules) -> str:
    """Render every non-ignored file under ``repo_path`` as markdown.

    Args:
        repo_path: Root directory of the repository.
        gitignore_rules: Callable taking a path string and returning a truthy
            value when the path must be skipped.

    Returns:
        The concatenated per-file markdown sections.
    """
    sections = []
    for filename in list_files(repo_path):
        if gitignore_rules(str(filename)):
            continue
        sections.append(await _write_file(filename=filename, repo_path=repo_path))
    # Join once instead of growing a string with repeated ``+=``.
    return "".join(sections)


async def _write_file(filename: Path, repo_path: Path) -> str:
    """Render a single file as a markdown section.

    The section is headed by the file's path relative to ``repo_path``. The
    content is embedded in a fenced code block when the file is recognised as
    text: either its guessed MIME type starts with ``text/`` or
    ``get_markdown_codeblock_type`` maps it to a specific language (this covers
    types such as ``application/json``). Any other file, including one whose
    MIME type cannot be guessed, gets a heading only.

    Args:
        filename: Path of the file; must be located under ``repo_path``.
        repo_path: Root directory of the repository.

    Returns:
        The markdown section for the file.
    """
    relative_path = filename.relative_to(repo_path)
    markdown = f"## {relative_path}\n"

    # guess_type returns None for unknown or missing extensions (LICENSE,
    # Makefile, ...); normalise to "" so the checks below cannot raise.
    mime_type, _ = mimetypes.guess_type(filename.name)
    mime_type = mime_type or ""
    code_block_type = get_markdown_codeblock_type(filename.name)

    is_text = mime_type.startswith("text/") or code_block_type != "text"
    if not is_text:
        logger.info(f"Ignore content: {filename}")
        markdown += "\n---\n\n"
        return markdown

    content = await aread(filename, encoding="utf-8")
    # Escape fences and horizontal rules so file content cannot break the
    # surrounding markdown structure.
    content = content.replace("```", "\\`\\`\\`").replace("---", "\\-\\-\\-")
    markdown += f"```{code_block_type}\n{content}\n```\n---\n\n"
    return markdown
@pytest.mark.parametrize(
    ["repo_path", "output"],
    [(Path(__file__).parent.parent, Path(__file__).parent.parent.parent / f"workspace/unittest/{uuid.uuid4().hex}.md")],
)
@pytest.mark.asyncio
async def test_repo_to_markdown(repo_path: Path, output: Path):
    """Convert a directory to markdown and check the result is written and non-empty."""
    try:
        markdown = await repo_to_markdown(repo_path=repo_path, output=output)
        assert output.exists()
        assert markdown
    finally:
        # Remove the generated file even when an assertion above fails.
        output.unlink(missing_ok=True)


if __name__ == "__main__":
    pytest.main([__file__, "-s"])