Merge pull request #1061 from iorisa/feature/repo_to_markdown

feat: repo to markdown
This commit is contained in:
Alexander Wu 2024-03-21 22:44:58 +08:00 committed by GitHub
commit fdf53ac555
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 124 additions and 0 deletions

View file

@ -18,6 +18,7 @@ import csv
import importlib
import inspect
import json
import mimetypes
import os
import platform
import re
@ -819,3 +820,21 @@ See FAQ 5.8
"""
)
raise retry_state.outcome.exception()
def get_markdown_codeblock_type(filename: str) -> str:
    """Return the markdown code-block type corresponding to the file extension.

    The MIME type of ``filename`` is guessed with ``mimetypes.guess_type`` and
    translated into a language tag usable after the opening code fence.

    Args:
        filename: File name (or path) whose type should be guessed.

    Returns:
        The code-block language tag, or ``"text"`` when the MIME type cannot be
        guessed or has no entry in the mapping.
    """
    mime_type, _ = mimetypes.guess_type(filename)
    mappings = {
        "text/x-shellscript": "bash",
        # The stdlib's built-in table reports ``.sh`` as ``application/x-sh``.
        "application/x-sh": "bash",
        "text/x-c++src": "cpp",
        "text/css": "css",
        "text/html": "html",
        "text/x-java": "java",
        # ``.js`` is reported as either of these two types depending on the
        # Python version and the system MIME database, so accept both.
        "application/javascript": "javascript",
        "text/javascript": "javascript",
        "application/json": "json",
        "text/x-python": "python",
        "text/x-ruby": "ruby",
        "application/sql": "sql",
    }
    # ``mime_type`` is None for unknown extensions; ``dict.get`` handles that too.
    return mappings.get(mime_type, "text")

View file

@ -0,0 +1,80 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
This file provides functionality to convert a local repository into a markdown representation.
"""
from __future__ import annotations
import mimetypes
from pathlib import Path
from gitignore_parser import parse_gitignore
from metagpt.logs import logger
from metagpt.utils.common import aread, awrite, get_markdown_codeblock_type, list_files
from metagpt.utils.tree import tree
async def repo_to_markdown(repo_path: str | Path, output: str | Path = None, gitignore: str | Path = None) -> str:
    """
    Build a single markdown document describing a local repository.

    The document starts with a directory-tree section and is followed by one
    section per file that the gitignore rules do not exclude.

    Args:
        repo_path (str | Path): The path to the local repository.
        output (str | Path, optional): Where to save the markdown. Nothing is written when falsy.
            Defaults to None.
        gitignore (str | Path, optional): The .gitignore file to apply. When falsy, the file
            ``../../.gitignore`` relative to this module's directory is used. Defaults to None.

    Returns:
        str: The markdown representation of the repository.
    """
    root = Path(repo_path)

    # Pick the ignore file: the caller's choice, else the one two levels above this module.
    if gitignore:
        ignore_file = Path(gitignore)
    else:
        ignore_file = Path(Path(__file__).parent / "../../.gitignore")
    ignore_file = ignore_file.resolve()

    tree_section = await _write_dir_tree(repo_path=root, gitignore=ignore_file)
    matcher = parse_gitignore(full_path=str(ignore_file))
    files_section = await _write_files(repo_path=root, gitignore_rules=matcher)
    document = tree_section + files_section

    if not output:
        return document
    await awrite(filename=str(output), data=document, encoding="utf-8")
    return document
async def _write_dir_tree(repo_path: Path, gitignore: Path) -> str:
    """
    Produce the "Directory Tree" section of the markdown document.

    ``tree`` is first invoked with ``run_command=True``. If that raises, the error is logged
    and ``tree`` is invoked again with ``run_command=False``.

    Args:
        repo_path (Path): Root directory of the repository.
        gitignore (Path): The .gitignore file handed to ``tree``.

    Returns:
        str: The tree listing wrapped in a ``text`` code block, followed by a separator.
    """
    try:
        listing = tree(repo_path, gitignore, run_command=True)
    except Exception as err:
        # Best-effort fallback: retry without running the external command.
        logger.info(f"{err}, using safe mode.")
        listing = tree(repo_path, gitignore, run_command=False)
    return f"## Directory Tree\n```text\n{listing}\n```\n---\n\n"
async def _write_files(repo_path, gitignore_rules) -> str:
    """
    Concatenate the markdown sections of every non-ignored file in the repository.

    Args:
        repo_path: Root directory passed to ``list_files`` and to ``_write_file``.
        gitignore_rules: Callable that receives a file path as ``str`` and returns a truthy
            value when the file must be skipped.

    Returns:
        str: The per-file sections joined together, in the order ``list_files`` yields them.
    """
    sections = []
    for path in list_files(repo_path):
        if not gitignore_rules(str(path)):
            sections.append(await _write_file(filename=path, repo_path=repo_path))
    return "".join(sections)
async def _write_file(filename: Path, repo_path: Path) -> str:
    """
    Render a single file as a markdown section.

    The section is headed by the file's path relative to ``repo_path``. Files whose guessed
    MIME type contains ``text/`` are embedded in a fenced code block; all other files,
    including those whose type cannot be guessed, are represented by a ``<binary file>``
    placeholder.

    Args:
        filename (Path): Path of the file to render; must be located under ``repo_path``.
        repo_path (Path): Root directory of the repository.

    Returns:
        str: The markdown section for the file, terminated by a separator.

    Raises:
        ValueError: If ``filename`` is not relative to ``repo_path`` (from ``Path.relative_to``).
    """
    relative_path = filename.relative_to(repo_path)
    markdown = f"## {relative_path}\n"

    mime_type, _ = mimetypes.guess_type(filename.name)
    # guess_type returns None for names it cannot classify (e.g. "Makefile", "LICENSE").
    # Treat those like non-text files instead of failing on `"text/" not in None`.
    # NOTE(review): types such as "application/json" also land here and are not embedded,
    # although get_markdown_codeblock_type has a mapping for them. Confirm this is intended.
    if not mime_type or "text/" not in mime_type:
        logger.info(f"Ignore content: {filename}")
        markdown += "<binary file>\n---\n\n"
        return markdown

    content = await aread(filename, encoding="utf-8")
    # Escape code fences and "---" runs, presumably so file content cannot terminate the
    # surrounding code block or be mistaken for a section separator.
    content = content.replace("```", "\\`\\`\\`").replace("---", "\\-\\-\\-")
    code_block_type = get_markdown_codeblock_type(filename.name)
    markdown += f"```{code_block_type}\n{content}\n```\n---\n\n"
    return markdown