mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-04-26 17:26:22 +02:00
Merge pull request #1061 from iorisa/feature/repo_to_markdown
feat: repo to markdown
This commit is contained in:
commit
fdf53ac555
3 changed files with 124 additions and 0 deletions
|
|
@ -18,6 +18,7 @@ import csv
|
|||
import importlib
|
||||
import inspect
|
||||
import json
|
||||
import mimetypes
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
|
|
@ -819,3 +820,21 @@ See FAQ 5.8
|
|||
"""
|
||||
)
|
||||
raise retry_state.outcome.exception()
|
||||
|
||||
|
||||
def get_markdown_codeblock_type(filename: str) -> str:
|
||||
"""Return the markdown code-block type corresponding to the file extension."""
|
||||
mime_type, _ = mimetypes.guess_type(filename)
|
||||
mappings = {
|
||||
"text/x-shellscript": "bash",
|
||||
"text/x-c++src": "cpp",
|
||||
"text/css": "css",
|
||||
"text/html": "html",
|
||||
"text/x-java": "java",
|
||||
"application/javascript": "javascript",
|
||||
"application/json": "json",
|
||||
"text/x-python": "python",
|
||||
"text/x-ruby": "ruby",
|
||||
"application/sql": "sql",
|
||||
}
|
||||
return mappings.get(mime_type, "text")
|
||||
|
|
|
|||
80
metagpt/utils/repo_to_markdown.py
Normal file
80
metagpt/utils/repo_to_markdown.py
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
This file provides functionality to convert a local repository into a markdown representation.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import mimetypes
|
||||
from pathlib import Path
|
||||
|
||||
from gitignore_parser import parse_gitignore
|
||||
|
||||
from metagpt.logs import logger
|
||||
from metagpt.utils.common import aread, awrite, get_markdown_codeblock_type, list_files
|
||||
from metagpt.utils.tree import tree
|
||||
|
||||
|
||||
async def repo_to_markdown(repo_path: str | Path, output: str | Path = None, gitignore: str | Path = None) -> str:
|
||||
"""
|
||||
Convert a local repository into a markdown representation.
|
||||
|
||||
This function takes a path to a local repository and generates a markdown representation of the repository structure,
|
||||
including directory trees and file listings.
|
||||
|
||||
Args:
|
||||
repo_path (str | Path): The path to the local repository.
|
||||
output (str | Path, optional): The path to save the generated markdown file. Defaults to None.
|
||||
gitignore (str | Path, optional): The path to the .gitignore file. Defaults to None.
|
||||
|
||||
Returns:
|
||||
str: The markdown representation of the repository.
|
||||
"""
|
||||
repo_path = Path(repo_path)
|
||||
gitignore = Path(gitignore or Path(__file__).parent / "../../.gitignore").resolve()
|
||||
|
||||
markdown = await _write_dir_tree(repo_path=repo_path, gitignore=gitignore)
|
||||
|
||||
gitignore_rules = parse_gitignore(full_path=str(gitignore))
|
||||
markdown += await _write_files(repo_path=repo_path, gitignore_rules=gitignore_rules)
|
||||
|
||||
if output:
|
||||
await awrite(filename=str(output), data=markdown, encoding="utf-8")
|
||||
return markdown
|
||||
|
||||
|
||||
async def _write_dir_tree(repo_path: Path, gitignore: Path) -> str:
|
||||
try:
|
||||
content = tree(repo_path, gitignore, run_command=True)
|
||||
except Exception as e:
|
||||
logger.info(f"{e}, using safe mode.")
|
||||
content = tree(repo_path, gitignore, run_command=False)
|
||||
|
||||
doc = f"## Directory Tree\n```text\n{content}\n```\n---\n\n"
|
||||
return doc
|
||||
|
||||
|
||||
async def _write_files(repo_path, gitignore_rules) -> str:
|
||||
filenames = list_files(repo_path)
|
||||
markdown = ""
|
||||
for filename in filenames:
|
||||
if gitignore_rules(str(filename)):
|
||||
continue
|
||||
markdown += await _write_file(filename=filename, repo_path=repo_path)
|
||||
return markdown
|
||||
|
||||
|
||||
async def _write_file(filename: Path, repo_path: Path) -> str:
|
||||
relative_path = filename.relative_to(repo_path)
|
||||
markdown = f"## {relative_path}\n"
|
||||
|
||||
mime_type, _ = mimetypes.guess_type(filename.name)
|
||||
if "text/" not in mime_type:
|
||||
logger.info(f"Ignore content: {filename}")
|
||||
markdown += "<binary file>\n---\n\n"
|
||||
return markdown
|
||||
content = await aread(filename, encoding="utf-8")
|
||||
content = content.replace("```", "\\`\\`\\`").replace("---", "\\-\\-\\-")
|
||||
code_block_type = get_markdown_codeblock_type(filename.name)
|
||||
markdown += f"```{code_block_type}\n{content}\n```\n---\n\n"
|
||||
return markdown
|
||||
Loading…
Add table
Add a link
Reference in a new issue