From 8fa8b4b141249828c572c462289769d257ba82c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Tue, 13 Aug 2024 17:35:49 +0800 Subject: [PATCH 1/3] Try common encoding formats when reading a file. --- metagpt/tools/libs/editor.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/metagpt/tools/libs/editor.py b/metagpt/tools/libs/editor.py index c2fdcb859..eed92e96b 100644 --- a/metagpt/tools/libs/editor.py +++ b/metagpt/tools/libs/editor.py @@ -219,9 +219,25 @@ class Editor(BaseModel): @staticmethod def _read_text(path: Union[str, Path]) -> List[str]: - with open(str(path), "r") as f: - lines = f.readlines() - return lines + encoding_format_list = [ + "utf-8", + "ascii", + "gb2312", + "gbk", + "iso-8859-1", + "cp1252", + "utf-16", + "utf-16-le", + "utf-16-be", + ] + for encoding in encoding_format_list: + try: + with open(str(path), "r", encoding=encoding) as f: + lines = f.readlines() + return lines + except: + pass + return [f"Reading failed: `{path}` cannot be decoded by `{encoding_format_list}`. Please ask a human for help."] @staticmethod async def _read_pdf(path: Union[str, Path]) -> List[str]: From 91d5e8491e01d75eede6e23535e8b4ab88c1ac20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Tue, 13 Aug 2024 19:51:38 +0800 Subject: [PATCH 2/3] fix format issue --- metagpt/tools/libs/editor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/tools/libs/editor.py b/metagpt/tools/libs/editor.py index eed92e96b..e314cb15d 100644 --- a/metagpt/tools/libs/editor.py +++ b/metagpt/tools/libs/editor.py @@ -237,7 +237,7 @@ class Editor(BaseModel): return lines except: pass - return [f"Reading failed: `{path}` cannot be decoded by `{encoding_format_list}`. 
Please ask a human for help."] + return [f"Reading failed: `{path}` cannot be decoded by `{encoding_format_list}`."] @staticmethod async def _read_pdf(path: Union[str, Path]) -> List[str]: From f14bbf9fc5f8d6547abfa4b0eb17b48dcde69aad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E4=BC=9F=E9=9F=AC?= Date: Wed, 14 Aug 2024 10:14:38 +0800 Subject: [PATCH 3/3] =?UTF-8?q?=E4=BD=BF=E7=94=A8=20common.aread=20?= =?UTF-8?q?=E8=AF=BB=E5=8F=96=E6=9C=AA=E7=9F=A5=E7=BC=96=E7=A0=81=E6=A0=BC?= =?UTF-8?q?=E5=BC=8F=E7=9A=84=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/tools/libs/editor.py | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/metagpt/tools/libs/editor.py b/metagpt/tools/libs/editor.py index e314cb15d..f1eb9d933 100644 --- a/metagpt/tools/libs/editor.py +++ b/metagpt/tools/libs/editor.py @@ -10,7 +10,7 @@ from pydantic import BaseModel, ConfigDict from metagpt.logs import logger from metagpt.tools.tool_registry import register_tool from metagpt.utils import read_docx -from metagpt.utils.common import aread_bin, awrite_bin +from metagpt.utils.common import aread, aread_bin, awrite_bin from metagpt.utils.repo_to_markdown import is_text_file from metagpt.utils.report import EditorReporter @@ -48,7 +48,7 @@ class Editor(BaseModel): """Read the whole content of a file. 
Using absolute paths as the argument for specifying the file location.""" is_text, mime_type = await is_text_file(path) if is_text: - lines = self._read_text(path) + lines = await self._read_text(path) elif mime_type == "application/pdf": lines = await self._read_pdf(path) elif mime_type in { @@ -218,26 +218,10 @@ class Editor(BaseModel): return lint_passed, lint_message @staticmethod - def _read_text(path: Union[str, Path]) -> List[str]: - encoding_format_list = [ - "utf-8", - "ascii", - "gb2312", - "gbk", - "iso-8859-1", - "cp1252", - "utf-16", - "utf-16-le", - "utf-16-be", - ] - for encoding in encoding_format_list: - try: - with open(str(path), "r", encoding=encoding) as f: - lines = f.readlines() - return lines - except: - pass - return [f"Reading failed: `{path}` cannot be decoded by `{encoding_format_list}`."] + async def _read_text(path: Union[str, Path]) -> List[str]: + content = await aread(path) + lines = content.split("\n") + return lines @staticmethod async def _read_pdf(path: Union[str, Path]) -> List[str]: