From dccd19924999811aae8c553836f8bab2d0cb9aa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=8E=98=E6=9D=83=20=E9=A9=AC?= Date: Fri, 20 Sep 2024 14:24:17 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20Editor.read=20+=20file=20size=E6=A3=80?= =?UTF-8?q?=E6=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- metagpt/tools/libs/editor.py | 12 ++++++++---- tests/metagpt/tools/libs/test_editor.py | 7 +++++-- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/metagpt/tools/libs/editor.py b/metagpt/tools/libs/editor.py index 7fff568dc..36f4022f3 100644 --- a/metagpt/tools/libs/editor.py +++ b/metagpt/tools/libs/editor.py @@ -104,14 +104,18 @@ class Editor(BaseModel): async def read(self, path: str) -> FileBlock: """Read the whole content of a file. Using absolute paths as the argument for specifying the file location.""" + error = FileBlock( + file_path=str(path), + block_content="The file is too large to read. Use `Editor.similarity_search` to read the file instead.", + ) + path = Path(path) + if path.stat().st_size > 5 * DEFAULT_MIN_TOKEN_COUNT: + return error content = await File.read_text_file(path) if not content: return FileBlock(file_path=str(path), block_content="") if self.is_large_file(content=content): - return FileBlock( - file_path=str(path), - block_content="The file is too large to read. Use `Editor.similarity_search` to read the file instead.", - ) + return error self.resource.report(str(path), "path") lines = content.splitlines(keepends=True) diff --git a/tests/metagpt/tools/libs/test_editor.py b/tests/metagpt/tools/libs/test_editor.py index e5b900504..6727b2fd6 100644 --- a/tests/metagpt/tools/libs/test_editor.py +++ b/tests/metagpt/tools/libs/test_editor.py @@ -9,6 +9,7 @@ from metagpt.tools.libs.editor import Editor from metagpt.tools.libs.index_repo import ( CHATS_INDEX_ROOT, CHATS_ROOT, + DEFAULT_MIN_TOKEN_COUNT, UPLOAD_ROOT, IndexRepo, ) @@ -780,8 +781,10 @@ async def test_similarity_search(query, filename): @pytest.mark.asyncio async def test_read(): editor = Editor() - content = await editor.read(str(TEST_DATA_PATH / "pdf/9112674.pdf")) - assert "similarity_search" in content.block_content + filename = TEST_DATA_PATH / "pdf/9112674.pdf" + content = await editor.read(str(filename)) + size = filename.stat().st_size + assert "similarity_search" in content.block_content and size > 5 * DEFAULT_MIN_TOKEN_COUNT if __name__ == "__main__":