feat: Editor.read + file size check

This commit is contained in:
莘权 马 2024-09-20 14:24:17 +08:00
parent d37954d43a
commit dccd199249
2 changed files with 13 additions and 6 deletions

View file

@ -104,14 +104,18 @@ class Editor(BaseModel):
async def read(self, path: str) -> FileBlock:
"""Read the whole content of a file. Using absolute paths as the argument for specifying the file location."""
error = FileBlock(
file_path=str(path),
block_content="The file is too large to read. Use `Editor.similarity_search` to read the file instead.",
)
path = Path(path)
if path.stat().st_size > 5 * DEFAULT_MIN_TOKEN_COUNT:
return error
content = await File.read_text_file(path)
if not content:
return FileBlock(file_path=str(path), block_content="")
if self.is_large_file(content=content):
return FileBlock(
file_path=str(path),
block_content="The file is too large to read. Use `Editor.similarity_search` to read the file instead.",
)
return error
self.resource.report(str(path), "path")
lines = content.splitlines(keepends=True)

View file

@ -9,6 +9,7 @@ from metagpt.tools.libs.editor import Editor
from metagpt.tools.libs.index_repo import (
CHATS_INDEX_ROOT,
CHATS_ROOT,
DEFAULT_MIN_TOKEN_COUNT,
UPLOAD_ROOT,
IndexRepo,
)
@ -780,8 +781,10 @@ async def test_similarity_search(query, filename):
@pytest.mark.asyncio
async def test_read():
editor = Editor()
content = await editor.read(str(TEST_DATA_PATH / "pdf/9112674.pdf"))
assert "similarity_search" in content.block_content
filename = TEST_DATA_PATH / "pdf/9112674.pdf"
content = await editor.read(str(filename))
size = filename.stat().st_size
assert "similarity_search" in content.block_content and size > 5 * DEFAULT_MIN_TOKEN_COUNT
if __name__ == "__main__":