mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-02 14:45:17 +02:00
feat: Editor.read + large file check
This commit is contained in:
parent
b22d96dc58
commit
aa5b78a370
2 changed files with 22 additions and 1 deletions
|
|
@ -10,10 +10,11 @@ import tempfile
|
|||
from pathlib import Path
|
||||
from typing import List, Optional, Union
|
||||
|
||||
import tiktoken
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
from metagpt.const import DEFAULT_WORKSPACE_ROOT
|
||||
from metagpt.tools.libs.index_repo import IndexRepo
|
||||
from metagpt.tools.libs.index_repo import DEFAULT_MIN_TOKEN_COUNT, IndexRepo
|
||||
from metagpt.tools.libs.linter import Linter
|
||||
from metagpt.tools.tool_registry import register_tool
|
||||
from metagpt.utils.file import File
|
||||
|
|
@ -106,6 +107,11 @@ class Editor(BaseModel):
|
|||
content = await File.read_text_file(path)
|
||||
if not content:
|
||||
return FileBlock(file_path=str(path), block_content="")
|
||||
if self.is_large_file(content=content):
|
||||
return FileBlock(
|
||||
file_path=str(path),
|
||||
block_content="The file is too large to read. Use `Editor.similarity_search` to read the file instead.",
|
||||
)
|
||||
self.resource.report(str(path), "path")
|
||||
|
||||
lines = content.splitlines(keepends=True)
|
||||
|
|
@ -959,3 +965,10 @@ class Editor(BaseModel):
|
|||
>>> print(texts)
|
||||
"""
|
||||
return await IndexRepo.cross_repo_search(query=query, file_or_path=file_or_path)
|
||||
|
||||
@staticmethod
def is_large_file(content: str, mix_token_count: int = 0) -> bool:
    """Return True when `content` tokenizes to at least the threshold token count.

    Args:
        content: The text to measure.
        mix_token_count: Token threshold at or above which the text counts as
            large. A falsy value (the default 0) falls back to
            DEFAULT_MIN_TOKEN_COUNT. NOTE(review): the name looks like a typo
            for "min_token_count"; it is kept unchanged for caller compatibility.

    Returns:
        True if the "cl100k_base" token count of `content` is greater than or
        equal to the threshold, otherwise False.
    """
    encoding = tiktoken.get_encoding("cl100k_base")
    # File content is arbitrary text: treat special-token literals such as
    # "<|endoftext|>" as ordinary text instead of letting encode() raise
    # ValueError, which is its default behavior for disallowed special tokens.
    token_count = len(encoding.encode(content, disallowed_special=()))
    threshold = mix_token_count or DEFAULT_MIN_TOKEN_COUNT
    return token_count >= threshold
|
||||
|
|
|
|||
|
|
@ -737,5 +737,13 @@ async def test_similarity_search(query, filename):
|
|||
save_to.unlink(missing_ok=True)
|
||||
|
||||
|
||||
@pytest.mark.skip
@pytest.mark.asyncio
async def test_read():
    """Read the sample PDF and expect the returned block to mention `similarity_search`."""
    pdf_path = TEST_DATA_PATH / "pdf/9112674.pdf"
    file_block = await Editor().read(str(pdf_path))
    assert "similarity_search" in file_block.block_content
|
||||
|
||||
|
||||
# Allow running this test module directly; "-s" is forwarded to pytest.
if __name__ == "__main__":
    pytest.main(args=[__file__, "-s"])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue