Merge branch 'fixbug/editor_pdf' into 'mgx_ops'

fixbug: async Editor.read

See merge request pub/MetaGPT!293
This commit is contained in:
林义章 2024-08-09 09:55:36 +00:00
commit 7292f29fe3
3 changed files with 12 additions and 38 deletions

View file

@ -11,7 +11,7 @@ from metagpt.const import DEFAULT_WORKSPACE_ROOT
from metagpt.logs import logger
from metagpt.tools.tool_registry import register_tool
from metagpt.utils import read_docx
from metagpt.utils.common import aread_bin, awrite_bin, run_coroutine_sync
from metagpt.utils.common import aread_bin, awrite_bin
from metagpt.utils.repo_to_markdown import is_text_file
from metagpt.utils.report import EditorReporter
@ -45,13 +45,13 @@ class Editor:
# self.resource.report(path, "path")
return f"The writing/coding the of the file {os.path.basename(path)}' is now completed. The file '{os.path.basename(path)}' has been successfully created."
def read(self, path: str) -> FileBlock:
async def read(self, path: str) -> FileBlock:
"""Read the whole content of a file. Using absolute paths as the argument for specifying the file location."""
is_text, mime_type = run_coroutine_sync(is_text_file, path)
is_text, mime_type = await is_text_file(path)
if is_text:
lines = self._read_text(path)
elif mime_type == "application/pdf":
lines = self._read_pdf(path)
lines = await self._read_pdf(path)
elif mime_type in {
"application/msword",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
@ -59,7 +59,7 @@ class Editor:
"application/vnd.openxmlformats-officedocument.wordprocessingml.template",
"application/vnd.ms-word.template.macroEnabled.12",
}:
lines = self._read_docx(path)
lines = await self._read_docx(path)
else:
return FileBlock(file_path=str(path), block_content="")
self.resource.report(str(path), "path")
@ -225,8 +225,8 @@ class Editor:
return lines
@staticmethod
def _read_pdf(path: Union[str, Path]) -> List[str]:
result = run_coroutine_sync(Editor._omniparse_read_file, path)
async def _read_pdf(path: Union[str, Path]) -> List[str]:
result = await Editor._omniparse_read_file(path)
if result:
return result
@ -237,8 +237,8 @@ class Editor:
return [i.text for i in lines]
@staticmethod
def _read_docx(path: Union[str, Path]) -> List[str]:
result = run_coroutine_sync(Editor._omniparse_read_file, path)
async def _read_docx(path: Union[str, Path]) -> List[str]:
result = await Editor._omniparse_read_file(path)
if result:
return result
return read_docx(str(path))

View file

@ -12,7 +12,6 @@
from __future__ import annotations
import ast
import asyncio
import base64
import contextlib
import csv
@ -1054,32 +1053,6 @@ def tool2name(cls, methods: List[str], entry) -> Dict[str, Any]:
return mappings
def run_coroutine_sync(coroutine, *args, **kwargs):
    """
    Runs a coroutine function synchronously by encapsulating its invocation as a non-coroutine function call.

    When no event loop is running in the calling thread, the coroutine is executed with
    ``asyncio.run``. When the caller is already inside a running event loop, the coroutine
    is executed on a short-lived worker thread that owns its own event loop, and the calling
    thread blocks until it finishes.

    Args:
        coroutine: The coroutine function to be encapsulated.
        *args: Positional arguments to be passed to the coroutine.
        **kwargs: Keyword arguments to be passed to the coroutine.

    Returns:
        The return value of the coroutine.

    Raises:
        Any exception raised by the coroutine is re-raised to the caller.

    Note:
        Calling this from inside a running loop blocks that loop until the coroutine
        completes, and the coroutine runs on a *different* loop, so it must not rely on
        loop-bound objects (locks, queues, sessions) created on the caller's loop.
        Prefer ``await`` in async code; this helper is a bridge for sync call sites.
    """
    from concurrent.futures import ThreadPoolExecutor

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No event loop is running in this thread: drive the coroutine directly.
        return asyncio.run(coroutine(*args, **kwargs))

    # A loop is running in *this* thread. Scheduling onto it with
    # run_coroutine_threadsafe() and then blocking on future.result() would deadlock,
    # because the blocked thread is the one that has to run the loop. Execute the
    # coroutine on a separate thread with a fresh event loop instead.
    with ThreadPoolExecutor(max_workers=1) as executor:
        return executor.submit(asyncio.run, coroutine(*args, **kwargs)).result()
def log_time(method):
"""A time-consuming decorator for printing execution duration."""

View file

@ -141,9 +141,10 @@ def test_new_content_format_issue(test_file):
TEST_DATA_PATH / "movie/trailer.mp4",
],
)
def test_read_files(filename):
@pytest.mark.asyncio
async def test_read_files(filename):
editor = Editor()
file_block = editor.read(filename)
file_block = await editor.read(filename)
assert file_block
assert file_block.file_path
if filename.suffix not in [".png", ".mp3", ".mp4"]: