mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-02 14:45:17 +02:00
代码优化
This commit is contained in:
parent
f9d3a8c521
commit
6c39c80d94
3 changed files with 2 additions and 2 deletions
|
|
@@ -11,7 +11,6 @@ TEST_DOCX = EXAMPLE_DATA_PATH / "omniparse/test01.docx"
|
|||
TEST_PDF = EXAMPLE_DATA_PATH / "omniparse/test02.pdf"
|
||||
TEST_VIDEO = EXAMPLE_DATA_PATH / "omniparse/test03.mp4"
|
||||
TEST_AUDIO = EXAMPLE_DATA_PATH / "omniparse/test04.mp3"
|
||||
TEST_WEBSITE_URL = "https://github.com/geekan/MetaGPT"
|
||||
|
||||
|
||||
async def omniparse_client_example():
|
||||
|
|
|
|||
|
|
@@ -315,7 +315,7 @@ class SimpleEngine(RetrieverQueryEngine):
|
|||
def _get_file_extractor() -> dict[str:BaseReader]:
|
||||
"""
|
||||
Get the file extractor.
|
||||
Currently, only PDF use OmniParse
|
||||
Currently, only PDF use OmniParse. Other document types use the built-in reader from llama_index.
|
||||
|
||||
Returns:
|
||||
dict[file_type: BaseReader]
|
||||
|
|
|
|||
|
|
@@ -122,6 +122,7 @@ class OmniParseClient:
|
|||
OmniParsedResult: The result of the pdf parsing.
|
||||
"""
|
||||
self.verify_file_ext(file_input, {".pdf"})
|
||||
# parse_pdf supports parsing by accepting only the byte data of the file.
|
||||
file_info = await self.get_file_info(file_input, only_bytes=True)
|
||||
endpoint = f"{self.parse_document_endpoint}/pdf"
|
||||
resp = await self._request_parse(endpoint=endpoint, files={"file": file_info})
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue