代码优化

This commit is contained in:
liuminhui 2024-07-23 10:26:31 +08:00
parent f9d3a8c521
commit 6c39c80d94
3 changed files with 2 additions and 2 deletions

View file

@ -11,7 +11,6 @@ TEST_DOCX = EXAMPLE_DATA_PATH / "omniparse/test01.docx"
TEST_PDF = EXAMPLE_DATA_PATH / "omniparse/test02.pdf"
TEST_VIDEO = EXAMPLE_DATA_PATH / "omniparse/test03.mp4"
TEST_AUDIO = EXAMPLE_DATA_PATH / "omniparse/test04.mp3"
TEST_WEBSITE_URL = "https://github.com/geekan/MetaGPT"
async def omniparse_client_example():

View file

@ -315,7 +315,7 @@ class SimpleEngine(RetrieverQueryEngine):
def _get_file_extractor() -> dict[str:BaseReader]:
"""
Get the file extractor.
Currently, only PDF use OmniParse
Currently, only PDF files use OmniParse. Other document types use the built-in readers from llama_index.
Returns:
dict[file_type: BaseReader]

View file

@ -122,6 +122,7 @@ class OmniParseClient:
OmniParsedResult: The result of the pdf parsing.
"""
self.verify_file_ext(file_input, {".pdf"})
# parse_pdf only supports parsing from the raw byte data of the file.
file_info = await self.get_file_info(file_input, only_bytes=True)
endpoint = f"{self.parse_document_endpoint}/pdf"
resp = await self._request_parse(endpoint=endpoint, files={"file": file_info})