mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-04-27 09:46:24 +02:00
代码优化
This commit is contained in:
parent
22b9990ccf
commit
5287e024c5
8 changed files with 106 additions and 34 deletions
BIN
examples/data/parse/test03.mp4
Normal file
BIN
examples/data/parse/test03.mp4
Normal file
Binary file not shown.
BIN
examples/data/parse/test04.mp3
Normal file
BIN
examples/data/parse/test04.mp3
Normal file
Binary file not shown.
|
|
@@ -7,30 +7,56 @@ from metagpt.logs import logger
|
|||
from metagpt.rag.parser.omniparse.client import OmniParseClient
|
||||
from metagpt.rag.parser.omniparse.parse import OmniParse
|
||||
from metagpt.rag.schema import OmniParseOptions, OmniParseType
|
||||
from metagpt.const import EXAMPLE_DATA_PATH
|
||||
|
||||
# Sample inputs for the OmniParse examples, located under EXAMPLE_DATA_PATH.
TEST_DOCX = EXAMPLE_DATA_PATH / "parse/test01.docx"
TEST_PDF = EXAMPLE_DATA_PATH / "parse/test02.pdf"
TEST_VIDEO = EXAMPLE_DATA_PATH / "parse/test03.mp4"
TEST_AUDIO = EXAMPLE_DATA_PATH / "parse/test04.mp3"
# In the code visible here, only the disabled website example refers to this URL.
TEST_WEBSITE_URL = "https://github.com/geekan/MetaGPT"
|
||||
|
||||
|
||||
async def omniparse_client_example():
    """Call each OmniParseClient parser on the sample files and log the results.

    Exercises document parsing (from in-memory bytes), then PDF, video and
    audio parsing (from the sample file paths). Website parsing is left
    disabled below.
    """
    client = OmniParseClient(base_url=config.omniparse.base_url)

    # docx: read the file ourselves and hand the raw bytes to the client.
    # The handle is only needed for the read, so the request is made after
    # the file has been closed.
    with open(TEST_DOCX, "rb") as f:
        filelike = f.read()
    # NOTE(review): bytes_filename presumably tells the client what kind of
    # file the bytes came from -- confirm against OmniParseClient.
    document_parse_ret = await client.parse_document(filelike=filelike, bytes_filename="test_01.docx")
    logger.info(document_parse_ret)

    # pdf
    pdf_parse_ret = await client.parse_pdf(filelike=TEST_PDF)
    logger.info(pdf_parse_ret)

    # video
    video_parse_ret = await client.parse_video(filelike=TEST_VIDEO)
    logger.info(video_parse_ret)

    # audio
    audio_parse_ret = await client.parse_audio(filelike=TEST_AUDIO)
    logger.info(audio_parse_ret)

    # website -- FIXME: the official OmniParse API still has problems here,
    # so this call stays disabled.
    # website_parse_ret = await client.parse_website(url=TEST_WEBSITE_URL)
    # logger.info(website_parse_ret)
|
||||
|
||||
|
||||
async def omniparse_example():
    """Load the sample files through OmniParse and log what comes back.

    Builds a parser configured for PDF input with Markdown results, loads one
    PDF with the synchronous ``load_data``, then switches the parser's
    ``parse_type`` to DOCUMENT and loads a list of files with the awaitable
    ``aload_data``.
    """
    parser = OmniParse(
        api_key=config.omniparse.api_key,
        base_url=config.omniparse.base_url,
        parse_options=OmniParseOptions(
            parse_type=OmniParseType.PDF,
            result_type=ResultType.MD,
            max_timeout=120,
            num_workers=3,
        ),
    )

    # NOTE(review): load_data is called synchronously from inside a coroutine;
    # confirm it is safe to use while an event loop is already running.
    ret = parser.load_data(file_path=TEST_PDF)
    logger.info(ret)

    # Switch the parse type before passing a list that mixes docx and pdf.
    file_paths = [TEST_DOCX, TEST_PDF]
    parser.parse_type = OmniParseType.DOCUMENT
    ret = await parser.aload_data(file_path=file_paths)
    logger.info(ret)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue