This commit is contained in:
femto 2024-10-11 16:48:08 +08:00
commit a7efa27ce0
55 changed files with 1009 additions and 72 deletions

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

64
examples/rag/omniparse.py Normal file
View file

@ -0,0 +1,64 @@
import asyncio
from metagpt.config2 import config
from metagpt.const import EXAMPLE_DATA_PATH
from metagpt.logs import logger
from metagpt.rag.parsers import OmniParse
from metagpt.rag.schema import OmniParseOptions, OmniParseType, ParseResultType
from metagpt.utils.omniparse_client import OmniParseClient
TEST_DOCX = EXAMPLE_DATA_PATH / "omniparse/test01.docx"
TEST_PDF = EXAMPLE_DATA_PATH / "omniparse/test02.pdf"
TEST_VIDEO = EXAMPLE_DATA_PATH / "omniparse/test03.mp4"
TEST_AUDIO = EXAMPLE_DATA_PATH / "omniparse/test04.mp3"
async def omniparse_client_example():
client = OmniParseClient(base_url=config.omniparse.base_url)
# docx
with open(TEST_DOCX, "rb") as f:
file_input = f.read()
document_parse_ret = await client.parse_document(file_input=file_input, bytes_filename="test_01.docx")
logger.info(document_parse_ret)
# pdf
pdf_parse_ret = await client.parse_pdf(file_input=TEST_PDF)
logger.info(pdf_parse_ret)
# video
video_parse_ret = await client.parse_video(file_input=TEST_VIDEO)
logger.info(video_parse_ret)
# audio
audio_parse_ret = await client.parse_audio(file_input=TEST_AUDIO)
logger.info(audio_parse_ret)
async def omniparse_example():
parser = OmniParse(
api_key=config.omniparse.api_key,
base_url=config.omniparse.base_url,
parse_options=OmniParseOptions(
parse_type=OmniParseType.PDF,
result_type=ParseResultType.MD,
max_timeout=120,
num_workers=3,
),
)
ret = parser.load_data(file_path=TEST_PDF)
logger.info(ret)
file_paths = [TEST_DOCX, TEST_PDF]
parser.parse_type = OmniParseType.DOCUMENT
ret = await parser.aload_data(file_path=file_paths)
logger.info(ret)
async def main():
await omniparse_client_example()
await omniparse_example()
if __name__ == "__main__":
asyncio.run(main())

View file

@ -2,7 +2,7 @@
import asyncio
from examples.rag_pipeline import DOC_PATH, QUESTION
from examples.rag.rag_pipeline import DOC_PATH, QUESTION
from metagpt.logs import logger
from metagpt.rag.engines import SimpleEngine
from metagpt.roles import Sales