mg集成omniparse

This commit is contained in:
liuminhui 2024-07-18 20:40:20 +08:00
parent 39eb534ca0
commit 22b9990ccf
14 changed files with 381 additions and 14 deletions

Binary file not shown.

Binary file not shown.

View file

@@ -0,0 +1,45 @@
import asyncio
from llama_parse import ResultType
from metagpt.config2 import config
from metagpt.logs import logger
from metagpt.rag.parser.omniparse.client import OmniParseClient
from metagpt.rag.parser.omniparse.parse import OmniParse
from metagpt.rag.schema import OmniParseOptions, OmniParseType
async def omniparse_client_example():
    """Call OmniParseClient directly on a DOCX and a PDF and log both results.

    The DOCX is read into memory and passed as bytes together with
    ``bytes_filename``; the PDF is passed as a path string. Both paths are
    relative, so they resolve against the current working directory.
    """
    omni_client = OmniParseClient(base_url=config.omniparse.base_url)

    # DOCX: pass the raw bytes, with a filename supplied separately.
    with open("../data/rag/test01.docx", "rb") as docx_file:
        docx_bytes = docx_file.read()
    docx_result = await omni_client.parse_document(filelike=docx_bytes, bytes_filename="test_01.docx")
    logger.info(docx_result)

    # PDF: pass a path string instead of bytes.
    pdf_result = await omni_client.parse_pdf(filelike="../data/rag/test02.pdf")
    logger.info(pdf_result)
async def omniparse_example():
    """Load data through OmniParse: one PDF first, then a list of files.

    The parser is created with PDF parse options and Markdown result type.
    After the first load its ``parse_type`` is switched to ``DOCUMENT``
    before loading the mixed DOCX/PDF list. Each result is logged.
    """
    omni_parser = OmniParse(
        api_key=config.omniparse.api_key,
        base_url=config.omniparse.base_url,
        parse_options=OmniParseOptions(parse_type=OmniParseType.PDF, result_type=ResultType.MD),
    )

    # Single file, parsed with the PDF parse type set at construction.
    single_result = await omni_parser.aload_data(file_path="../data/rag/test02.pdf")
    logger.info(single_result)

    # Several files in one call, after switching the parse type.
    mixed_paths = ["../data/rag/test01.docx", "../data/rag/test02.pdf"]
    omni_parser.parse_type = OmniParseType.DOCUMENT
    batch_result = await omni_parser.aload_data(file_path=mixed_paths)
    logger.info(batch_result)
async def main():
    """Run the client example and then the parser example, sequentially."""
    for example in (omniparse_client_example, omniparse_example):
        await example()
# Run the examples only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())

View file

@@ -2,7 +2,7 @@
import asyncio
from examples.rag_pipeline import DOC_PATH, QUESTION
from examples.rag.rag_pipeline import DOC_PATH, QUESTION
from metagpt.logs import logger
from metagpt.rag.engines import SimpleEngine
from metagpt.roles import Sales