add arxiv reader

2026-06-23 15:48:11 +02:00 · 2024-03-19 10:58:23 +08:00 · 2024-03-19 10:58:23 +08:00 · c9f60f344b
commit c9f60f344b
parent b2bbf838ec
1 changed files with 26 additions and 0 deletions
--- a/examples/di/arxiv_reader.py
+++ b/examples/di/arxiv_reader.py
@ -0,0 +1,26 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+@Time    : 2024/01/15
+@Author  : mannaandpoem
+@File    : arxiv_reader.py
+"""
+from metagpt.roles.di.data_interpreter import DataInterpreter
+
+
+async def main():
+    """Run a DataInterpreter on a prompt listing the past-week arXiv URLs of four cs categories."""
+    categories = ["cs.ai", "cs.cl", "cs.lg", "cs.se"]
+    urls = [f"https://arxiv.org/list/{name}/pastweek?skip=0&show=300" for name in categories]
+    prompt = f"""This is a collection of arxiv urls: '{urls}' .
+Record each article, remove duplicates by title (they may have multiple tags), filter out papers related to 
+large language model / agent / , and pay attention to maintaining the original order as much as possible"""
+    interpreter = DataInterpreter(react_mode="react", tools=["scrape_web_playwright"])
+    # The value returned by run() is discarded; nothing is returned to the caller.
+    await interpreter.run(prompt)
+
+
+if __name__ == "__main__":
+    import asyncio
+    # asyncio is imported here because only this script entry point uses it.
+    asyncio.run(main())