mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-06-05 14:55:18 +02:00
Merge branch 'geekan:main' into main
This commit is contained in:
commit
a0e6d20034
50 changed files with 1734 additions and 250 deletions
|
|
@ -14,6 +14,7 @@ class SearchEngineType(Enum):
|
|||
SERPAPI_GOOGLE = auto()
|
||||
DIRECT_GOOGLE = auto()
|
||||
SERPER_GOOGLE = auto()
|
||||
DUCK_DUCK_GO = auto()
|
||||
CUSTOM_ENGINE = auto()
|
||||
|
||||
|
||||
|
|
|
|||
107
metagpt/tools/search_engine_ddg.py
Normal file
107
metagpt/tools/search_engine_ddg.py
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from concurrent import futures
|
||||
from typing import Literal, overload
|
||||
|
||||
from duckduckgo_search import DDGS
|
||||
from googleapiclient.errors import HttpError
|
||||
|
||||
from metagpt.config import CONFIG
|
||||
from metagpt.logs import logger
|
||||
|
||||
|
||||
class DDGAPIWrapper:
    """Wrapper around the duckduckgo_search API.

    To use this module, you should have the `duckduckgo_search` Python package installed.
    """

    def __init__(
        self,
        *,
        loop: asyncio.AbstractEventLoop | None = None,
        executor: futures.Executor | None = None,
    ):
        """Create the underlying DDGS client.

        Args:
            loop: Event loop used to schedule the blocking search. When None,
                the loop running the `run` coroutine is used.
            executor: Executor passed to `loop.run_in_executor`.
        """
        kwargs = {}
        # Only forward a proxy when one is configured globally.
        if CONFIG.global_proxy:
            kwargs["proxies"] = CONFIG.global_proxy
        self.loop = loop
        self.executor = executor
        self.ddgs = DDGS(**kwargs)

    @overload
    async def run(
        self,
        query: str,
        max_results: int = 8,
        as_string: Literal[True] = True,
        focus: list[str] | None = None,
    ) -> str:
        ...

    @overload
    async def run(
        self,
        query: str,
        max_results: int = 8,
        as_string: Literal[False] = False,
        focus: list[str] | None = None,
    ) -> list[dict[str, str]]:
        ...

    async def run(
        self,
        query: str,
        max_results: int = 8,
        as_string: bool = True,
        focus: list[str] | None = None,
    ) -> str | list[dict]:
        """Return the results of a DuckDuckGo text search.

        The blocking search is executed through `loop.run_in_executor` so the
        coroutine does not block the event loop. Exceptions raised by the
        search propagate to the caller.

        Args:
            query: The search query.
            max_results: The maximum number of results to return.
            as_string: A boolean flag to determine the return type of the results. If True, the function will
                return a JSON string with the search results. If False, it will return a list of dictionaries
                containing the "link", "snippet" and "title" of each search result.
            focus: Keys to keep in each result. When None or empty, every key
                ("link", "snippet", "title") is kept.

        Returns:
            The results of the search.
        """
        loop = self.loop or asyncio.get_running_loop()
        search_results = await loop.run_in_executor(
            self.executor,
            self._search_from_ddgs,
            query,
            max_results,
        )

        # Honour the `focus` parameter advertised by the overloads.
        if focus:
            search_results = [
                {key: value for key, value in item.items() if key in focus}
                for item in search_results
            ]

        if as_string:
            return json.dumps(search_results, ensure_ascii=False)
        return search_results

    def _search_from_ddgs(self, query: str, max_results: int) -> list[dict[str, str]]:
        """Run the blocking text search and normalize each hit.

        Zipping against `range(max_results)` caps how many hits are taken from
        whatever iterable `DDGS.text` returns.

        Args:
            query: The search query.
            max_results: The maximum number of hits to collect.

        Returns:
            A list of dicts with the keys "link", "snippet" and "title".
        """
        return [
            {
                "link": hit["href"],
                "snippet": hit["body"],
                "title": hit["title"],
            }
            for _, hit in zip(range(max_results), self.ddgs.text(query))
        ]
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: expose the wrapper's `run` method through Fire.
    from fire import Fire

    Fire(DDGAPIWrapper().run)
|
||||
117
metagpt/tools/search_engine_googleapi.py
Normal file
117
metagpt/tools/search_engine_googleapi.py
Normal file
|
|
@ -0,0 +1,117 @@
|
|||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
from concurrent import futures
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import httplib2
|
||||
from googleapiclient.discovery import build
|
||||
from googleapiclient.errors import HttpError
|
||||
|
||||
from metagpt.config import CONFIG
|
||||
from metagpt.logs import logger
|
||||
|
||||
|
||||
class GoogleAPIWrapper:
    """Wrapper around GoogleAPI.

    Requires the `google-api-python-client` Python package and the
    configuration values `GOOGLE_API_KEY` and `GOOGLE_CSE_ID`. See
    https://programmablesearchengine.google.com/controlpanel/all.
    """

    def __init__(
        self,
        *,
        loop: asyncio.AbstractEventLoop | None = None,
        executor: futures.Executor | None = None,
    ):
        """Build the custom-search client, routing through the global proxy if set.

        Args:
            loop: Event loop used to schedule the blocking API call; when None,
                `asyncio.get_event_loop()` is consulted at call time.
            executor: Executor passed to `loop.run_in_executor`.
        """
        build_kwargs = {"developerKey": CONFIG.google_api_key}
        if CONFIG.global_proxy:
            parsed_proxy = urlparse(CONFIG.global_proxy)
            # An "https" proxy URL is mapped onto the HTTP proxy type.
            scheme = "http" if parsed_proxy.scheme == "https" else parsed_proxy.scheme
            proxy_kind = getattr(httplib2.socks, f"PROXY_TYPE_{scheme.upper()}")
            proxy_info = httplib2.ProxyInfo(
                proxy_kind,
                parsed_proxy.hostname,
                parsed_proxy.port,
            )
            build_kwargs["http"] = httplib2.Http(proxy_info=proxy_info)

        self.google_api_client = build("customsearch", "v1", **build_kwargs).cse()
        self.custom_search_engine_id = CONFIG.google_cse_id
        self.loop = loop
        self.executor = executor

    async def run(
        self,
        query: str,
        max_results: int = 8,
        as_string: bool = True,
        focus: list[str] | None = None,
    ) -> str | list[dict]:
        """Return the results of a Google search using the official Google API.

        Args:
            query: The search query.
            max_results: The number of results to request.
            as_string: When True, return the results serialized by
                `safe_google_results`; when False, return the list of
                per-result dictionaries.
            focus: Keys to keep from each search result. Defaults to
                "snippet", "link" and "title".

        Returns:
            The results of the search. An `HttpError` from the API call is
            logged and yields an empty result set.
        """
        event_loop = self.loop or asyncio.get_event_loop()
        request = self.google_api_client.list(
            q=query,
            num=max_results,
            cx=self.custom_search_engine_id,
        )
        # The blocking `execute` call runs in the executor.
        pending = event_loop.run_in_executor(self.executor, request.execute)

        try:
            response = await pending
            items = response.get("items", [])
        except HttpError as e:
            logger.exception(f"fail to search {query} for {e}")
            items = []

        wanted = focus or ["snippet", "link", "title"]
        details = []
        for item in items:
            # Keep only the requested keys of each result.
            details.append({key: value for key, value in item.items() if key in wanted})

        if not as_string:
            return details
        return safe_google_results(details)
|
||||
|
||||
|
||||
def safe_google_results(results: str | list) -> str:
    """Return the results of a google search in a safe format.

    Args:
        results: The search results, either a list of JSON-serializable
            items or an already formatted string.

    Returns:
        For a list, its JSON encoding (non-ASCII characters are escaped, as
        `json.dumps` does by default). For a string, the same text with any
        characters that cannot be encoded as UTF-8 dropped.
    """
    if isinstance(results, list):
        # json.dumps serializes the list directly; no intermediate copy is needed.
        return json.dumps(results)
    # Round-trip through UTF-8, discarding anything that cannot be encoded.
    return results.encode("utf-8", "ignore").decode("utf-8")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: expose the wrapper's `run` method through Fire.
    from fire import Fire

    Fire(GoogleAPIWrapper().run)
|
||||
Loading…
Add table
Add a link
Reference in a new issue