update docs

This commit is contained in:
shenchucheng 2024-02-01 16:56:45 +08:00
parent 9b613eec59
commit e246a9cff9
2 changed files with 91 additions and 0 deletions

View file

@ -17,6 +17,12 @@ from metagpt.tools import SearchEngineType
class SkSearchEngine:
"""A search engine class for executing searches.
Attributes:
search_engine: The search engine instance used for executing searches.
"""
def __init__(self, **kwargs):
    """Create the wrapped search engine from the supplied keyword arguments.

    Args:
        **kwargs: Forwarded unchanged to the ``SearchEngine`` constructor.
    """
    engine = SearchEngine(**kwargs)
    self.search_engine = engine
@ -32,6 +38,16 @@ class SkSearchEngine:
class SearchEngine(BaseModel):
"""A model for configuring and executing searches with different search engines.
Attributes:
model_config: Configuration for the model allowing arbitrary types.
engine: The type of search engine to use.
run_func: An optional callable for running the search. If not provided, it will be determined based on the engine.
api_key: An optional API key for the search engine.
proxy: An optional proxy for the search engine requests.
"""
model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow")
engine: SearchEngineType = SearchEngineType.SERPER_GOOGLE
@ -41,6 +57,7 @@ class SearchEngine(BaseModel):
@model_validator(mode="after")
def validate_extra(self):
"""Validates extra fields provided to the model and updates the run function accordingly."""
data = self.model_dump(exclude={"engine"}, exclude_none=True, exclude_defaults=True)
if self.model_extra:
data.update(self.model_extra)
@ -52,6 +69,11 @@ class SearchEngine(BaseModel):
run_func: Optional[Callable[[str, int, bool], Coroutine[None, None, Union[str, list[str]]]]] = None,
**kwargs,
):
"""Processes extra configuration and updates the run function based on the search engine type.
Args:
run_func: An optional callable for running the search. If not provided, it will be determined based on the engine.
"""
if self.engine == SearchEngineType.SERPAPI_GOOGLE:
module = "metagpt.tools.search_engine_serpapi"
run_func = importlib.import_module(module).SerpAPIWrapper(**kwargs).run
@ -72,6 +94,11 @@ class SearchEngine(BaseModel):
@classmethod
def from_search_config(cls, config: SearchConfig, **kwargs):
"""Creates a SearchEngine instance from a SearchConfig.
Args:
config: The search configuration to use for creating the SearchEngine instance.
"""
data = config.model_dump(exclude={"api_type", "search_func"})
if config.search_func is not None:
data["run_func"] = config.search_func
@ -82,6 +109,11 @@ class SearchEngine(BaseModel):
def from_search_func(
    cls, search_func: Callable[[str, int, bool], Coroutine[None, None, Union[str, list[str]]]], **kwargs
):
    """Build an instance whose searches are delegated to a caller-supplied function.

    Args:
        search_func: The callable passed to the constructor as ``run_func``.
        **kwargs: Additional keyword arguments forwarded unchanged to the constructor.

    Returns:
        The object produced by calling ``cls`` with the custom engine type,
        ``search_func`` as ``run_func``, and any extra keyword arguments.
    """
    # A user-provided function is always registered under the custom engine type.
    engine_type = SearchEngineType.CUSTOM_ENGINE
    return cls(engine=engine_type, run_func=search_func, **kwargs)
@overload
@ -115,6 +147,7 @@ class SearchEngine(BaseModel):
query: The search query.
max_results: The maximum number of results to return. Defaults to 8.
as_string: Whether to return the results as a string or a list of dictionaries. Defaults to True.
ignore_errors: Whether to ignore errors during the search. Defaults to False.
Returns:
The search results as a string or a list of dictionaries.

View file

@ -13,6 +13,19 @@ from metagpt.utils.parse_html import WebPage
class WebBrowserEngine(BaseModel):
"""Defines a web browser engine configuration for automated browsing and data extraction.
This class encapsulates the configuration and operational logic for different web browser engines,
such as Playwright, Selenium, or custom implementations. It provides a unified interface to run
browser automation tasks.
Attributes:
model_config: Configuration dictionary allowing arbitrary types and extra fields.
engine: The type of web browser engine to use.
run_func: An optional coroutine function to run the browser engine.
proxy: An optional proxy server URL to use with the browser engine.
"""
model_config = ConfigDict(arbitrary_types_allowed=True, extra="allow")
engine: WebBrowserEngineType = WebBrowserEngineType.PLAYWRIGHT
@ -21,6 +34,15 @@ class WebBrowserEngine(BaseModel):
@model_validator(mode="after")
def validate_extra(self):
"""Validates and processes extra configuration data after model initialization.
This method is automatically called by Pydantic to validate and process any extra configuration
data provided to the model. It ensures that the extra data is properly integrated into the model's
configuration and operational logic.
Returns:
The instance itself after processing the extra data.
"""
data = self.model_dump(exclude={"engine"}, exclude_none=True, exclude_defaults=True)
if self.model_extra:
data.update(self.model_extra)
@ -28,6 +50,17 @@ class WebBrowserEngine(BaseModel):
return self
def _process_extra(self, **kwargs):
"""Processes extra configuration data to set up the browser engine run function.
Depending on the specified engine type, this method dynamically imports and configures
the appropriate browser engine wrapper and its run function.
Args:
**kwargs: Arbitrary keyword arguments representing extra configuration data.
Raises:
NotImplementedError: If the engine type is not supported.
"""
if self.engine is WebBrowserEngineType.PLAYWRIGHT:
module = "metagpt.tools.web_browser_engine_playwright"
run_func = importlib.import_module(module).PlaywrightWrapper(**kwargs).run
@ -42,6 +75,19 @@ class WebBrowserEngine(BaseModel):
@classmethod
def from_browser_config(cls, config: BrowserConfig, **kwargs):
    """Create an instance from a BrowserConfig object plus optional keyword overrides.

    The configuration is dumped to a dictionary with ``config.model_dump()`` and the
    keyword arguments are merged on top of it, so a keyword argument whose name matches
    a dumped configuration key replaces the configured value.

    Args:
        config: The configuration object; its ``model_dump()`` output supplies the
            base constructor arguments.
        **kwargs: Additional constructor arguments. Keys that also appear in the
            dumped configuration take precedence over it.

    Returns:
        The object produced by calling ``cls`` with the merged arguments.
    """
    data = config.model_dump()
    # Merge instead of splatting both mappings into one call: ``cls(**data, **kwargs)``
    # raises TypeError when the same key is present in both, which would make the
    # documented "override" use case impossible.
    data.update(kwargs)
    return cls(**data)
@ -54,4 +100,16 @@ class WebBrowserEngine(BaseModel):
...
async def run(self, url: str, *urls: str) -> WebPage | list[WebPage]:
    """Load one or more web pages through the configured run function.

    This is the concrete implementation behind the overloaded ``run`` signatures.
    All of the work is delegated to ``self.run_func``; its awaited result is
    returned unchanged.

    Args:
        url: The first URL to load.
        *urls: Any further URLs to load in the same call.

    Returns:
        Whatever the awaited ``self.run_func`` call produces. The annotation
        declares this as a single WebPage or a list of WebPage objects.
    """
    runner = self.run_func
    return await runner(url, *urls)