feat: merge gitlab:mgx_ops

This commit is contained in:
莘权 马 2024-06-01 18:57:55 +08:00
commit 5a001a501d
17 changed files with 215 additions and 44 deletions

View file

@ -646,7 +646,7 @@ def role_raise_decorator(func):
raise Exception(format_trackback_info(limit=None))
except Exception as e:
if self.latest_observed_msg:
logger.warning(
logger.exception(
"There is a exception in role's execution, in order to resume, "
"we delete the newest role communication message in the role's memory."
)

View file

@ -9,6 +9,7 @@
from pathlib import Path
import aiofiles
from fsspec.implementations.memory import MemoryFileSystem as _MemoryFileSystem
from metagpt.logs import logger
from metagpt.utils.exceptions import handle_exception
@ -68,3 +69,10 @@ class File:
content = b"".join(chunks)
logger.debug(f"Successfully read file, the path of file: {file_path}")
return content
class MemoryFileSystem(_MemoryFileSystem):
    """In-memory filesystem that stringifies paths before normalising them."""

    @classmethod
    def _strip_protocol(cls, path):
        """Convert ``path`` to ``str`` and delegate to the parent implementation.

        Args:
            path: The path to normalise; it is passed through ``str()`` first.
                (presumably this lets callers hand in ``pathlib.Path`` objects —
                verify against callers)

        Returns:
            Whatever the parent class's ``_strip_protocol`` returns for the
            stringified path.
        """
        path_text = str(path)
        return super()._strip_protocol(path_text)

View file

@ -7,6 +7,8 @@ from urllib.parse import urljoin, urlparse
from bs4 import BeautifulSoup
from pydantic import BaseModel, PrivateAttr
import htmlmin
class WebPage(BaseModel):
inner_text: str
@ -38,6 +40,22 @@ class WebPage(BaseModel):
elif url.startswith(("http://", "https://")):
yield urljoin(self.url, url)
def get_slim_soup(self, keep_links: bool = False):
    """Build a reduced soup from ``self.html``.

    Every attribute that has a truthy value is deleted from every tag, except
    ``class`` (and ``href`` when ``keep_links`` is true). Afterwards all
    ``svg``, ``img``, ``video`` and ``audio`` elements are decomposed.

    Args:
        keep_links: When true, ``href`` attributes are preserved as well.

    Returns:
        The modified soup object produced by ``_get_soup``.
    """
    slim = _get_soup(self.html)
    preserved = ["class", "href"] if keep_links else ["class"]

    for tag in slim.find_all(True):
        # Iterate over a snapshot of the names: attributes are deleted in-loop.
        for attr_name in list(tag.attrs):
            if attr_name in preserved:
                continue
            # NOTE(review): attributes with a falsy value (e.g. an empty string)
            # are left in place — confirm this is intended.
            if tag[attr_name]:
                del tag[attr_name]

    for media in slim.find_all(["svg", "img", "video", "audio"]):
        media.decompose()

    return slim
def get_html_content(page: str, base: str):
soup = _get_soup(page)
@ -48,7 +66,12 @@ def get_html_content(page: str, base: str):
def _get_soup(page: str):
soup = BeautifulSoup(page, "html.parser")
# https://stackoverflow.com/questions/1936466/how-to-scrape-only-visible-webpage-text-with-beautifulsoup
for s in soup(["style", "script", "[document]", "head", "title"]):
for s in soup(["style", "script", "[document]", "head", "title", "footer"]):
s.extract()
return soup
def simplify_html(html: str, url: str, keep_links: bool = False):
    """Slim down an HTML document and minify the result.

    Args:
        html: The HTML text to simplify.
        url: The URL stored on the temporary ``WebPage`` wrapping ``html``.
        keep_links: Forwarded to ``WebPage.get_slim_soup``.

    Returns:
        The output of ``htmlmin.minify`` for the slimmed markup, with comments
        and empty space removed.
    """
    page = WebPage(inner_text="", html=html, url=url)
    slim_markup = page.get_slim_soup(keep_links).decode()
    return htmlmin.minify(slim_markup, remove_comments=True, remove_empty_space=True)

View file

@ -39,6 +39,7 @@ class BlockType(str, Enum):
GALLERY = "Gallery"
NOTEBOOK = "Notebook"
DOCS = "Docs"
THOUGHT = "Thought"
END_MARKER_NAME = "end_marker"
@ -55,23 +56,23 @@ class ResourceReporter(BaseModel):
callback_url: str = Field(METAGPT_REPORTER_DEFAULT_URL, description="The URL to which the report should be sent")
_llm_task: Optional[asyncio.Task] = PrivateAttr(None)
def report(self, value: Any, name: str):
def report(self, value: Any, name: str, extra: Optional[dict] = None):
"""Synchronously report resource observation data.
Args:
value: The data to report.
name: The type name of the data.
"""
return self._report(value, name)
return self._report(value, name, extra)
async def async_report(self, value: Any, name: str):
async def async_report(self, value: Any, name: str, extra: Optional[dict] = None):
"""Asynchronously report resource observation data.
Args:
value: The data to report.
name: The type name of the data.
"""
return await self._async_report(value, name)
return await self._async_report(value, name, extra)
@classmethod
def set_report_fn(cls, fn: Callable):
@ -100,20 +101,20 @@ class ResourceReporter(BaseModel):
"""
cls._async_report = fn
def _report(self, value: Any, name: str):
def _report(self, value: Any, name: str, extra: Optional[dict] = None):
if not self.callback_url:
return
data = self._format_data(value, name)
data = self._format_data(value, name, extra)
resp = requests.post(self.callback_url, json=data)
resp.raise_for_status()
return resp.text
async def _async_report(self, value: Any, name: str):
async def _async_report(self, value: Any, name: str, extra: Optional[dict] = None):
if not self.callback_url:
return
data = self._format_data(value, name)
data = self._format_data(value, name, extra)
url = self.callback_url
_result = urlparse(url)
sessiion_kwargs = {}
@ -129,9 +130,16 @@ class ResourceReporter(BaseModel):
resp.raise_for_status()
return await resp.text()
def _format_data(self, value, name):
def _format_data(self, value, name, extra):
data = self.model_dump(mode="json", exclude=("callback_url", "llm_stream"))
data["value"] = str(value) if isinstance(value, Path) else value
if isinstance(value, BaseModel):
value = value.model_dump(mode="json")
elif isinstance(value, Path):
value = str(value)
if name == "path":
value = os.path.abspath(value)
data["value"] = value
data["name"] = name
role = CURRENT_ROLE.get(None)
if role:
@ -139,6 +147,8 @@ class ResourceReporter(BaseModel):
else:
role_name = os.environ.get("METAGPT_ROLE")
data["role"] = role_name
if extra:
data["extra"] = extra
return data
def __enter__(self):
@ -252,6 +262,16 @@ class TaskReporter(ObjectReporter):
block: Literal[BlockType.TASK] = BlockType.TASK
class ThoughtReporter(ObjectReporter):
    """Reporter for object resources to Thought Block."""

    block: Literal[BlockType.THOUGHT] = BlockType.THOUGHT

    async def __aenter__(self):
        """Report an empty object, then enter the parent async context.

        Returns:
            Whatever the parent class's ``__aenter__`` returns.
        """
        # The empty report is sent before the parent context is entered.
        await self.async_report({})
        entered = await super().__aenter__()
        return entered
class FileReporter(ResourceReporter):
"""File resource callback for reporting complete file paths.
@ -259,13 +279,23 @@ class FileReporter(ResourceReporter):
if the file can be partially output for display first, use streaming callback.
"""
def report(self, value: Union[Path, dict, Any], name: Literal["path", "meta", "content"] = "path"):
def report(
self,
value: Union[Path, dict, Any],
name: Literal["path", "meta", "content"] = "path",
extra: Optional[dict] = None,
):
"""Report file resource synchronously."""
return super().report(value, name)
return super().report(value, name, extra)
async def async_report(self, value: Path, name: Literal["path", "meta", "content"] = "path"):
async def async_report(
self,
value: Union[Path, dict, Any],
name: Literal["path", "meta", "content"] = "path",
extra: Optional[dict] = None,
):
"""Report file resource asynchronously."""
return await super().async_report(value, name)
return await super().async_report(value, name, extra)
class NotebookReporter(FileReporter):