mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-04-28 18:36:22 +02:00
feat: merge gitlab:mgx_ops
This commit is contained in:
commit
5a001a501d
17 changed files with 215 additions and 44 deletions
|
|
@ -646,7 +646,7 @@ def role_raise_decorator(func):
|
|||
raise Exception(format_trackback_info(limit=None))
|
||||
except Exception as e:
|
||||
if self.latest_observed_msg:
|
||||
logger.warning(
|
||||
logger.exception(
|
||||
"There is a exception in role's execution, in order to resume, "
|
||||
"we delete the newest role communication message in the role's memory."
|
||||
)
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@
|
|||
from pathlib import Path
|
||||
|
||||
import aiofiles
|
||||
from fsspec.implementations.memory import MemoryFileSystem as _MemoryFileSystem
|
||||
|
||||
from metagpt.logs import logger
|
||||
from metagpt.utils.exceptions import handle_exception
|
||||
|
|
@ -68,3 +69,10 @@ class File:
|
|||
content = b"".join(chunks)
|
||||
logger.debug(f"Successfully read file, the path of file: {file_path}")
|
||||
return content
|
||||
|
||||
|
||||
class MemoryFileSystem(_MemoryFileSystem):
    """In-memory filesystem that stringifies paths before protocol stripping.

    The only override is ``_strip_protocol``: the incoming ``path`` is passed
    through ``str()`` and then handed to the parent implementation.
    NOTE(review): presumably this lets callers pass ``pathlib.Path`` objects
    where the parent expects a string -- confirm against callers.
    """

    @classmethod
    def _strip_protocol(cls, path):
        """Return the parent's protocol-stripped form of ``str(path)``."""
        path_text = str(path)
        return super()._strip_protocol(path_text)
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@ from urllib.parse import urljoin, urlparse
|
|||
from bs4 import BeautifulSoup
|
||||
from pydantic import BaseModel, PrivateAttr
|
||||
|
||||
import htmlmin
|
||||
|
||||
|
||||
class WebPage(BaseModel):
|
||||
inner_text: str
|
||||
|
|
@ -38,6 +40,22 @@ class WebPage(BaseModel):
|
|||
elif url.startswith(("http://", "https://")):
|
||||
yield urljoin(self.url, url)
|
||||
|
||||
def get_slim_soup(self, keep_links: bool = False):
    """Build a reduced soup of this page's HTML.

    Args:
        keep_links: When true, ``href`` attributes are retained in addition
            to ``class``.

    Returns:
        The soup produced by ``_get_soup(self.html)`` after removing
        non-retained attributes that have a truthy value and decomposing all
        ``svg``, ``img``, ``video`` and ``audio`` elements.
    """
    slim = _get_soup(self.html)
    retained = ("class", "href") if keep_links else ("class",)

    for tag in slim.find_all(True):
        # Iterate over a snapshot of the names because attributes are deleted
        # from the tag inside the loop.
        for attr_name in list(tag.attrs):
            if attr_name in retained:
                continue
            # Attributes whose value is falsy (e.g. an empty string) are left in place.
            if tag[attr_name]:
                del tag[attr_name]

    for media in slim.find_all(["svg", "img", "video", "audio"]):
        media.decompose()

    return slim
|
||||
|
||||
|
||||
def get_html_content(page: str, base: str):
|
||||
soup = _get_soup(page)
|
||||
|
|
@ -48,7 +66,12 @@ def get_html_content(page: str, base: str):
|
|||
def _get_soup(page: str):
|
||||
soup = BeautifulSoup(page, "html.parser")
|
||||
# https://stackoverflow.com/questions/1936466/how-to-scrape-only-visible-webpage-text-with-beautifulsoup
|
||||
for s in soup(["style", "script", "[document]", "head", "title"]):
|
||||
for s in soup(["style", "script", "[document]", "head", "title", "footer"]):
|
||||
s.extract()
|
||||
|
||||
return soup
|
||||
|
||||
|
||||
def simplify_html(html: str, url: str, keep_links: bool = False):
    """Return a slimmed, minified version of ``html``.

    Args:
        html: Raw HTML markup of the page.
        url: URL stored on the temporary ``WebPage`` wrapper.
        keep_links: Forwarded to ``WebPage.get_slim_soup``.

    Returns:
        The output of ``htmlmin.minify`` (comments and empty space removed)
        applied to the decoded slim soup.
    """
    page = WebPage(inner_text="", html=html, url=url)
    slim_markup = page.get_slim_soup(keep_links).decode()
    return htmlmin.minify(slim_markup, remove_comments=True, remove_empty_space=True)
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ class BlockType(str, Enum):
|
|||
GALLERY = "Gallery"
|
||||
NOTEBOOK = "Notebook"
|
||||
DOCS = "Docs"
|
||||
THOUGHT = "Thought"
|
||||
|
||||
|
||||
END_MARKER_NAME = "end_marker"
|
||||
|
|
@ -55,23 +56,23 @@ class ResourceReporter(BaseModel):
|
|||
callback_url: str = Field(METAGPT_REPORTER_DEFAULT_URL, description="The URL to which the report should be sent")
|
||||
_llm_task: Optional[asyncio.Task] = PrivateAttr(None)
|
||||
|
||||
def report(self, value: Any, name: str):
|
||||
def report(self, value: Any, name: str, extra: Optional[dict] = None):
|
||||
"""Synchronously report resource observation data.
|
||||
|
||||
Args:
|
||||
value: The data to report.
|
||||
name: The type name of the data.
|
||||
"""
|
||||
return self._report(value, name)
|
||||
return self._report(value, name, extra)
|
||||
|
||||
async def async_report(self, value: Any, name: str):
|
||||
async def async_report(self, value: Any, name: str, extra: Optional[dict] = None):
|
||||
"""Asynchronously report resource observation data.
|
||||
|
||||
Args:
|
||||
value: The data to report.
|
||||
name: The type name of the data.
|
||||
"""
|
||||
return await self._async_report(value, name)
|
||||
return await self._async_report(value, name, extra)
|
||||
|
||||
@classmethod
|
||||
def set_report_fn(cls, fn: Callable):
|
||||
|
|
@ -100,20 +101,20 @@ class ResourceReporter(BaseModel):
|
|||
"""
|
||||
cls._async_report = fn
|
||||
|
||||
def _report(self, value: Any, name: str):
|
||||
def _report(self, value: Any, name: str, extra: Optional[dict] = None):
|
||||
if not self.callback_url:
|
||||
return
|
||||
|
||||
data = self._format_data(value, name)
|
||||
data = self._format_data(value, name, extra)
|
||||
resp = requests.post(self.callback_url, json=data)
|
||||
resp.raise_for_status()
|
||||
return resp.text
|
||||
|
||||
async def _async_report(self, value: Any, name: str):
|
||||
async def _async_report(self, value: Any, name: str, extra: Optional[dict] = None):
|
||||
if not self.callback_url:
|
||||
return
|
||||
|
||||
data = self._format_data(value, name)
|
||||
data = self._format_data(value, name, extra)
|
||||
url = self.callback_url
|
||||
_result = urlparse(url)
|
||||
sessiion_kwargs = {}
|
||||
|
|
@ -129,9 +130,16 @@ class ResourceReporter(BaseModel):
|
|||
resp.raise_for_status()
|
||||
return await resp.text()
|
||||
|
||||
def _format_data(self, value, name):
|
||||
def _format_data(self, value, name, extra):
|
||||
data = self.model_dump(mode="json", exclude=("callback_url", "llm_stream"))
|
||||
data["value"] = str(value) if isinstance(value, Path) else value
|
||||
if isinstance(value, BaseModel):
|
||||
value = value.model_dump(mode="json")
|
||||
elif isinstance(value, Path):
|
||||
value = str(value)
|
||||
|
||||
if name == "path":
|
||||
value = os.path.abspath(value)
|
||||
data["value"] = value
|
||||
data["name"] = name
|
||||
role = CURRENT_ROLE.get(None)
|
||||
if role:
|
||||
|
|
@ -139,6 +147,8 @@ class ResourceReporter(BaseModel):
|
|||
else:
|
||||
role_name = os.environ.get("METAGPT_ROLE")
|
||||
data["role"] = role_name
|
||||
if extra:
|
||||
data["extra"] = extra
|
||||
return data
|
||||
|
||||
def __enter__(self):
|
||||
|
|
@ -252,6 +262,16 @@ class TaskReporter(ObjectReporter):
|
|||
block: Literal[BlockType.TASK] = BlockType.TASK
|
||||
|
||||
|
||||
class ThoughtReporter(ObjectReporter):
    """Reporter for object resources to the Thought block.

    Entering the async context first sends an empty-dict report and then
    delegates to the parent's ``__aenter__``.
    """

    block: Literal[BlockType.THOUGHT] = BlockType.THOUGHT

    async def __aenter__(self):
        """Send an empty report, then return the parent's ``__aenter__`` result."""
        await self.async_report({})
        entered = await super().__aenter__()
        return entered
|
||||
|
||||
|
||||
class FileReporter(ResourceReporter):
|
||||
"""File resource callback for reporting complete file paths.
|
||||
|
||||
|
|
@ -259,13 +279,23 @@ class FileReporter(ResourceReporter):
|
|||
if the file can be partially output for display first, use streaming callback.
|
||||
"""
|
||||
|
||||
def report(self, value: Union[Path, dict, Any], name: Literal["path", "meta", "content"] = "path"):
|
||||
def report(
|
||||
self,
|
||||
value: Union[Path, dict, Any],
|
||||
name: Literal["path", "meta", "content"] = "path",
|
||||
extra: Optional[dict] = None,
|
||||
):
|
||||
"""Report file resource synchronously."""
|
||||
return super().report(value, name)
|
||||
return super().report(value, name, extra)
|
||||
|
||||
async def async_report(self, value: Path, name: Literal["path", "meta", "content"] = "path"):
|
||||
async def async_report(
|
||||
self,
|
||||
value: Union[Path, dict, Any],
|
||||
name: Literal["path", "meta", "content"] = "path",
|
||||
extra: Optional[dict] = None,
|
||||
):
|
||||
"""Report file resource asynchronously."""
|
||||
return await super().async_report(value, name)
|
||||
return await super().async_report(value, name, extra)
|
||||
|
||||
|
||||
class NotebookReporter(FileReporter):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue