Merge branch 'update-code-intepreter-by-auto-aask' into 'code_intepreter'

Update code_intepreter by auto aask_code See merge request agents/data_agents_opt!49
2026-06-08 15:05:17 +02:00 · 2024-01-18 13:16:01 +00:00 · 2024-01-18 13:16:01 +00:00 · 224bf820b2
commit 224bf820b2
parent 42a106ca26 d78db8994c
10 changed files with 205 additions and 45 deletions
--- a/metagpt/actions/execute_code.py
+++ b/metagpt/actions/execute_code.py
@ -15,9 +15,14 @@ import nbformat
 from nbclient import NotebookClient
 from nbclient.exceptions import CellTimeoutError, DeadKernelError
 from nbformat import NotebookNode
-from nbformat.v4 import new_code_cell, new_output
+from nbformat.v4 import new_code_cell, new_output, new_markdown_cell
 from rich.console import Console
 from rich.syntax import Syntax
+from rich.markdown import Markdown
+from rich.panel import Panel
+from rich.box import MINIMAL
+from rich.live import Live
+from rich.console import Group

 from metagpt.actions import Action
 from metagpt.logs import logger
@ -91,11 +96,17 @@ class ExecutePyCode(ExecuteCode, Action):
    def add_code_cell(self, code):
        self.nb.cells.append(new_code_cell(source=code))

+    def add_markdown_cell(self, markdown):
+        self.nb.cells.append(new_markdown_cell(source=markdown))
+
    def _display(self, code, language: str = "python"):
        if language == "python":
            code = Syntax(code, "python", theme="paraiso-dark", line_numbers=True)
-            self.console.print("\n")
            self.console.print(code)
+        elif language == "markdown":
+            display_markdown(code)
+        else:
+            raise ValueError(f"Only support for python, markdown, but got {language}")

    def add_output_to_cell(self, cell, output):
        if "outputs" not in cell:
@ -212,26 +223,43 @@ class ExecutePyCode(ExecuteCode, Action):
            cell_index = len(self.nb.cells) - 1
            success, error_message = await self.run_cell(self.nb.cells[-1], cell_index)

-            if success:
-                outputs = self.parse_outputs(self.nb.cells[-1].outputs)
-                return truncate(remove_escape_and_color_codes(outputs)), True
-            else:
-                return error_message, False
+            if not success:
+                return truncate(remove_escape_and_color_codes(error_message), is_success=success)
+
+            # code success
+            outputs = self.parse_outputs(self.nb.cells[-1].outputs)
+            return truncate(remove_escape_and_color_codes(outputs), is_success=success)
+        elif language == 'markdown':
+            # markdown
+            self.add_markdown_cell(code)
+            return code, True
        else:
-            # TODO: markdown
-            raise NotImplementedError(f"Not support this code type : {language}, Only support code!")
+            raise ValueError(f"Only support for language: python, markdown, but got {language}, ")


-def truncate(result: str, keep_len: int = 2000) -> str:
-    desc = f"Truncated to show only the last {keep_len} characters\n"
+def truncate(result: str, keep_len: int = 2000, is_success: bool = True):
+    desc = f"Executed code {'successfully' if is_success else 'failed, please reflect the cause of bug and then debug'}"
+    if is_success:
+        desc += f"Truncated to show only {keep_len} characters\n"  # NOTE(review): appended right after the first sentence with no separator — confirm
+    else:
+        desc += "Show complete information for you."
+
    if result.startswith(desc):
        result = result[len(desc) :]

    if len(result) > keep_len:
-        result = result[-keep_len:]
-        return desc + result
+        result = result[-keep_len:] if not is_success else result  # NOTE(review): failures keep only the last keep_len characters although desc says "complete" — confirm
+        if not result:  # NOTE(review): cannot be true when keep_len >= 0, because len(result) > keep_len here — confirm
+            result = 'No output about your code. Only when importing packages it is normal case. Recap and go ahead.'
+            return result, False

-    return result
+        if result.strip().startswith("<coroutine object"):
+            result = "Executed code failed, you need use key word 'await' to run a async code."
+            return result, False
+
+        return desc + result[:keep_len+500], is_success  # NOTE(review): successful output keeps the first keep_len + 500 characters while desc mentions keep_len — confirm
+
+    return result, is_success


 def remove_escape_and_color_codes(input_str):
@ -239,3 +267,31 @@ def remove_escape_and_color_codes(input_str):
    pattern = re.compile(r"\x1b\[[0-9;]*[mK]")
    result = pattern.sub("", input_str)
    return result
+
+
+def display_markdown(content: str):
+    # Use a regular expression to match fenced code blocks one by one
+    matches = re.finditer(r'```(.+?)```', content, re.DOTALL)
+    start_index = 0
+    content_panels = []
+    # Turn the text before each match, and the matched code block, into panels in order
+    for match in matches:
+        text_content = content[start_index:match.start()].strip()
+        code_content = match.group(0).strip()[3:-3]           # Remove triple backticks
+
+        if text_content:
+            content_panels.append(Panel(Markdown(text_content), box=MINIMAL))
+
+        if code_content:
+            content_panels.append(Panel(Markdown(f"```{code_content}"), box=MINIMAL))
+        start_index = match.end()
+
+    # Add whatever text remains after the last code block, if any
+    remaining_text = content[start_index:].strip()
+    if remaining_text:
+        content_panels.append(Panel(Markdown(remaining_text), box=MINIMAL))
+
+    # Show all panels in Live mode
+    with Live(auto_refresh=False, console=Console(), vertical_overflow="visible") as live:
+        live.update(Group(*content_panels))
+        live.refresh()
--- a/metagpt/actions/write_analysis_code.py
+++ b/metagpt/actions/write_analysis_code.py
@ -85,11 +85,11 @@ class WriteCodeByGenerate(BaseWriteAnalysisCode):
        plan: Plan = None,
        system_msg: str = None,
        **kwargs,
-    ) -> str:
+    ) -> dict:
        # context.append(Message(content=self.REUSE_CODE_INSTRUCTION, role="user"))
        prompt = self.process_msg(context, system_msg)
        code_content = await self.llm.aask_code(prompt, **kwargs)
-        return code_content["code"]
+        return code_content


 class WriteCodeWithTools(BaseWriteAnalysisCode):
--- a/metagpt/provider/openai_api.py
+++ b/metagpt/provider/openai_api.py
@ -10,6 +10,7 @@
 """

 import json
+import re
 from typing import AsyncIterator, Union

 from openai import APIConnectionError, AsyncOpenAI, AsyncStream
@ -27,7 +28,7 @@ from tenacity import (
 from metagpt.config import CONFIG, Config, LLMProviderEnum
 from metagpt.logs import log_llm_stream, logger
 from metagpt.provider.base_llm import BaseLLM
-from metagpt.provider.constant import GENERAL_FUNCTION_SCHEMA, GENERAL_TOOL_CHOICE
+from metagpt.provider.constant import GENERAL_FUNCTION_SCHEMA
 from metagpt.provider.llm_provider_registry import register_provider
 from metagpt.schema import Message
 from metagpt.utils.cost_manager import Costs
@ -147,37 +148,41 @@ class OpenAILLM(BaseLLM):
    def _func_configs(self, messages: list[dict], timeout=3, **kwargs) -> dict:
        """Note: Keep kwargs consistent with https://platform.openai.com/docs/api-reference/chat/create"""
        if "tools" not in kwargs:
-            configs = {
-                "tools": [{"type": "function", "function": GENERAL_FUNCTION_SCHEMA}],
-                "tool_choice": GENERAL_TOOL_CHOICE,
-            }
+            configs = {"tools": [{"type": "function", "function": GENERAL_FUNCTION_SCHEMA}]}
            kwargs.update(configs)

        return self._cons_kwargs(messages=messages, timeout=timeout, **kwargs)

+    def _process_message(self, messages: Union[str, Message, list[dict], list[Message], list[str]]) -> list[dict]:
+        """convert messages to list[dict]."""
+        # Wrap a single message in a list so everything below handles a list
+        if not isinstance(messages, list):
+            messages = [messages]
+
+        # Convert every item to a dict
+        processed_messages = []
+        for msg in messages:
+            if isinstance(msg, str):
+                processed_messages.append({"role": "user", "content": msg})
+            elif isinstance(msg, dict):
+                assert set(msg.keys()) == set(["role", "content"])  # NOTE(review): assert is stripped under -O; consider raising instead
+                processed_messages.append(msg)
+            elif isinstance(msg, Message):
+                processed_messages.append(msg.to_dict())
+            else:
+                raise ValueError(
+                    f"Only support message type are: str, Message, dict, but got {type(messages).__name__}!"  # NOTE(review): reports type(messages), which is always list here, not type(msg) — confirm
+                )
+        return processed_messages
+
    async def _achat_completion_function(self, messages: list[dict], timeout=3, **chat_configs) -> ChatCompletion:
+        messages = self._process_message(messages)
        kwargs = self._func_configs(messages=messages, timeout=timeout, **chat_configs)
        rsp: ChatCompletion = await self.aclient.chat.completions.create(**kwargs)
        self._update_costs(rsp.usage)
        return rsp

-    def _process_message(self, messages: Union[str, Message, list[dict], list[Message], list[str]]) -> list[dict]:
-        """convert messages to list[dict]."""
-        if isinstance(messages, list):
-            messages = [Message(content=msg) if isinstance(msg, str) else msg for msg in messages]
-            return [msg if isinstance(msg, dict) else msg.to_dict() for msg in messages]
-
-        if isinstance(messages, Message):
-            messages = [messages.to_dict()]
-        elif isinstance(messages, str):
-            messages = [{"role": "user", "content": messages}]
-        else:
-            raise ValueError(
-                f"Only support messages type are: str, Message, list[dict], but got {type(messages).__name__}!"
-            )
-        return messages
-
-    async def aask_code(self, messages: Union[str, Message, list[dict]], **kwargs) -> dict:
+    async def aask_code(self, messages: list[dict], **kwargs) -> dict:
        """Use function of tools to ask a code.
        Note: Keep kwargs consistent with https://platform.openai.com/docs/api-reference/chat/create

@ -187,18 +192,62 @@ class OpenAILLM(BaseLLM):
        >>> rsp = await llm.aask_code(msg)
        # -> {'language': 'python', 'code': "print('Hello, World!')"}
        """
-        messages = self._process_message(messages)
        rsp = await self._achat_completion_function(messages, **kwargs)
        return self.get_choice_function_arguments(rsp)

+    def _parse_arguments(self, arguments: str) -> dict:
+        """parse arguments in openai function call"""
+        if "langugae" not in arguments and "code" not in arguments:  # NOTE(review): "langugae" looks like a typo for "language" — confirm
+            logger.warning(f"Not found `code`, `language`, We assume it is pure code:\n {arguments}\n. ")
+            return {"language": "python", "code": arguments}
+
+        # Match the language field
+        language_pattern = re.compile(r'[\"\']?language[\"\']?\s*:\s*["\']([^"\']+?)["\']', re.DOTALL)
+        language_match = language_pattern.search(arguments)
+        language_value = language_match.group(1) if language_match else None
+
+        # Match the code field: take the last quoted or backtick-delimited span
+        code_pattern = r'(["\'`]{3}|["\'`])([\s\S]*?)\1'
+        try:
+            code_value = re.findall(code_pattern, arguments)[-1][-1]
+        except Exception as e:
+            logger.error(f"{e}, when re.findall({code_pattern}, {arguments})")
+            code_value = None
+
+        if code_value is None:
+            raise ValueError(f"Parse code error for {arguments}")
+        # Covers the case where arguments contains only code (language_value may then be None)
+        return {"language": language_value, "code": code_value}
+
    @handle_exception
    def get_choice_function_arguments(self, rsp: ChatCompletion) -> dict:
        """Required to provide the first function arguments of choice.

+        :param dict rsp: same as in self.get_choice_function(rsp)
        :return dict: return the first function arguments of choice, for example,
            {'language': 'python', 'code': "print('Hello, World!')"}
        """
-        return json.loads(rsp.choices[0].message.tool_calls[0].function.arguments)
+        message = rsp.choices[0].message
+        if (
+            message.tool_calls is not None
+            and message.tool_calls[0].function is not None
+            and message.tool_calls[0].function.arguments is not None
+        ):
+            # response is code
+            try:
+                return json.loads(message.tool_calls[0].function.arguments, strict=False)
+            except json.decoder.JSONDecodeError as e:
+                logger.debug(
+                    f"Got JSONDecodeError for {message.tool_calls[0].function.arguments},\
+                    we will use RegExp to parse code, \n {e}"
+                )
+                return {"language": "python", "code": self._parse_arguments(message.tool_calls[0].function.arguments)}  # NOTE(review): _parse_arguments already returns a {"language", "code"} dict, so "code" holds a nested dict here — confirm intent
+        elif message.tool_calls is None and message.content is not None:
+            # response is a plain message
+            return {"language": "markdown", "code": self.get_choice_text(rsp)}
+        else:
+            logger.error(f"Failed to parse \n {rsp}\n")
+            raise Exception(f"Failed to parse \n {rsp}\n")

    def get_choice_text(self, rsp: ChatCompletion) -> str:
        """Required to provide the first text of choice"""
--- a/metagpt/roles/code_interpreter.py
+++ b/metagpt/roles/code_interpreter.py
@ -52,7 +52,7 @@ class CodeInterpreter(Role):

    async def _act_on_task(self, current_task: Task) -> TaskResult:
        code, result, is_success = await self._write_and_exec_code()
-        task_result = TaskResult(code=code, result=result, is_success=is_success)
+        task_result = TaskResult(code=code['code'], result=result, is_success=is_success)
        return task_result

    async def _write_and_exec_code(self, max_retry: int = 3):
@ -63,10 +63,10 @@ class CodeInterpreter(Role):
            ### write code ###
            code, cause_by = await self._write_code()

-            self.working_memory.add(Message(content=code, role="assistant", cause_by=cause_by))
+            self.working_memory.add(Message(content=code['code'], role="assistant", cause_by=cause_by))

            ### execute code ###
-            result, success = await self.execute_code.run(code)
+            result, success = await self.execute_code.run(**code)
            print(result)

            self.working_memory.add(Message(content=result, role="user", cause_by=ExecutePyCode))
@ -91,6 +91,9 @@ class CodeInterpreter(Role):

        context = self.planner.get_useful_memories()
        code = await todo.run(context=context, plan=self.planner.plan, temperature=0.0)
+        # Temporarily convert the output of WriteCodeWithTools here
+        if isinstance(code, str):
+            code = {'code': code, 'language': 'python'}

        return code, todo

--- a/metagpt/roles/ml_engineer_simple.py
+++ b/metagpt/roles/ml_engineer_simple.py
@ -75,7 +75,7 @@ class MLEngineerSimple(Role):
            context = self.get_useful_memories()
            print(f"memories数量：{len(context)}")
            # print("===\n" +str(context) + "\n===")
-            code = await WriteCodeByGenerate().run(context=context, temperature=0.0)
+            code = await WriteCodeByGenerate().run(context=context, temperature=0.0, only_code=True)
            cause_by = WriteCodeByGenerate
            self.working_memory.add(Message(content=code, role="assistant", cause_by=cause_by))

--- a/metagpt/tools/init.py
+++ b/metagpt/tools/init.py
@ -16,7 +16,7 @@ from metagpt.prompts.tool_type import (
    FEATURE_ENGINEERING_PROMPT,
    MODEL_TRAIN_PROMPT,
    MODEL_EVALUATE_PROMPT,
-    VISION_PROMPT
+    VISION_PROMPT,
 )


@ -77,6 +77,12 @@ TOOL_TYPE_MAPPINGS = {
        desc="Related to text2image, image2image using stable diffusion model.",
        usage_prompt="",
    ),
+    "scrape_web": ToolType(
+        name="scrape_web",
+        module="metagpt.tools.functions.libs.scrape_web.scrape_web",
+        desc="Scrape data from web page.",
+        usage_prompt="",
+    ),
    "vision": ToolType(
        name="vision",
        module=str(TOOL_LIBS_PATH / "vision"),
--- a/metagpt/tools/functions/libs/scrape_web/init.py
+++ b/metagpt/tools/functions/libs/scrape_web/init.py
@ -0,0 +1 @@
+from metagpt.tools.functions.libs.scrape_web.scrape_web import scrape_web
--- a/metagpt/tools/functions/libs/scrape_web/scrape_web.py
+++ b/metagpt/tools/functions/libs/scrape_web/scrape_web.py
@ -0,0 +1,23 @@
+import asyncio
+
+from metagpt.tools.web_browser_engine_playwright import PlaywrightWrapper
+
+
+async def scrape_web(url, *urls):
+    """
+    Scrape and save the HTML structure and inner text content of a web page using Playwright.
+
+    Args:
+        url (str): The main URL to fetch inner text from.
+        *urls (str): Additional URLs to fetch inner text from.
+
+    Returns:
+        (dict): The inner text content and html structure of the web page, key are : 'inner_text', 'html'.
+    """
+    # Create a PlaywrightWrapper instance for the Chromium browser
+    web = await PlaywrightWrapper("chromium").run(url, *urls)  # NOTE(review): if run() returns a list when extra urls are passed, the attribute access below would fail — confirm
+
+    # Return both the inner text and the HTML of the page
+    return {"inner_text": web.inner_text, "html": web.html}
+
+# Three places need to change: the yaml schema, the init under the corresponding path, and ML_MODULE_MAP in MetaGPT/metagpt/prompts/ml_engineer.py
--- a/metagpt/tools/functions/schemas/scrape_web.yml
+++ b/metagpt/tools/functions/schemas/scrape_web.yml
@ -0,0 +1,21 @@
+scrape_web:
+  type: async function
+  description: "Scrape and save the HTML structure and inner text content of a web page using Playwright."
+  parameters:
+    properties:
+      url:
+        type: str
+        description: "web url"
+      \*url:
+        type: Non-Keyword Arguments
+        description: "other web urls, you can assign sub url link to it."
+    required:
+      - url
+  returns:
+    inner_text:
+      type: str
+      description: The inner text content of the web page.
+    html:
+      type: str
+      description: The html structure of the web page.
+
--- a/requirements.txt
+++ b/requirements.txt
@ -65,3 +65,4 @@ networkx~=3.2.1
 google-generativeai==0.3.2
 # playwright==1.40.0  # playwright extras require
 anytree
+ipywidgets==8.1.1
				`@ -0,0 +1 @@`
				`from metagpt.tools.functions.libs.scrape_web.scrape_web import scrape_web`