diff --git a/metagpt/actions/execute_code.py b/metagpt/actions/execute_code.py index c75711e75..5b6cba57d 100644 --- a/metagpt/actions/execute_code.py +++ b/metagpt/actions/execute_code.py @@ -15,9 +15,14 @@ import nbformat from nbclient import NotebookClient from nbclient.exceptions import CellTimeoutError, DeadKernelError from nbformat import NotebookNode -from nbformat.v4 import new_code_cell, new_output +from nbformat.v4 import new_code_cell, new_output, new_markdown_cell from rich.console import Console from rich.syntax import Syntax +from rich.markdown import Markdown +from rich.panel import Panel +from rich.box import MINIMAL +from rich.live import Live +from rich.console import Group from metagpt.actions import Action from metagpt.logs import logger @@ -91,11 +96,17 @@ class ExecutePyCode(ExecuteCode, Action): def add_code_cell(self, code): self.nb.cells.append(new_code_cell(source=code)) + def add_markdown_cell(self, markdown): + self.nb.cells.append(new_markdown_cell(source=markdown)) + def _display(self, code, language: str = "python"): if language == "python": code = Syntax(code, "python", theme="paraiso-dark", line_numbers=True) - self.console.print("\n") self.console.print(code) + elif language == "markdown": + display_markdown(code) + else: + raise ValueError(f"Only support for python, markdown, but got {language}") def add_output_to_cell(self, cell, output): if "outputs" not in cell: @@ -212,26 +223,43 @@ class ExecutePyCode(ExecuteCode, Action): cell_index = len(self.nb.cells) - 1 success, error_message = await self.run_cell(self.nb.cells[-1], cell_index) - if success: - outputs = self.parse_outputs(self.nb.cells[-1].outputs) - return truncate(remove_escape_and_color_codes(outputs)), True - else: - return error_message, False + if not success: + return truncate(remove_escape_and_color_codes(error_message), is_success=success) + + # code success + outputs = self.parse_outputs(self.nb.cells[-1].outputs) + return 
truncate(remove_escape_and_color_codes(outputs), is_success=success) + elif language == 'markdown': + # markdown + self.add_markdown_cell(code) + return code, True else: - # TODO: markdown - raise NotImplementedError(f"Not support this code type : {language}, Only support code!") + raise ValueError(f"Only support for language: python, markdown, but got {language}, ") -def truncate(result: str, keep_len: int = 2000) -> str: - desc = f"Truncated to show only the last {keep_len} characters\n" +def truncate(result: str, keep_len: int = 2000, is_success: bool = True): + desc = f"Executed code {'successfully' if is_success else 'failed, please reflect the cause of bug and then debug'}" + if is_success: + desc += f"Truncated to show only {keep_len} characters\n" + else: + desc += "Show complete information for you." + if result.startswith(desc): result = result[len(desc) :] if len(result) > keep_len: - result = result[-keep_len:] - return desc + result + result = result[-keep_len:] if not is_success else result + if not result: + result = 'No output about your code. Only when importing packages it is normal case. Recap and go ahead.' 
+ return result, False - return result + if result.strip().startswith(" str: + ) -> dict: # context.append(Message(content=self.REUSE_CODE_INSTRUCTION, role="user")) prompt = self.process_msg(context, system_msg) code_content = await self.llm.aask_code(prompt, **kwargs) - return code_content["code"] + return code_content class WriteCodeWithTools(BaseWriteAnalysisCode): diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py index 747e36480..3edd89835 100644 --- a/metagpt/provider/openai_api.py +++ b/metagpt/provider/openai_api.py @@ -10,6 +10,7 @@ """ import json +import re from typing import AsyncIterator, Union from openai import APIConnectionError, AsyncOpenAI, AsyncStream @@ -27,7 +28,7 @@ from tenacity import ( from metagpt.config import CONFIG, Config, LLMProviderEnum from metagpt.logs import log_llm_stream, logger from metagpt.provider.base_llm import BaseLLM -from metagpt.provider.constant import GENERAL_FUNCTION_SCHEMA, GENERAL_TOOL_CHOICE +from metagpt.provider.constant import GENERAL_FUNCTION_SCHEMA from metagpt.provider.llm_provider_registry import register_provider from metagpt.schema import Message from metagpt.utils.cost_manager import Costs @@ -147,37 +148,41 @@ class OpenAILLM(BaseLLM): def _func_configs(self, messages: list[dict], timeout=3, **kwargs) -> dict: """Note: Keep kwargs consistent with https://platform.openai.com/docs/api-reference/chat/create""" if "tools" not in kwargs: - configs = { - "tools": [{"type": "function", "function": GENERAL_FUNCTION_SCHEMA}], - "tool_choice": GENERAL_TOOL_CHOICE, - } + configs = {"tools": [{"type": "function", "function": GENERAL_FUNCTION_SCHEMA}]} kwargs.update(configs) return self._cons_kwargs(messages=messages, timeout=timeout, **kwargs) + def _process_message(self, messages: Union[str, Message, list[dict], list[Message], list[str]]) -> list[dict]: + """convert messages to list[dict].""" + # 全部转成list + if not isinstance(messages, list): + messages = [messages] + + # 转成list[dict] + 
processed_messages = [] + for msg in messages: + if isinstance(msg, str): + processed_messages.append({"role": "user", "content": msg}) + elif isinstance(msg, dict): + assert set(msg.keys()) == set(["role", "content"]) + processed_messages.append(msg) + elif isinstance(msg, Message): + processed_messages.append(msg.to_dict()) + else: + raise ValueError( + f"Only support message type are: str, Message, dict, but got {type(messages).__name__}!" + ) + return processed_messages + async def _achat_completion_function(self, messages: list[dict], timeout=3, **chat_configs) -> ChatCompletion: + messages = self._process_message(messages) kwargs = self._func_configs(messages=messages, timeout=timeout, **chat_configs) rsp: ChatCompletion = await self.aclient.chat.completions.create(**kwargs) self._update_costs(rsp.usage) return rsp - def _process_message(self, messages: Union[str, Message, list[dict], list[Message], list[str]]) -> list[dict]: - """convert messages to list[dict].""" - if isinstance(messages, list): - messages = [Message(content=msg) if isinstance(msg, str) else msg for msg in messages] - return [msg if isinstance(msg, dict) else msg.to_dict() for msg in messages] - - if isinstance(messages, Message): - messages = [messages.to_dict()] - elif isinstance(messages, str): - messages = [{"role": "user", "content": messages}] - else: - raise ValueError( - f"Only support messages type are: str, Message, list[dict], but got {type(messages).__name__}!" - ) - return messages - - async def aask_code(self, messages: Union[str, Message, list[dict]], **kwargs) -> dict: + async def aask_code(self, messages: list[dict], **kwargs) -> dict: """Use function of tools to ask a code. 
def _parse_arguments(self, arguments: str) -> dict:
    """Recover ``language`` and ``code`` from tool-call arguments that are not valid JSON.

    Args:
        arguments: The raw ``function.arguments`` string of a tool call.

    Returns:
        A dict of the form ``{"language": ..., "code": ...}``. When the text
        mentions neither ``language`` nor ``code`` the whole string is taken
        as python source. When no language can be extracted, ``"python"`` is
        used.

    Raises:
        ValueError: If no quoted span that could hold the code is found.
    """
    # Neither key is mentioned: assume the payload is bare python source.
    if "language" not in arguments and "code" not in arguments:
        logger.warning(f"Not found `code`, `language`, We assume it is pure code:\n {arguments}\n. ")
        return {"language": "python", "code": arguments}

    # Match the language value, e.g. `"language": "python"`.
    language_pattern = re.compile(r'[\"\']?language[\"\']?\s*:\s*["\']([^"\']+?)["\']', re.DOTALL)
    language_match = language_pattern.search(arguments)
    # Arguments may contain only the code; fall back to python in that case,
    # consistent with the pure-code branch above.
    language_value = language_match.group(1) if language_match else "python"

    # Match quoted spans (single or triple quotes/backticks); the last one is
    # taken to be the code.
    code_pattern = r'(["\'`]{3}|["\'`])([\s\S]*?)\1'
    quoted_spans = re.findall(code_pattern, arguments)
    if not quoted_spans:
        logger.error(f"No quoted code found when re.findall({code_pattern}, {arguments})")
        raise ValueError(f"Parse code error for {arguments}")

    return {"language": language_value, "code": quoted_spans[-1][-1]}


@handle_exception
def get_choice_function_arguments(self, rsp: ChatCompletion) -> dict:
    """Required to provide the first function arguments of choice.

    :param dict rsp: same as in self.get_choice_function(rsp)
    :return dict: return the first function arguments of choice, for example,
        {'language': 'python', 'code': "print('Hello, World!')"}.
        A plain text reply (no tool call) is returned as
        {'language': 'markdown', 'code': <text>}.
    :raises Exception: if the response has neither usable tool-call arguments
        nor text content.
    """
    message = rsp.choices[0].message
    # An empty `tool_calls` list is treated the same as no tool call at all.
    tool_call = message.tool_calls[0] if message.tool_calls else None

    if tool_call is not None and tool_call.function is not None and tool_call.function.arguments is not None:
        # Response is code.
        arguments = tool_call.function.arguments
        try:
            return json.loads(arguments, strict=False)
        except json.decoder.JSONDecodeError as e:
            logger.debug(
                f"Got JSONDecodeError for {arguments}, "
                f"we will use RegExp to parse code, \n {e}"
            )
            # `_parse_arguments` already returns the {"language", "code"} dict;
            # return it as-is instead of nesting it under another "code" key.
            return self._parse_arguments(arguments)
    elif not message.tool_calls and message.content is not None:
        # Response is a plain message.
        return {"language": "markdown", "code": self.get_choice_text(rsp)}
    else:
        logger.error(f"Failed to parse \n {rsp}\n")
        raise Exception(f"Failed to parse \n {rsp}\n")
cause_by=cause_by)) + self.working_memory.add(Message(content=code['code'], role="assistant", cause_by=cause_by)) ### execute code ### - result, success = await self.execute_code.run(code) + result, success = await self.execute_code.run(**code) print(result) self.working_memory.add(Message(content=result, role="user", cause_by=ExecutePyCode)) @@ -91,6 +91,9 @@ class CodeInterpreter(Role): context = self.planner.get_useful_memories() code = await todo.run(context=context, plan=self.planner.plan, temperature=0.0) + # 暂时在这里转换 WriteCodeWithTools 的输出 + if isinstance(code, str): + code = {'code': code, 'language': 'python'} return code, todo diff --git a/metagpt/roles/ml_engineer_simple.py b/metagpt/roles/ml_engineer_simple.py index 3f10af8d0..9ff1c9880 100644 --- a/metagpt/roles/ml_engineer_simple.py +++ b/metagpt/roles/ml_engineer_simple.py @@ -75,7 +75,7 @@ class MLEngineerSimple(Role): context = self.get_useful_memories() print(f"memories数量:{len(context)}") # print("===\n" +str(context) + "\n===") - code = await WriteCodeByGenerate().run(context=context, temperature=0.0) + code = await WriteCodeByGenerate().run(context=context, temperature=0.0, only_code=True) cause_by = WriteCodeByGenerate self.working_memory.add(Message(content=code, role="assistant", cause_by=cause_by)) diff --git a/metagpt/tools/__init__.py b/metagpt/tools/__init__.py index 84b9cbd12..222edf312 100644 --- a/metagpt/tools/__init__.py +++ b/metagpt/tools/__init__.py @@ -16,7 +16,7 @@ from metagpt.prompts.tool_type import ( FEATURE_ENGINEERING_PROMPT, MODEL_TRAIN_PROMPT, MODEL_EVALUATE_PROMPT, - VISION_PROMPT + VISION_PROMPT, ) @@ -77,6 +77,12 @@ TOOL_TYPE_MAPPINGS = { desc="Related to text2image, image2image using stable diffusion model.", usage_prompt="", ), + "scrape_web": ToolType( + name="scrape_web", + module="metagpt.tools.functions.libs.scrape_web.scrape_web", + desc="Scrape data from web page.", + usage_prompt="", + ), "vision": ToolType( name="vision", module=str(TOOL_LIBS_PATH / 
async def scrape_web(url, *urls):
    """
    Scrape the HTML structure and inner text content of one or more web pages using Playwright.

    Args:
        url (str): The main URL to fetch inner text from.
        *urls (str): Additional URLs to fetch inner text from.

    Returns:
        (dict | list[dict]): For a single URL, a dict with keys 'inner_text' and 'html'.
        When the wrapper returns several pages, a list of such dicts, one per page,
        in the order returned by the wrapper.
    """
    # Create a PlaywrightWrapper instance for the Chromium browser
    scraped = await PlaywrightWrapper("chromium").run(url, *urls)

    def _as_dict(page):
        # Keep only the two fields the tool schema advertises.
        return {"inner_text": page.inner_text, "html": page.html}

    # NOTE(review): PlaywrightWrapper.run presumably returns a sequence of pages
    # when extra URLs are passed; reading `.inner_text` on that sequence would
    # fail. Confirm against metagpt.tools.web_browser_engine_playwright.
    if isinstance(scraped, (list, tuple)):
        return [_as_dict(page) for page in scraped]

    # Single URL: return the inner text content and html of that page.
    return _as_dict(scraped)
+ required: + - url + returns: + inner_text: + type: str + description: The inner text content of the web page. + html: + type: str + description: The html structure of the web page. + diff --git a/requirements.txt b/requirements.txt index 7ef6d884e..016c2f5d5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -65,3 +65,4 @@ networkx~=3.2.1 google-generativeai==0.3.2 # playwright==1.40.0 # playwright extras require anytree +ipywidgets==8.1.1