Merge branch 'main' into feat_qianfan

2026-07-17 16:41:05 +02:00 · 2024-02-07 19:04:09 +08:00 · 2024-02-07 19:04:09 +08:00 · 82557dfe81
commit 82557dfe81
parent 997e25e97d a54e18f8ca
18 changed files with 69 additions and 34 deletions
--- a/config/config2.yaml.example
+++ b/config/config2.yaml.example
@ -1,5 +1,5 @@
 llm:
-  api_type: "openai"
+  api_type: "openai"  # or azure / ollama etc.
  base_url: "YOUR_BASE_URL"
  api_key: "YOUR_API_KEY"
  model: "gpt-4-turbo-preview"  # or gpt-3.5-turbo-1106 / gpt-4-1106-preview
--- a/examples/llm_vision.py
+++ b/examples/llm_vision.py
@ -0,0 +1,23 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# @Desc   : example showing how to use the LLM vision capability
+
+import asyncio
+from pathlib import Path
+
+from metagpt.llm import LLM
+from metagpt.utils.common import encode_image
+
+
+async def main():
+    llm = LLM()
+
+    # check if the configured llm supports the llm-vision capability. If not, it will throw an error
+    invoice_path = Path(__file__).parent.joinpath("..", "tests", "data", "invoices", "invoice-2.png")
+    img_base64 = encode_image(invoice_path)
+    res = await llm.aask(msg="if this is a invoice, just return True else return False", images=[img_base64])
+    assert "true" in res.lower()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/metagpt/actions/research.py
+++ b/metagpt/actions/research.py
@ -133,7 +133,7 @@ class CollectLinks(Action):
                if len(remove) == 0:
                    break

-        model_name = config.get_openai_llm().model
+        model_name = config.llm.model
        prompt = reduce_message_length(gen_msg(), model_name, system_text, 4096)
        logger.debug(prompt)
        queries = await self._aask(prompt, [system_text])
--- a/metagpt/provider/general_api_requestor.py
+++ b/metagpt/provider/general_api_requestor.py
@ -60,7 +60,8 @@ class GeneralAPIRequestor(APIRequestor):
        self, result: requests.Response, stream: bool
    ) -> Tuple[Union[bytes, Iterator[Generator]], bytes]:
        """Returns the response(s) and a bool indicating whether it is a stream."""
-        if stream and "text/event-stream" in result.headers.get("Content-Type", ""):
+        content_type = result.headers.get("Content-Type", "")
+        if stream and ("text/event-stream" in content_type or "application/x-ndjson" in content_type):
            return (
                self._interpret_response_line(line, result.status_code, result.headers, stream=True)
                for line in parse_stream(result.iter_lines())
--- a/metagpt/provider/openai_api.py
+++ b/metagpt/provider/openai_api.py
@ -233,14 +233,16 @@ class OpenAILLM(BaseLLM):
            usage.prompt_tokens = count_message_tokens(messages, self.model)
            usage.completion_tokens = count_string_tokens(rsp, self.model)
        except Exception as e:
-            logger.error(f"usage calculation failed: {e}")
+            logger.warning(f"usage calculation failed: {e}")

        return usage

    def _get_max_tokens(self, messages: list[dict]):
        if not self.auto_max_tokens:
            return self.config.max_token
-        return get_max_completion_tokens(messages, self.model, self.config.max_tokens)
+        # FIXME: completion tokens are hard-capped at 4096 here; background:
+        # https://community.openai.com/t/why-is-gpt-3-5-turbo-1106-max-tokens-limited-to-4096/494973/3
+        return min(get_max_completion_tokens(messages, self.model, self.config.max_token), 4096)

    @handle_exception
    async def amoderation(self, content: Union[str, list[str]]):
--- a/metagpt/provider/zhipuai_api.py
+++ b/metagpt/provider/zhipuai_api.py
@ -3,9 +3,8 @@
 # @Desc   : zhipuai LLM from https://open.bigmodel.cn/dev/api#sdk

 from enum import Enum
+from typing import Optional

-import openai
-import zhipuai
 from requests import ConnectionError
 from tenacity import (
    after_log,
@ -14,6 +13,7 @@ from tenacity import (
    stop_after_attempt,
    wait_random_exponential,
 )
+from zhipuai.types.chat.chat_completion import Completion

 from metagpt.configs.llm_config import LLMConfig, LLMType
 from metagpt.logs import log_llm_stream, logger
@ -21,6 +21,7 @@ from metagpt.provider.base_llm import BaseLLM
 from metagpt.provider.llm_provider_registry import register_provider
 from metagpt.provider.openai_api import log_and_reraise
 from metagpt.provider.zhipuai.zhipu_model_api import ZhiPuModelAPI
+from metagpt.utils.cost_manager import CostManager


 class ZhiPuEvent(Enum):
@ -38,27 +39,22 @@ class ZhiPuAILLM(BaseLLM):
    """

    def __init__(self, config: LLMConfig):
-        self.__init_zhipuai(config)
-        self.llm = ZhiPuModelAPI
-        self.model = "chatglm_turbo"  # so far only one model, just use it
-        self.use_system_prompt: bool = False  # zhipuai has no system prompt when use api
        self.config = config
+        self.__init_zhipuai()
+        self.cost_manager: Optional[CostManager] = None

-    def __init_zhipuai(self, config: LLMConfig):
-        assert config.api_key
-        zhipuai.api_key = config.api_key
-        # due to use openai sdk, set the api_key but it will't be used.
-        # openai.api_key = zhipuai.api_key  # due to use openai sdk, set the api_key but it will't be used.
-        if config.proxy:
-            # FIXME: openai v1.x sdk has no proxy support
-            openai.proxy = config.proxy
+    def __init_zhipuai(self):
+        assert self.config.api_key
+        self.api_key = self.config.api_key
+        self.model = self.config.model  # so far, it supports glm-3-turbo and glm-4
+        self.llm = ZhiPuModelAPI(api_key=self.api_key)

    def _const_kwargs(self, messages: list[dict], stream: bool = False) -> dict:
        kwargs = {"model": self.model, "messages": messages, "stream": stream, "temperature": 0.3}
        return kwargs

    def completion(self, messages: list[dict], timeout=3) -> dict:
-        resp = self.llm.chat.completions.create(**self._const_kwargs(messages))
+        resp: Completion = self.llm.chat.completions.create(**self._const_kwargs(messages))
        usage = resp.usage.model_dump()
        self._update_costs(usage)
        return resp.model_dump()
--- a/metagpt/roles/ci/code_interpreter.py
+++ b/metagpt/roles/ci/code_interpreter.py
@ -72,11 +72,7 @@ class CodeInterpreter(Role):
                if ReviewConst.CHANGE_WORDS[0] in review:
                    counter = 0  # redo the task again with help of human suggestions

-        py_code = (
-            code["code"] if code.get("language") == "python" else ""
-        )  # use python code as final code; for markdown, return the rendered result instead of the code itself
-
-        return py_code, result, success
+        return code["code"], result, success

    async def _write_code(self):
        todo = WriteCodeWithoutTools() if not self.use_tools else WriteCodeWithTools(selected_tools=self.tools)
--- a/metagpt/utils/cost_manager.py
+++ b/metagpt/utils/cost_manager.py
@ -42,6 +42,10 @@ class CostManager(BaseModel):
        """
        self.total_prompt_tokens += prompt_tokens
        self.total_completion_tokens += completion_tokens
+        if model not in TOKEN_COSTS:
+            logger.warning(f"Model {model} not found in TOKEN_COSTS.")
+            return
+
        cost = (
            prompt_tokens * self.token_costs[model]["prompt"]
            + completion_tokens * self.token_costs[model]["completion"]
--- a/metagpt/utils/text.py
+++ b/metagpt/utils/text.py
@ -25,7 +25,7 @@ def reduce_message_length(
    """
    max_token = TOKEN_MAX.get(model_name, 2048) - count_string_tokens(system_text, model_name) - reserved
    for msg in msgs:
-        if count_string_tokens(msg, model_name) < max_token:
+        if count_string_tokens(msg, model_name) < max_token or model_name not in TOKEN_MAX:
            return msg

    raise RuntimeError("fail to reduce message length")
@ -93,7 +93,7 @@ def split_paragraph(paragraph: str, sep: str = ".,", count: int = 2) -> list[str
            continue
        ret = ["".join(j) for j in _split_by_count(sentences, count)]
        return ret
-    return _split_by_count(paragraph, count)
+    return list(_split_by_count(paragraph, count))


 def decode_unicode_escape(text: str) -> str:
--- a/metagpt/utils/token_counter.py
+++ b/metagpt/utils/token_counter.py
@ -32,8 +32,8 @@ TOKEN_COSTS = {
    "gpt-4-vision-preview": {"prompt": 0.01, "completion": 0.03},  # TODO add extra image price calculator
    "gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03},
    "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0},
-    "glm-3-turbo": {"prompt": 0.0, "completion": 0.0007},  # 128k version, prompt + completion tokens=0.005￥/k-tokens
-    "glm-4": {"prompt": 0.0, "completion": 0.014},  # 128k version, prompt + completion tokens=0.1￥/k-tokens
+    "glm-3-turbo": {"prompt": 0.0007, "completion": 0.0007},  # 128k version, prompt + completion tokens=0.005￥/k-tokens
+    "glm-4": {"prompt": 0.014, "completion": 0.014},  # 128k version, prompt + completion tokens=0.1￥/k-tokens
    "gemini-pro": {"prompt": 0.00025, "completion": 0.0005},
 }

@ -111,7 +111,8 @@ TOKEN_MAX = {
    "gpt-4-vision-preview": 128000,
    "gpt-4-1106-vision-preview": 128000,
    "text-embedding-ada-002": 8192,
-    "chatglm_turbo": 32768,
+    "glm-3-turbo": 128000,
+    "glm-4": 128000,
    "gemini-pro": 32768,
 }

--- a/requirements.txt
+++ b/requirements.txt
@ -63,7 +63,7 @@ gitignore-parser==0.1.9
 websockets~=12.0
 networkx~=3.2.1
 google-generativeai==0.3.2
-# playwright==1.40.0  # playwright extras require
+playwright>=1.26  # used in metagpt/tools/libs/web_scraping.py
 anytree
 ipywidgets==8.1.1
 Pillow
--- a/setup.py
+++ b/setup.py
@ -24,7 +24,6 @@ requirements = (here / "requirements.txt").read_text(encoding="utf-8").splitline


 extras_require = {
-    "playwright": ["playwright>=1.26", "beautifulsoup4"],
    "selenium": ["selenium>4", "webdriver_manager", "beautifulsoup4"],
    "search-google": ["google-api-python-client==2.94.0"],
    "search-ddg": ["duckduckgo-search~=4.1.1"],
--- a/tests/data/rsp_cache.json
+++ b/tests/data/rsp_cache.json
@ -389,5 +389,11 @@
        "reflection": "The implementation failed the test case where the input array is [1, 5, 2, 3, 4]. The issue arises because the code does not handle the sorting of elements with the same number of ones in their binary representation correctly. According to the context, for similar number of ones, the elements should be sorted based on their decimal value in ascending order. However, the output [1, 2, 4, 3, 5] shows that 3 and 4 are not in the correct order, indicating that the sorting by decimal value is not functioning as intended. To fix this issue, we should ensure that the sorting is stable, so that when two elements have the same number of ones, they maintain their relative order if their decimal values are already in ascending order.",
        "improved_impl": "def sort_array(arr):\n    # Helper function to count the number of ones in the binary representation\n    def count_ones(n):\n        return bin(n).count('1')\n    \n    # Sort the array using a custom key function\n    # The key function returns a tuple (number of ones, value) for each element\n    # This ensures that if two elements have the same number of ones, they are sorted by their value\n    # We use a stable sorting algorithm to maintain the relative order of elements with the same number of ones\n    sorted_arr = sorted(arr, key=lambda x: (count_ones(x), x), reverse=True)\n    sorted_arr = sorted(sorted_arr, key=count_ones)\n    \n    return sorted_arr\n"
    },
+    "[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n    {\\n        \\\"task_id\\\": \\\"1\\\",\\n        \\\"dependent_task_ids\\\": [],\\n        \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"other\\\",\\n        \\\"code\\\": \\\"from sklearn.datasets import load_iris\\\\niris_data = load_iris()\\\",\\n        \\\"result\\\": \\\"a successful run\\\",\\n        \\\"is_success\\\": true,\\n        \\\"is_finished\\\": true\\n    },\\n    {\\n        \\\"task_id\\\": \\\"2\\\",\\n        \\\"dependent_task_ids\\\": [\\n            \\\"1\\\"\\n        ],\\n        \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"eda\\\",\\n        \\\"code\\\": \\\"\\\",\\n        \\\"result\\\": \\\"\\\",\\n        \\\"is_success\\\": false,\\n        \\\"is_finished\\\": false\\n    },\\n    {\\n        \\\"task_id\\\": \\\"3\\\",\\n        \\\"dependent_task_ids\\\": [\\n            \\\"2\\\"\\n        ],\\n        \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"other\\\",\\n        \\\"code\\\": \\\"\\\",\\n        \\\"result\\\": 
\\\"\\\",\\n        \\\"is_success\\\": false,\\n        \\\"is_finished\\\": false\\n    }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"2\\\",\\\"dependent_task_ids\\\":[\\\"1\\\"],\\\"instruction\\\":\\\"Perform exploratory data analysis on the Iris dataset.\\\",\\\"task_type\\\":\\\"eda\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": {
+        "code": "import pandas as pd\n\n# Convert the Iris dataset to a DataFrame for easier manipulation\niris_df = pd.DataFrame(data=iris_data.data, columns=iris_data.feature_names)\niris_df['target'] = iris_data.target\niris_df['target_names'] = iris_df['target'].apply(lambda x: iris_data.target_names[x])\n\n# Display basic information about the dataset\niris_df.info()\n\n# Display statistical summary of the dataset\niris_df.describe()\n\n# Display the first few rows of the dataset\ndisplay(iris_df.head())"
+    },
+    "[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n    {\\n        \\\"task_id\\\": \\\"1\\\",\\n        \\\"dependent_task_ids\\\": [],\\n        \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"other\\\",\\n        \\\"code\\\": \\\"from sklearn.datasets import load_iris\\\\niris_data = load_iris()\\\",\\n        \\\"result\\\": \\\"a successful run\\\",\\n        \\\"is_success\\\": true,\\n        \\\"is_finished\\\": true\\n    },\\n    {\\n        \\\"task_id\\\": \\\"2\\\",\\n        \\\"dependent_task_ids\\\": [\\n            \\\"1\\\"\\n        ],\\n        \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"eda\\\",\\n        \\\"code\\\": \\\"import pandas as pd\\\\n\\\\n# Convert the Iris dataset to a DataFrame for easier manipulation\\\\niris_df = pd.DataFrame(data=iris_data.data, columns=iris_data.feature_names)\\\\niris_df['target'] = iris_data.target\\\\niris_df['target_names'] = iris_df['target'].apply(lambda x: iris_data.target_names[x])\\\\n\\\\n# Display basic information about the dataset\\\\niris_df.info()\\\\n\\\\n# Display statistical summary of the 
dataset\\\\niris_df.describe()\\\\n\\\\n# Display the first few rows of the dataset\\\\ndisplay(iris_df.head())\\\",\\n        \\\"result\\\": \\\"a successful run\\\",\\n        \\\"is_success\\\": true,\\n        \\\"is_finished\\\": true\\n    },\\n    {\\n        \\\"task_id\\\": \\\"3\\\",\\n        \\\"dependent_task_ids\\\": [\\n            \\\"2\\\"\\n        ],\\n        \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n        \\\"task_type\\\": \\\"other\\\",\\n        \\\"code\\\": \\\"\\\",\\n        \\\"result\\\": \\\"\\\",\\n        \\\"is_success\\\": false,\\n        \\\"is_finished\\\": false\\n    }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"3\\\",\\\"dependent_task_ids\\\":[\\\"2\\\"],\\\"instruction\\\":\\\"Create a plot visualizing the Iris dataset.\\\",\\\"task_type\\\":\\\"other\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": {
+        "code": "import matplotlib.pyplot as plt\nimport seaborn as sns\n\n# Set the style of seaborn\nsns.set_style('whitegrid')\n\n# Pairplot to visualize the relationships between features\nsns.pairplot(iris_df, hue='target_names', markers='+')\nplt.show()"
+    },
    "\n## context\n\n\n-----\n\n## format example\n[CONTENT]\n{\n    \"invoice\": \"False\"\n}\n[/CONTENT]\n\n## nodes: \"<node>: <type>  # <instruction>\"\n- invoice: <class 'bool'>  # if it's a invoice file, return True else False\n\n\n## constraint\nLanguage: Please use the same language as Human INPUT.\nFormat: output wrapped inside [CONTENT][/CONTENT] like format example, nothing else.\n\n## action\nFollow instructions of nodes, generate output and make sure it follows the format example.\n": "[CONTENT]\n{\n    \"invoice\": \"True\"\n}\n[/CONTENT]"
 }
--- a/tests/metagpt/actions/test_rebuild_class_view.py
+++ b/tests/metagpt/actions/test_rebuild_class_view.py
@ -14,6 +14,7 @@ from metagpt.actions.rebuild_class_view import RebuildClassView
 from metagpt.llm import LLM


+@pytest.mark.skip
@pytest.mark.asyncio
 async def test_rebuild(context):
    action = RebuildClassView(
--- a/tests/metagpt/actions/test_summarize_code.py
+++ b/tests/metagpt/actions/test_summarize_code.py
@ -176,6 +176,7 @@ class Snake:
 """


+@pytest.mark.skip
@pytest.mark.asyncio
 async def test_summarize_code(context):
    git_dir = Path(__file__).parent / f"unittest/{uuid.uuid4().hex}"
--- a/tests/metagpt/provider/test_zhipuai_api.py
+++ b/tests/metagpt/provider/test_zhipuai_api.py
@ -19,7 +19,7 @@ resp_cont = resp_cont_tmpl.format(name=name)
 default_resp = get_part_chat_completion(name)


-async def mock_zhipuai_acreate_stream(**kwargs):
+async def mock_zhipuai_acreate_stream(self, **kwargs):
    class MockResponse(object):
        async def _aread(self):
            class Iterator(object):
@ -39,7 +39,7 @@ async def mock_zhipuai_acreate_stream(**kwargs):
    return MockResponse()


-async def mock_zhipuai_acreate(**kwargs) -> dict:
+async def mock_zhipuai_acreate(self, **kwargs) -> dict:
    return default_resp


--- a/tests/metagpt/roles/ci/test_code_interpreter.py
+++ b/tests/metagpt/roles/ci/test_code_interpreter.py
@ -17,3 +17,7 @@ async def test_code_interpreter(mocker, auto_run):
    rsp = await ci.run(requirement)
    logger.info(rsp)
    assert len(rsp.content) > 0
+
+    finished_tasks = ci.planner.plan.get_finished_tasks()
+    assert len(finished_tasks) > 0
+    assert len(finished_tasks[0].code) > 0  # check one task to see if code is recorded
--- a/tests/metagpt/utils/test_text.py
+++ b/tests/metagpt/utils/test_text.py
@ -42,6 +42,7 @@ def test_reduce_message_length(msgs, model_name, system_text, reserved, expected
        (" ".join("Hello World." for _ in range(1000)), "Prompt: {}", "gpt-3.5-turbo-16k", "System", 3000, 1),
        (" ".join("Hello World." for _ in range(4000)), "Prompt: {}", "gpt-4", "System", 2000, 2),
        (" ".join("Hello World." for _ in range(8000)), "Prompt: {}", "gpt-4-32k", "System", 4000, 1),
+        (" ".join("Hello World" for _ in range(8000)), "Prompt: {}", "gpt-3.5-turbo", "System", 1000, 8),
    ],
 )
 def test_generate_prompt_chunk(text, prompt_template, model_name, system_text, reserved, expected):