From ee4aba206e80e3d0f144f8713f1ae17458d77cd5 Mon Sep 17 00:00:00 2001 From: yzlin Date: Tue, 6 Feb 2024 23:22:53 +0800 Subject: [PATCH 01/12] fix empty code when aask_code not returning language --- metagpt/roles/ci/code_interpreter.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/metagpt/roles/ci/code_interpreter.py b/metagpt/roles/ci/code_interpreter.py index 796abba04..404c93b81 100644 --- a/metagpt/roles/ci/code_interpreter.py +++ b/metagpt/roles/ci/code_interpreter.py @@ -72,11 +72,7 @@ class CodeInterpreter(Role): if ReviewConst.CHANGE_WORDS[0] in review: counter = 0 # redo the task again with help of human suggestions - py_code = ( - code["code"] if code.get("language") == "python" else "" - ) # use python code as final code; for markdown, return the rendered result instead of the code itself - - return py_code, result, success + return code["code"], result, success async def _write_code(self): todo = WriteCodeWithoutTools() if not self.use_tools else WriteCodeWithTools(selected_tools=self.tools) From 78989b0eb7dd012442cb480ceed217d8ecc28f03 Mon Sep 17 00:00:00 2001 From: yzlin Date: Tue, 6 Feb 2024 23:37:24 +0800 Subject: [PATCH 02/12] skip two individual tests --- tests/metagpt/actions/test_rebuild_class_view.py | 1 + tests/metagpt/actions/test_summarize_code.py | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/metagpt/actions/test_rebuild_class_view.py b/tests/metagpt/actions/test_rebuild_class_view.py index 403109cc0..2188d6b85 100644 --- a/tests/metagpt/actions/test_rebuild_class_view.py +++ b/tests/metagpt/actions/test_rebuild_class_view.py @@ -14,6 +14,7 @@ from metagpt.actions.rebuild_class_view import RebuildClassView from metagpt.llm import LLM +@pytest.mark.skip @pytest.mark.asyncio async def test_rebuild(context): action = RebuildClassView( diff --git a/tests/metagpt/actions/test_summarize_code.py b/tests/metagpt/actions/test_summarize_code.py index a404047c1..3cfe7ca81 100644 --- a/tests/metagpt/actions/test_summarize_code.py +++ b/tests/metagpt/actions/test_summarize_code.py @@ -176,6 +176,7 @@ class Snake: """ +@pytest.mark.skip @pytest.mark.asyncio async def test_summarize_code(context): git_dir = Path(__file__).parent / f"unittest/{uuid.uuid4().hex}" From ea6c440294650122c3e6df6f8d3a722b746bbe89 Mon Sep 17 00:00:00 2001 From: yzlin Date: Tue, 6 Feb 2024 23:57:39 +0800 Subject: [PATCH 03/12] add ut to check code saving --- tests/data/rsp_cache.json | 6 ++++++ tests/metagpt/roles/ci/test_code_interpreter.py | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/tests/data/rsp_cache.json b/tests/data/rsp_cache.json index 40d7d3953..75fc9ceb2 100644 --- a/tests/data/rsp_cache.json +++ b/tests/data/rsp_cache.json @@ -389,5 +389,11 @@ "reflection": "The implementation failed the test case where the input array is [1, 5, 2, 3, 4]. The issue arises because the code does not handle the sorting of elements with the same number of ones in their binary representation correctly. According to the context, for similar number of ones, the elements should be sorted based on their decimal value in ascending order. However, the output [1, 2, 4, 3, 5] shows that 3 and 4 are not in the correct order, indicating that the sorting by decimal value is not functioning as intended. To fix this issue, we should ensure that the sorting is stable, so that when two elements have the same number of ones, they maintain their relative order if their decimal values are already in ascending order.", "improved_impl": "def sort_array(arr):\n # Helper function to count the number of ones in the binary representation\n def count_ones(n):\n return bin(n).count('1')\n \n # Sort the array using a custom key function\n # The key function returns a tuple (number of ones, value) for each element\n # This ensures that if two elements have the same number of ones, they are sorted by their value\n # We use a stable sorting algorithm to maintain the relative order of elements with the same number of ones\n sorted_arr = sorted(arr, key=lambda x: (count_ones(x), x), reverse=True)\n sorted_arr = sorted(sorted_arr, key=count_ones)\n \n return sorted_arr\n" }, + "[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n {\\n \\\"task_id\\\": \\\"1\\\",\\n \\\"dependent_task_ids\\\": [],\\n \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"from sklearn.datasets import load_iris\\\\niris_data = load_iris()\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"2\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"1\\\"\\n ],\\n \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"eda\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n },\\n {\\n \\\"task_id\\\": \\\"3\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"2\\\"\\n ],\\n \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"2\\\",\\\"dependent_task_ids\\\":[\\\"1\\\"],\\\"instruction\\\":\\\"Perform exploratory data analysis on the Iris dataset.\\\",\\\"task_type\\\":\\\"eda\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": { + "code": "import pandas as pd\n\n# Convert the Iris dataset to a DataFrame for easier manipulation\niris_df = pd.DataFrame(data=iris_data.data, columns=iris_data.feature_names)\niris_df['target'] = iris_data.target\niris_df['target_names'] = iris_df['target'].apply(lambda x: iris_data.target_names[x])\n\n# Display basic information about the dataset\niris_df.info()\n\n# Display statistical summary of the dataset\niris_df.describe()\n\n# Display the first few rows of the dataset\ndisplay(iris_df.head())" + }, + "[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n {\\n \\\"task_id\\\": \\\"1\\\",\\n \\\"dependent_task_ids\\\": [],\\n \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"from sklearn.datasets import load_iris\\\\niris_data = load_iris()\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"2\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"1\\\"\\n ],\\n \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"eda\\\",\\n \\\"code\\\": \\\"import pandas as pd\\\\n\\\\n# Convert the Iris dataset to a DataFrame for easier manipulation\\\\niris_df = pd.DataFrame(data=iris_data.data, columns=iris_data.feature_names)\\\\niris_df['target'] = iris_data.target\\\\niris_df['target_names'] = iris_df['target'].apply(lambda x: iris_data.target_names[x])\\\\n\\\\n# Display basic information about the dataset\\\\niris_df.info()\\\\n\\\\n# Display statistical summary of the dataset\\\\niris_df.describe()\\\\n\\\\n# Display the first few rows of the dataset\\\\ndisplay(iris_df.head())\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"3\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"2\\\"\\n ],\\n \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"3\\\",\\\"dependent_task_ids\\\":[\\\"2\\\"],\\\"instruction\\\":\\\"Create a plot visualizing the Iris dataset.\\\",\\\"task_type\\\":\\\"other\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": { + "code": "import matplotlib.pyplot as plt\nimport seaborn as sns\n\n# Set the style of seaborn\nsns.set_style('whitegrid')\n\n# Pairplot to visualize the relationships between features\nsns.pairplot(iris_df, hue='target_names', markers='+')\nplt.show()" + }, "\n## context\n\n\n-----\n\n## format example\n[CONTENT]\n{\n \"invoice\": \"False\"\n}\n[/CONTENT]\n\n## nodes: \": # \"\n- invoice: # if it's a invoice file, return True else False\n\n\n## constraint\nLanguage: Please use the same language as Human INPUT.\nFormat: output wrapped inside [CONTENT][/CONTENT] like format example, nothing else.\n\n## action\nFollow instructions of nodes, generate output and make sure it follows the format example.\n": "[CONTENT]\n{\n \"invoice\": \"True\"\n}\n[/CONTENT]" } \ No newline at end of file diff --git a/tests/metagpt/roles/ci/test_code_interpreter.py b/tests/metagpt/roles/ci/test_code_interpreter.py index f23292965..9d2f2429b 100644 --- a/tests/metagpt/roles/ci/test_code_interpreter.py +++ b/tests/metagpt/roles/ci/test_code_interpreter.py @@ -17,3 +17,7 @@ async def test_code_interpreter(mocker, auto_run): rsp = await ci.run(requirement) logger.info(rsp) assert len(rsp.content) > 0 + + finished_tasks = ci.planner.plan.get_finished_tasks() + assert len(finished_tasks) > 0 + assert len(finished_tasks[0].code) > 0 # check one task to see if code is recorded From 6f31289e7e0efd96a22400b31df8179eab286875 Mon Sep 17 00:00:00 2001 From: better629 Date: Wed, 7 Feb 2024 10:02:15 +0800 Subject: [PATCH 04/12] re-commit zhipu-api due to merge mistake --- examples/llm_hello_world.py | 8 ------- examples/llm_vision.py | 23 ++++++++++++++++++ metagpt/provider/general_api_requestor.py | 3 ++- metagpt/provider/zhipuai_api.py | 28 ++++++++++------------ metagpt/utils/token_counter.py | 7 +++--- tests/metagpt/provider/test_zhipuai_api.py | 4 ++-- 6 files changed, 43 insertions(+), 30 deletions(-) create mode 100644 examples/llm_vision.py diff --git a/examples/llm_hello_world.py b/examples/llm_hello_world.py index 1d132eb8a..219a303c8 100644 --- a/examples/llm_hello_world.py +++ b/examples/llm_hello_world.py @@ -6,11 +6,9 @@ @File : llm_hello_world.py """ import asyncio -from pathlib import Path from metagpt.llm import LLM from metagpt.logs import logger -from metagpt.utils.common import encode_image async def main(): @@ -29,12 +27,6 @@ async def main(): if hasattr(llm, "completion"): logger.info(llm.completion(hello_msg)) - # check if the configured llm supports llm-vision capacity. If not, it will throw a error - invoice_path = Path(__file__).parent.joinpath("..", "tests", "data", "invoices", "invoice-2.png") - img_base64 = encode_image(invoice_path) - res = await llm.aask(msg="if this is a invoice, just return True else return False", images=[img_base64]) - assert "true" in res.lower() - if __name__ == "__main__": asyncio.run(main()) diff --git a/examples/llm_vision.py b/examples/llm_vision.py new file mode 100644 index 000000000..276decd59 --- /dev/null +++ b/examples/llm_vision.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : example to run the ability of LLM vision + +import asyncio +from pathlib import Path + +from metagpt.llm import LLM +from metagpt.utils.common import encode_image + + +async def main(): + llm = LLM() + + # check if the configured llm supports llm-vision capacity. If not, it will throw a error + invoice_path = Path(__file__).parent.joinpath("..", "tests", "data", "invoices", "invoice-2.png") + img_base64 = encode_image(invoice_path) + res = await llm.aask(msg="if this is a invoice, just return True else return False", images=[img_base64]) + assert "true" in res.lower() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/metagpt/provider/general_api_requestor.py b/metagpt/provider/general_api_requestor.py index 500cd1426..18f4dd909 100644 --- a/metagpt/provider/general_api_requestor.py +++ b/metagpt/provider/general_api_requestor.py @@ -60,7 +60,8 @@ class GeneralAPIRequestor(APIRequestor): self, result: requests.Response, stream: bool ) -> Tuple[Union[bytes, Iterator[Generator]], bytes]: """Returns the response(s) and a bool indicating whether it is a stream.""" - if stream and "text/event-stream" in result.headers.get("Content-Type", ""): + content_type = result.headers.get("Content-Type", "") + if stream and ("text/event-stream" in content_type or "application/x-ndjson" in content_type): return ( self._interpret_response_line(line, result.status_code, result.headers, stream=True) for line in parse_stream(result.iter_lines()) diff --git a/metagpt/provider/zhipuai_api.py b/metagpt/provider/zhipuai_api.py index 9108a1fba..9e8e5fb53 100644 --- a/metagpt/provider/zhipuai_api.py +++ b/metagpt/provider/zhipuai_api.py @@ -3,9 +3,8 @@ # @Desc : zhipuai LLM from https://open.bigmodel.cn/dev/api#sdk from enum import Enum +from typing import Optional -import openai -import zhipuai from requests import ConnectionError from tenacity import ( after_log, @@ -14,6 +13,7 @@ from tenacity import ( stop_after_attempt, wait_random_exponential, ) +from zhipuai.types.chat.chat_completion import Completion from metagpt.configs.llm_config import LLMConfig, LLMType from metagpt.logs import log_llm_stream, logger @@ -21,6 +21,7 @@ from metagpt.provider.base_llm import BaseLLM from metagpt.provider.llm_provider_registry import register_provider from metagpt.provider.openai_api import log_and_reraise from metagpt.provider.zhipuai.zhipu_model_api import ZhiPuModelAPI +from metagpt.utils.cost_manager import CostManager class ZhiPuEvent(Enum): @@ -38,20 +39,15 @@ class ZhiPuAILLM(BaseLLM): """ def __init__(self, config: LLMConfig): - self.__init_zhipuai(config) - self.llm = ZhiPuModelAPI - self.model = "chatglm_turbo" # so far only one model, just use it - self.use_system_prompt: bool = False # zhipuai has no system prompt when use api self.config = config + self.__init_zhipuai() + self.cost_manager: Optional[CostManager] = None - def __init_zhipuai(self, config: LLMConfig): - assert config.api_key - zhipuai.api_key = config.api_key - # due to use openai sdk, set the api_key but it will't be used. - # openai.api_key = zhipuai.api_key # due to use openai sdk, set the api_key but it will't be used. - if config.proxy: - # FIXME: openai v1.x sdk has no proxy support - openai.proxy = config.proxy + def __init_zhipuai(self): + assert self.config.api_key + self.api_key = self.config.api_key + self.model = self.config.model # so far, it support glm-3-turbo、glm-4 + self.llm = ZhiPuModelAPI(api_key=self.api_key) def _const_kwargs(self, messages: list[dict], stream: bool = False) -> dict: kwargs = {"model": self.model, "messages": messages, "stream": stream, "temperature": 0.3} @@ -63,12 +59,12 @@ class ZhiPuAILLM(BaseLLM): try: prompt_tokens = int(usage.get("prompt_tokens", 0)) completion_tokens = int(usage.get("completion_tokens", 0)) - self.config.cost_manager.update_cost(prompt_tokens, completion_tokens, self.model) + self.cost_manager.update_cost(prompt_tokens, completion_tokens, self.model) except Exception as e: logger.error(f"zhipuai updats costs failed! exp: {e}") def completion(self, messages: list[dict], timeout=3) -> dict: - resp = self.llm.chat.completions.create(**self._const_kwargs(messages)) + resp: Completion = self.llm.chat.completions.create(**self._const_kwargs(messages)) usage = resp.usage.model_dump() self._update_costs(usage) return resp.model_dump() diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py index a0fb3b70d..65f5fe76f 100644 --- a/metagpt/utils/token_counter.py +++ b/metagpt/utils/token_counter.py @@ -32,8 +32,8 @@ TOKEN_COSTS = { "gpt-4-vision-preview": {"prompt": 0.01, "completion": 0.03}, # TODO add extra image price calculator "gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03}, "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0}, - "glm-3-turbo": {"prompt": 0.0, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens - "glm-4": {"prompt": 0.0, "completion": 0.014}, # 128k version, prompt + completion tokens=0.1¥/k-tokens + "glm-3-turbo": {"prompt": 0.0007, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens + "glm-4": {"prompt": 0.014, "completion": 0.014}, # 128k version, prompt + completion tokens=0.1¥/k-tokens "gemini-pro": {"prompt": 0.00025, "completion": 0.0005}, } @@ -58,7 +58,8 @@ TOKEN_MAX = { "gpt-4-vision-preview": 128000, "gpt-4-1106-vision-preview": 128000, "text-embedding-ada-002": 8192, - "chatglm_turbo": 32768, + "glm-3-turbo": 128000, + "glm-4": 128000, "gemini-pro": 32768, } diff --git a/tests/metagpt/provider/test_zhipuai_api.py b/tests/metagpt/provider/test_zhipuai_api.py index 798209710..ad2ececa2 100644 --- a/tests/metagpt/provider/test_zhipuai_api.py +++ b/tests/metagpt/provider/test_zhipuai_api.py @@ -17,7 +17,7 @@ default_resp = { } -async def mock_zhipuai_acreate_stream(**kwargs): +async def mock_zhipuai_acreate_stream(self, **kwargs): class MockResponse(object): async def _aread(self): class Iterator(object): @@ -37,7 +37,7 @@ async def mock_zhipuai_acreate_stream(**kwargs): return MockResponse() -async def mock_zhipuai_acreate(**kwargs) -> dict: +async def mock_zhipuai_acreate(self, **kwargs) -> dict: return default_resp From 3b4379d12569cae719ff58f6c39208eed05483aa Mon Sep 17 00:00:00 2001 From: voidking Date: Wed, 7 Feb 2024 10:34:04 +0800 Subject: [PATCH 05/12] chore: move the required playwright to requirements.txt --- requirements.txt | 2 +- setup.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6cb25d52b..804ff4359 100644 --- a/requirements.txt +++ b/requirements.txt @@ -63,7 +63,7 @@ gitignore-parser==0.1.9 websockets~=12.0 networkx~=3.2.1 google-generativeai==0.3.2 -# playwright==1.40.0 # playwright extras require +playwright>=1.26 # used at metagpt/tools/libs/web_scraping.py anytree ipywidgets==8.1.1 Pillow diff --git a/setup.py b/setup.py index b16d978cf..be3956ea4 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,6 @@ requirements = (here / "requirements.txt").read_text(encoding="utf-8").splitline extras_require = { - "playwright": ["playwright>=1.26", "beautifulsoup4"], "selenium": ["selenium>4", "webdriver_manager", "beautifulsoup4"], "search-google": ["google-api-python-client==2.94.0"], "search-ddg": ["duckduckgo-search~=4.1.1"], From 63ab24a77bbb850baed77b515941342d48329aca Mon Sep 17 00:00:00 2001 From: voidking Date: Wed, 7 Feb 2024 11:54:31 +0800 Subject: [PATCH 06/12] chore: add one more space --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 804ff4359..1426500ce 100644 --- a/requirements.txt +++ b/requirements.txt @@ -63,7 +63,7 @@ gitignore-parser==0.1.9 websockets~=12.0 networkx~=3.2.1 google-generativeai==0.3.2 -playwright>=1.26 # used at metagpt/tools/libs/web_scraping.py +playwright>=1.26 # used at metagpt/tools/libs/web_scraping.py anytree ipywidgets==8.1.1 Pillow From 4370060802b3da936880aefb7aa28a6ba22780cd Mon Sep 17 00:00:00 2001 From: geekan Date: Wed, 7 Feb 2024 16:23:54 +0800 Subject: [PATCH 07/12] fix bug --- config/config2.yaml.example | 2 +- metagpt/actions/research.py | 2 +- metagpt/utils/cost_manager.py | 4 ++++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/config/config2.yaml.example b/config/config2.yaml.example index 8f4a33fc1..2217f1b2c 100644 --- a/config/config2.yaml.example +++ b/config/config2.yaml.example @@ -1,5 +1,5 @@ llm: - api_type: "openai" + api_type: "openai" # or azure / ollama etc. base_url: "YOUR_BASE_URL" api_key: "YOUR_API_KEY" model: "gpt-4-turbo-preview" # or gpt-3.5-turbo-1106 / gpt-4-1106-preview diff --git a/metagpt/actions/research.py b/metagpt/actions/research.py index 2ebeadb66..316e9f299 100644 --- a/metagpt/actions/research.py +++ b/metagpt/actions/research.py @@ -133,7 +133,7 @@ class CollectLinks(Action): if len(remove) == 0: break - model_name = config.get_openai_llm().model + model_name = config.model prompt = reduce_message_length(gen_msg(), model_name, system_text, 4096) logger.debug(prompt) queries = await self._aask(prompt, [system_text]) diff --git a/metagpt/utils/cost_manager.py b/metagpt/utils/cost_manager.py index 7bf5154b6..c4c93f91f 100644 --- a/metagpt/utils/cost_manager.py +++ b/metagpt/utils/cost_manager.py @@ -41,6 +41,10 @@ class CostManager(BaseModel): """ self.total_prompt_tokens += prompt_tokens self.total_completion_tokens += completion_tokens + if model not in TOKEN_COSTS: + logger.warning(f"Model {model} not found in TOKEN_COSTS.") + return + cost = ( prompt_tokens * TOKEN_COSTS[model]["prompt"] + completion_tokens * TOKEN_COSTS[model]["completion"] ) / 1000 From c0867643d828084e7503f05ae44987dccf3687d1 Mon Sep 17 00:00:00 2001 From: geekan Date: Wed, 7 Feb 2024 16:24:33 +0800 Subject: [PATCH 08/12] fix bug --- metagpt/actions/research.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/actions/research.py b/metagpt/actions/research.py index 316e9f299..ce8d8a967 100644 --- a/metagpt/actions/research.py +++ b/metagpt/actions/research.py @@ -133,7 +133,7 @@ class CollectLinks(Action): if len(remove) == 0: break - model_name = config.model + model_name = config.llm.model prompt = reduce_message_length(gen_msg(), model_name, system_text, 4096) logger.debug(prompt) queries = await self._aask(prompt, [system_text]) From d112371dadf02ee9a828c6708d0bbaa3e600c113 Mon Sep 17 00:00:00 2001 From: geekan Date: Wed, 7 Feb 2024 16:33:24 +0800 Subject: [PATCH 09/12] fix bug --- metagpt/utils/text.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/utils/text.py b/metagpt/utils/text.py index dd9678438..921efe706 100644 --- a/metagpt/utils/text.py +++ b/metagpt/utils/text.py @@ -25,7 +25,7 @@ def reduce_message_length( """ max_token = TOKEN_MAX.get(model_name, 2048) - count_string_tokens(system_text, model_name) - reserved for msg in msgs: - if count_string_tokens(msg, model_name) < max_token: + if count_string_tokens(msg, model_name) < max_token or model_name not in TOKEN_MAX: return msg raise RuntimeError("fail to reduce message length") From 50a14718baeacfada5cf7008e2761a801adbd968 Mon Sep 17 00:00:00 2001 From: geekan Date: Wed, 7 Feb 2024 16:37:23 +0800 Subject: [PATCH 10/12] refine log --- metagpt/provider/openai_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py index 63e68c9bd..120748d15 100644 --- a/metagpt/provider/openai_api.py +++ b/metagpt/provider/openai_api.py @@ -236,7 +236,7 @@ class OpenAILLM(BaseLLM): usage.prompt_tokens = count_message_tokens(messages, self.model) usage.completion_tokens = count_string_tokens(rsp, self.model) except Exception as e: - logger.error(f"usage calculation failed: {e}") + logger.warning(f"usage calculation failed: {e}") return usage From ce63e455dfe1071a99ee421c1e17df07db20200d Mon Sep 17 00:00:00 2001 From: geekan Date: Wed, 7 Feb 2024 17:03:10 +0800 Subject: [PATCH 11/12] fix bug --- metagpt/provider/openai_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py index 120748d15..756f8c483 100644 --- a/metagpt/provider/openai_api.py +++ b/metagpt/provider/openai_api.py @@ -253,7 +253,7 @@ class OpenAILLM(BaseLLM): def _get_max_tokens(self, messages: list[dict]): if not self.auto_max_tokens: return self.config.max_token - return get_max_completion_tokens(messages, self.model, self.config.max_tokens) + return get_max_completion_tokens(messages, self.model, self.config.max_token) @handle_exception async def amoderation(self, content: Union[str, list[str]]): From d94f4fbfbc3bd4310669f06e9bac9a7c89001712 Mon Sep 17 00:00:00 2001 From: shenchucheng Date: Wed, 7 Feb 2024 17:44:36 +0800 Subject: [PATCH 12/12] fix research bugs --- metagpt/provider/openai_api.py | 4 +++- metagpt/utils/text.py | 2 +- tests/metagpt/utils/test_text.py | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/metagpt/provider/openai_api.py b/metagpt/provider/openai_api.py index 63e68c9bd..7b2cd6220 100644 --- a/metagpt/provider/openai_api.py +++ b/metagpt/provider/openai_api.py @@ -253,7 +253,9 @@ class OpenAILLM(BaseLLM): def _get_max_tokens(self, messages: list[dict]): if not self.auto_max_tokens: return self.config.max_token - return get_max_completion_tokens(messages, self.model, self.config.max_tokens) + # FIXME + # https://community.openai.com/t/why-is-gpt-3-5-turbo-1106-max-tokens-limited-to-4096/494973/3 + return min(get_max_completion_tokens(messages, self.model, self.config.max_tokens), 4096) @handle_exception async def amoderation(self, content: Union[str, list[str]]): diff --git a/metagpt/utils/text.py b/metagpt/utils/text.py index 921efe706..fb8b94232 100644 --- a/metagpt/utils/text.py +++ b/metagpt/utils/text.py @@ -93,7 +93,7 @@ def split_paragraph(paragraph: str, sep: str = ".,", count: int = 2) -> list[str continue ret = ["".join(j) for j in _split_by_count(sentences, count)] return ret - return _split_by_count(paragraph, count) + return list(_split_by_count(paragraph, count)) def decode_unicode_escape(text: str) -> str: diff --git a/tests/metagpt/utils/test_text.py b/tests/metagpt/utils/test_text.py index 7003c7767..c9a9753be 100644 --- a/tests/metagpt/utils/test_text.py +++ b/tests/metagpt/utils/test_text.py @@ -42,6 +42,7 @@ def test_reduce_message_length(msgs, model_name, system_text, reserved, expected (" ".join("Hello World." for _ in range(1000)), "Prompt: {}", "gpt-3.5-turbo-16k", "System", 3000, 1), (" ".join("Hello World." for _ in range(4000)), "Prompt: {}", "gpt-4", "System", 2000, 2), (" ".join("Hello World." for _ in range(8000)), "Prompt: {}", "gpt-4-32k", "System", 4000, 1), + (" ".join("Hello World" for _ in range(8000)), "Prompt: {}", "gpt-3.5-turbo", "System", 1000, 8), ], ) def test_generate_prompt_chunk(text, prompt_template, model_name, system_text, reserved, expected):