Merge branch 'main' into feat_qianfan

This commit is contained in:
better629 2024-02-07 19:04:09 +08:00 committed by GitHub
commit 82557dfe81
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 69 additions and 34 deletions

View file

@ -1,5 +1,5 @@
llm:
api_type: "openai"
api_type: "openai" # or azure / ollama etc.
base_url: "YOUR_BASE_URL"
api_key: "YOUR_API_KEY"
model: "gpt-4-turbo-preview" # or gpt-3.5-turbo-1106 / gpt-4-1106-preview

23
examples/llm_vision.py Normal file
View file

@ -0,0 +1,23 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Desc : example to run the ability of LLM vision
import asyncio
from pathlib import Path
from metagpt.llm import LLM
from metagpt.utils.common import encode_image
async def main():
llm = LLM()
# check if the configured llm supports llm-vision capacity. If not, it will throw a error
invoice_path = Path(__file__).parent.joinpath("..", "tests", "data", "invoices", "invoice-2.png")
img_base64 = encode_image(invoice_path)
res = await llm.aask(msg="if this is a invoice, just return True else return False", images=[img_base64])
assert "true" in res.lower()
if __name__ == "__main__":
asyncio.run(main())

View file

@ -133,7 +133,7 @@ class CollectLinks(Action):
if len(remove) == 0:
break
model_name = config.get_openai_llm().model
model_name = config.llm.model
prompt = reduce_message_length(gen_msg(), model_name, system_text, 4096)
logger.debug(prompt)
queries = await self._aask(prompt, [system_text])

View file

@ -60,7 +60,8 @@ class GeneralAPIRequestor(APIRequestor):
self, result: requests.Response, stream: bool
) -> Tuple[Union[bytes, Iterator[Generator]], bytes]:
"""Returns the response(s) and a bool indicating whether it is a stream."""
if stream and "text/event-stream" in result.headers.get("Content-Type", ""):
content_type = result.headers.get("Content-Type", "")
if stream and ("text/event-stream" in content_type or "application/x-ndjson" in content_type):
return (
self._interpret_response_line(line, result.status_code, result.headers, stream=True)
for line in parse_stream(result.iter_lines())

View file

@ -233,14 +233,16 @@ class OpenAILLM(BaseLLM):
usage.prompt_tokens = count_message_tokens(messages, self.model)
usage.completion_tokens = count_string_tokens(rsp, self.model)
except Exception as e:
logger.error(f"usage calculation failed: {e}")
logger.warning(f"usage calculation failed: {e}")
return usage
def _get_max_tokens(self, messages: list[dict]):
if not self.auto_max_tokens:
return self.config.max_token
return get_max_completion_tokens(messages, self.model, self.config.max_tokens)
# FIXME
# https://community.openai.com/t/why-is-gpt-3-5-turbo-1106-max-tokens-limited-to-4096/494973/3
return min(get_max_completion_tokens(messages, self.model, self.config.max_token), 4096)
@handle_exception
async def amoderation(self, content: Union[str, list[str]]):

View file

@ -3,9 +3,8 @@
# @Desc : zhipuai LLM from https://open.bigmodel.cn/dev/api#sdk
from enum import Enum
from typing import Optional
import openai
import zhipuai
from requests import ConnectionError
from tenacity import (
after_log,
@ -14,6 +13,7 @@ from tenacity import (
stop_after_attempt,
wait_random_exponential,
)
from zhipuai.types.chat.chat_completion import Completion
from metagpt.configs.llm_config import LLMConfig, LLMType
from metagpt.logs import log_llm_stream, logger
@ -21,6 +21,7 @@ from metagpt.provider.base_llm import BaseLLM
from metagpt.provider.llm_provider_registry import register_provider
from metagpt.provider.openai_api import log_and_reraise
from metagpt.provider.zhipuai.zhipu_model_api import ZhiPuModelAPI
from metagpt.utils.cost_manager import CostManager
class ZhiPuEvent(Enum):
@ -38,27 +39,22 @@ class ZhiPuAILLM(BaseLLM):
"""
def __init__(self, config: LLMConfig):
self.__init_zhipuai(config)
self.llm = ZhiPuModelAPI
self.model = "chatglm_turbo" # so far only one model, just use it
self.use_system_prompt: bool = False # zhipuai has no system prompt when use api
self.config = config
self.__init_zhipuai()
self.cost_manager: Optional[CostManager] = None
def __init_zhipuai(self, config: LLMConfig):
assert config.api_key
zhipuai.api_key = config.api_key
# due to use openai sdk, set the api_key but it will't be used.
# openai.api_key = zhipuai.api_key # due to use openai sdk, set the api_key but it will't be used.
if config.proxy:
# FIXME: openai v1.x sdk has no proxy support
openai.proxy = config.proxy
def __init_zhipuai(self):
assert self.config.api_key
self.api_key = self.config.api_key
self.model = self.config.model # so far, it support glm-3-turbo、glm-4
self.llm = ZhiPuModelAPI(api_key=self.api_key)
def _const_kwargs(self, messages: list[dict], stream: bool = False) -> dict:
kwargs = {"model": self.model, "messages": messages, "stream": stream, "temperature": 0.3}
return kwargs
def completion(self, messages: list[dict], timeout=3) -> dict:
resp = self.llm.chat.completions.create(**self._const_kwargs(messages))
resp: Completion = self.llm.chat.completions.create(**self._const_kwargs(messages))
usage = resp.usage.model_dump()
self._update_costs(usage)
return resp.model_dump()

View file

@ -72,11 +72,7 @@ class CodeInterpreter(Role):
if ReviewConst.CHANGE_WORDS[0] in review:
counter = 0 # redo the task again with help of human suggestions
py_code = (
code["code"] if code.get("language") == "python" else ""
) # use python code as final code; for markdown, return the rendered result instead of the code itself
return py_code, result, success
return code["code"], result, success
async def _write_code(self):
todo = WriteCodeWithoutTools() if not self.use_tools else WriteCodeWithTools(selected_tools=self.tools)

View file

@ -42,6 +42,10 @@ class CostManager(BaseModel):
"""
self.total_prompt_tokens += prompt_tokens
self.total_completion_tokens += completion_tokens
if model not in TOKEN_COSTS:
logger.warning(f"Model {model} not found in TOKEN_COSTS.")
return
cost = (
prompt_tokens * self.token_costs[model]["prompt"]
+ completion_tokens * self.token_costs[model]["completion"]

View file

@ -25,7 +25,7 @@ def reduce_message_length(
"""
max_token = TOKEN_MAX.get(model_name, 2048) - count_string_tokens(system_text, model_name) - reserved
for msg in msgs:
if count_string_tokens(msg, model_name) < max_token:
if count_string_tokens(msg, model_name) < max_token or model_name not in TOKEN_MAX:
return msg
raise RuntimeError("fail to reduce message length")
@ -93,7 +93,7 @@ def split_paragraph(paragraph: str, sep: str = ".,", count: int = 2) -> list[str
continue
ret = ["".join(j) for j in _split_by_count(sentences, count)]
return ret
return _split_by_count(paragraph, count)
return list(_split_by_count(paragraph, count))
def decode_unicode_escape(text: str) -> str:

View file

@ -32,8 +32,8 @@ TOKEN_COSTS = {
"gpt-4-vision-preview": {"prompt": 0.01, "completion": 0.03}, # TODO add extra image price calculator
"gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03},
"text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0},
"glm-3-turbo": {"prompt": 0.0, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens
"glm-4": {"prompt": 0.0, "completion": 0.014}, # 128k version, prompt + completion tokens=0.1¥/k-tokens
"glm-3-turbo": {"prompt": 0.0007, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens
"glm-4": {"prompt": 0.014, "completion": 0.014}, # 128k version, prompt + completion tokens=0.1¥/k-tokens
"gemini-pro": {"prompt": 0.00025, "completion": 0.0005},
}
@ -111,7 +111,8 @@ TOKEN_MAX = {
"gpt-4-vision-preview": 128000,
"gpt-4-1106-vision-preview": 128000,
"text-embedding-ada-002": 8192,
"chatglm_turbo": 32768,
"glm-3-turbo": 128000,
"glm-4": 128000,
"gemini-pro": 32768,
}

View file

@ -63,7 +63,7 @@ gitignore-parser==0.1.9
websockets~=12.0
networkx~=3.2.1
google-generativeai==0.3.2
# playwright==1.40.0 # playwright extras require
playwright>=1.26 # used at metagpt/tools/libs/web_scraping.py
anytree
ipywidgets==8.1.1
Pillow

View file

@ -24,7 +24,6 @@ requirements = (here / "requirements.txt").read_text(encoding="utf-8").splitline
extras_require = {
"playwright": ["playwright>=1.26", "beautifulsoup4"],
"selenium": ["selenium>4", "webdriver_manager", "beautifulsoup4"],
"search-google": ["google-api-python-client==2.94.0"],
"search-ddg": ["duckduckgo-search~=4.1.1"],

View file

@ -389,5 +389,11 @@
"reflection": "The implementation failed the test case where the input array is [1, 5, 2, 3, 4]. The issue arises because the code does not handle the sorting of elements with the same number of ones in their binary representation correctly. According to the context, for similar number of ones, the elements should be sorted based on their decimal value in ascending order. However, the output [1, 2, 4, 3, 5] shows that 3 and 4 are not in the correct order, indicating that the sorting by decimal value is not functioning as intended. To fix this issue, we should ensure that the sorting is stable, so that when two elements have the same number of ones, they maintain their relative order if their decimal values are already in ascending order.",
"improved_impl": "def sort_array(arr):\n # Helper function to count the number of ones in the binary representation\n def count_ones(n):\n return bin(n).count('1')\n \n # Sort the array using a custom key function\n # The key function returns a tuple (number of ones, value) for each element\n # This ensures that if two elements have the same number of ones, they are sorted by their value\n # We use a stable sorting algorithm to maintain the relative order of elements with the same number of ones\n sorted_arr = sorted(arr, key=lambda x: (count_ones(x), x), reverse=True)\n sorted_arr = sorted(sorted_arr, key=count_ones)\n \n return sorted_arr\n"
},
"[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n {\\n \\\"task_id\\\": \\\"1\\\",\\n \\\"dependent_task_ids\\\": [],\\n \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"from sklearn.datasets import load_iris\\\\niris_data = load_iris()\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"2\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"1\\\"\\n ],\\n \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"eda\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n },\\n {\\n \\\"task_id\\\": \\\"3\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"2\\\"\\n ],\\n \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"2\\\",\\\"dependent_task_ids\\\":[\\\"1\\\"],\\\"instruction\\\":\\\"Perform exploratory data analysis on the Iris dataset.\\\",\\\"task_type\\\":\\\"eda\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": {
"code": "import pandas as pd\n\n# Convert the Iris dataset to a DataFrame for easier manipulation\niris_df = pd.DataFrame(data=iris_data.data, columns=iris_data.feature_names)\niris_df['target'] = iris_data.target\niris_df['target_names'] = iris_df['target'].apply(lambda x: iris_data.target_names[x])\n\n# Display basic information about the dataset\niris_df.info()\n\n# Display statistical summary of the dataset\niris_df.describe()\n\n# Display the first few rows of the dataset\ndisplay(iris_df.head())"
},
"[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n {\\n \\\"task_id\\\": \\\"1\\\",\\n \\\"dependent_task_ids\\\": [],\\n \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"from sklearn.datasets import load_iris\\\\niris_data = load_iris()\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"2\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"1\\\"\\n ],\\n \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"eda\\\",\\n \\\"code\\\": \\\"import pandas as pd\\\\n\\\\n# Convert the Iris dataset to a DataFrame for easier manipulation\\\\niris_df = pd.DataFrame(data=iris_data.data, columns=iris_data.feature_names)\\\\niris_df['target'] = iris_data.target\\\\niris_df['target_names'] = iris_df['target'].apply(lambda x: iris_data.target_names[x])\\\\n\\\\n# Display basic information about the dataset\\\\niris_df.info()\\\\n\\\\n# Display statistical summary of the dataset\\\\niris_df.describe()\\\\n\\\\n# Display the first few rows of the dataset\\\\ndisplay(iris_df.head())\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"3\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"2\\\"\\n ],\\n \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"3\\\",\\\"dependent_task_ids\\\":[\\\"2\\\"],\\\"instruction\\\":\\\"Create a plot visualizing the Iris dataset.\\\",\\\"task_type\\\":\\\"other\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": {
"code": "import matplotlib.pyplot as plt\nimport seaborn as sns\n\n# Set the style of seaborn\nsns.set_style('whitegrid')\n\n# Pairplot to visualize the relationships between features\nsns.pairplot(iris_df, hue='target_names', markers='+')\nplt.show()"
},
"\n## context\n\n\n-----\n\n## format example\n[CONTENT]\n{\n \"invoice\": \"False\"\n}\n[/CONTENT]\n\n## nodes: \"<node>: <type> # <instruction>\"\n- invoice: <class 'bool'> # if it's a invoice file, return True else False\n\n\n## constraint\nLanguage: Please use the same language as Human INPUT.\nFormat: output wrapped inside [CONTENT][/CONTENT] like format example, nothing else.\n\n## action\nFollow instructions of nodes, generate output and make sure it follows the format example.\n": "[CONTENT]\n{\n \"invoice\": \"True\"\n}\n[/CONTENT]"
}

View file

@ -14,6 +14,7 @@ from metagpt.actions.rebuild_class_view import RebuildClassView
from metagpt.llm import LLM
@pytest.mark.skip
@pytest.mark.asyncio
async def test_rebuild(context):
action = RebuildClassView(

View file

@ -176,6 +176,7 @@ class Snake:
"""
@pytest.mark.skip
@pytest.mark.asyncio
async def test_summarize_code(context):
git_dir = Path(__file__).parent / f"unittest/{uuid.uuid4().hex}"

View file

@ -19,7 +19,7 @@ resp_cont = resp_cont_tmpl.format(name=name)
default_resp = get_part_chat_completion(name)
async def mock_zhipuai_acreate_stream(**kwargs):
async def mock_zhipuai_acreate_stream(self, **kwargs):
class MockResponse(object):
async def _aread(self):
class Iterator(object):
@ -39,7 +39,7 @@ async def mock_zhipuai_acreate_stream(**kwargs):
return MockResponse()
async def mock_zhipuai_acreate(**kwargs) -> dict:
async def mock_zhipuai_acreate(self, **kwargs) -> dict:
return default_resp

View file

@ -17,3 +17,7 @@ async def test_code_interpreter(mocker, auto_run):
rsp = await ci.run(requirement)
logger.info(rsp)
assert len(rsp.content) > 0
finished_tasks = ci.planner.plan.get_finished_tasks()
assert len(finished_tasks) > 0
assert len(finished_tasks[0].code) > 0 # check one task to see if code is recorded

View file

@ -42,6 +42,7 @@ def test_reduce_message_length(msgs, model_name, system_text, reserved, expected
(" ".join("Hello World." for _ in range(1000)), "Prompt: {}", "gpt-3.5-turbo-16k", "System", 3000, 1),
(" ".join("Hello World." for _ in range(4000)), "Prompt: {}", "gpt-4", "System", 2000, 2),
(" ".join("Hello World." for _ in range(8000)), "Prompt: {}", "gpt-4-32k", "System", 4000, 1),
(" ".join("Hello World" for _ in range(8000)), "Prompt: {}", "gpt-3.5-turbo", "System", 1000, 8),
],
)
def test_generate_prompt_chunk(text, prompt_template, model_name, system_text, reserved, expected):