diff --git a/examples/llm_hello_world.py b/examples/llm_hello_world.py index 1d132eb8a..219a303c8 100644 --- a/examples/llm_hello_world.py +++ b/examples/llm_hello_world.py @@ -6,11 +6,9 @@ @File : llm_hello_world.py """ import asyncio -from pathlib import Path from metagpt.llm import LLM from metagpt.logs import logger -from metagpt.utils.common import encode_image async def main(): @@ -29,12 +27,6 @@ async def main(): if hasattr(llm, "completion"): logger.info(llm.completion(hello_msg)) - # check if the configured llm supports llm-vision capacity. If not, it will throw a error - invoice_path = Path(__file__).parent.joinpath("..", "tests", "data", "invoices", "invoice-2.png") - img_base64 = encode_image(invoice_path) - res = await llm.aask(msg="if this is a invoice, just return True else return False", images=[img_base64]) - assert "true" in res.lower() - if __name__ == "__main__": asyncio.run(main()) diff --git a/examples/llm_vision.py b/examples/llm_vision.py new file mode 100644 index 000000000..276decd59 --- /dev/null +++ b/examples/llm_vision.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# @Desc : example to run the ability of LLM vision + +import asyncio +from pathlib import Path + +from metagpt.llm import LLM +from metagpt.utils.common import encode_image + + +async def main(): + llm = LLM() + + # check if the configured llm supports llm-vision capacity. If not, it will throw a error + invoice_path = Path(__file__).parent.joinpath("..", "tests", "data", "invoices", "invoice-2.png") + img_base64 = encode_image(invoice_path) + res = await llm.aask(msg="if this is a invoice, just return True else return False", images=[img_base64]) + assert "true" in res.lower() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/metagpt/provider/general_api_requestor.py b/metagpt/provider/general_api_requestor.py index 500cd1426..18f4dd909 100644 --- a/metagpt/provider/general_api_requestor.py +++ b/metagpt/provider/general_api_requestor.py @@ -60,7 +60,8 @@ class GeneralAPIRequestor(APIRequestor): self, result: requests.Response, stream: bool ) -> Tuple[Union[bytes, Iterator[Generator]], bytes]: """Returns the response(s) and a bool indicating whether it is a stream.""" - if stream and "text/event-stream" in result.headers.get("Content-Type", ""): + content_type = result.headers.get("Content-Type", "") + if stream and ("text/event-stream" in content_type or "application/x-ndjson" in content_type): return ( self._interpret_response_line(line, result.status_code, result.headers, stream=True) for line in parse_stream(result.iter_lines()) diff --git a/metagpt/provider/zhipuai_api.py b/metagpt/provider/zhipuai_api.py index 9108a1fba..9e8e5fb53 100644 --- a/metagpt/provider/zhipuai_api.py +++ b/metagpt/provider/zhipuai_api.py @@ -3,9 +3,8 @@ # @Desc : zhipuai LLM from https://open.bigmodel.cn/dev/api#sdk from enum import Enum +from typing import Optional -import openai -import zhipuai from requests import ConnectionError from tenacity import ( after_log, @@ -14,6 +13,7 @@ from tenacity import ( stop_after_attempt, wait_random_exponential, ) +from zhipuai.types.chat.chat_completion import Completion from metagpt.configs.llm_config import LLMConfig, LLMType from metagpt.logs import log_llm_stream, logger @@ -21,6 +21,7 @@ from metagpt.provider.base_llm import BaseLLM from metagpt.provider.llm_provider_registry import register_provider from metagpt.provider.openai_api import log_and_reraise from metagpt.provider.zhipuai.zhipu_model_api import ZhiPuModelAPI +from metagpt.utils.cost_manager import CostManager class ZhiPuEvent(Enum): @@ -38,20 +39,15 @@ class ZhiPuAILLM(BaseLLM): """ def __init__(self, config: LLMConfig): - self.__init_zhipuai(config) - self.llm = ZhiPuModelAPI - self.model = "chatglm_turbo" # so far only one model, just use it - self.use_system_prompt: bool = False # zhipuai has no system prompt when use api self.config = config + self.__init_zhipuai() + self.cost_manager: Optional[CostManager] = None - def __init_zhipuai(self, config: LLMConfig): - assert config.api_key - zhipuai.api_key = config.api_key - # due to use openai sdk, set the api_key but it will't be used. - # openai.api_key = zhipuai.api_key # due to use openai sdk, set the api_key but it will't be used. - if config.proxy: - # FIXME: openai v1.x sdk has no proxy support - openai.proxy = config.proxy + def __init_zhipuai(self): + assert self.config.api_key + self.api_key = self.config.api_key + self.model = self.config.model # so far, it support glm-3-turbo、glm-4 + self.llm = ZhiPuModelAPI(api_key=self.api_key) def _const_kwargs(self, messages: list[dict], stream: bool = False) -> dict: kwargs = {"model": self.model, "messages": messages, "stream": stream, "temperature": 0.3} @@ -63,12 +59,12 @@ class ZhiPuAILLM(BaseLLM): try: prompt_tokens = int(usage.get("prompt_tokens", 0)) completion_tokens = int(usage.get("completion_tokens", 0)) - self.config.cost_manager.update_cost(prompt_tokens, completion_tokens, self.model) + self.cost_manager.update_cost(prompt_tokens, completion_tokens, self.model) except Exception as e: logger.error(f"zhipuai updats costs failed! exp: {e}") def completion(self, messages: list[dict], timeout=3) -> dict: - resp = self.llm.chat.completions.create(**self._const_kwargs(messages)) + resp: Completion = self.llm.chat.completions.create(**self._const_kwargs(messages)) usage = resp.usage.model_dump() self._update_costs(usage) return resp.model_dump() diff --git a/metagpt/utils/token_counter.py b/metagpt/utils/token_counter.py index a0fb3b70d..65f5fe76f 100644 --- a/metagpt/utils/token_counter.py +++ b/metagpt/utils/token_counter.py @@ -32,8 +32,8 @@ TOKEN_COSTS = { "gpt-4-vision-preview": {"prompt": 0.01, "completion": 0.03}, # TODO add extra image price calculator "gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03}, "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0}, - "glm-3-turbo": {"prompt": 0.0, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens - "glm-4": {"prompt": 0.0, "completion": 0.014}, # 128k version, prompt + completion tokens=0.1¥/k-tokens + "glm-3-turbo": {"prompt": 0.0007, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens + "glm-4": {"prompt": 0.014, "completion": 0.014}, # 128k version, prompt + completion tokens=0.1¥/k-tokens "gemini-pro": {"prompt": 0.00025, "completion": 0.0005}, } @@ -58,7 +58,8 @@ TOKEN_MAX = { "gpt-4-vision-preview": 128000, "gpt-4-1106-vision-preview": 128000, "text-embedding-ada-002": 8192, - "chatglm_turbo": 32768, + "glm-3-turbo": 128000, + "glm-4": 128000, "gemini-pro": 32768, } diff --git a/tests/metagpt/provider/test_zhipuai_api.py b/tests/metagpt/provider/test_zhipuai_api.py index 798209710..ad2ececa2 100644 --- a/tests/metagpt/provider/test_zhipuai_api.py +++ b/tests/metagpt/provider/test_zhipuai_api.py @@ -17,7 +17,7 @@ default_resp = { } -async def mock_zhipuai_acreate_stream(**kwargs): +async def mock_zhipuai_acreate_stream(self, **kwargs): class MockResponse(object): async def _aread(self): class Iterator(object): @@ -37,7 +37,7 @@ async def mock_zhipuai_acreate_stream(**kwargs): return MockResponse() -async def mock_zhipuai_acreate(**kwargs) -> dict: +async def mock_zhipuai_acreate(self, **kwargs) -> dict: return default_resp