Fix issue 654 and re-add the use_system_prompt check before adding the default system message

This commit is contained in:
better629 2024-01-02 11:42:43 +08:00
parent 2243ea0462
commit 81334b733d
6 changed files with 11 additions and 10 deletions

View file

@ -43,7 +43,7 @@ class BaseLLM(ABC):
if system_msgs:
message = self._system_msgs(system_msgs)
else:
message = [self._default_system_msg()]
message = [self._default_system_msg()] if self.use_system_prompt else []
if format_msgs:
message.extend(format_msgs)
message.append(self._user_msg(msg))

View file

@ -64,8 +64,9 @@ class FireworksCostManager(CostManager):
token_costs = self.model_grade_token_costs(model)
cost = (prompt_tokens * token_costs["prompt"] + completion_tokens * token_costs["completion"]) / 1000000
self.total_cost += cost
max_budget = CONFIG.max_budget if CONFIG.max_budget else CONFIG.cost_manager.max_budget
logger.info(
f"Total running cost: ${self.total_cost:.4f} | Max budget: ${CONFIG.max_budget:.3f} | "
f"Total running cost: ${self.total_cost:.4f} | Max budget: ${max_budget:.3f} | "
f"Current cost: ${cost:.4f}, prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}"
)
CONFIG.total_cost = self.total_cost

View file

@ -30,9 +30,9 @@ class OllamaCostManager(CostManager):
"""
self.total_prompt_tokens += prompt_tokens
self.total_completion_tokens += completion_tokens
max_budget = CONFIG.max_budget if CONFIG.max_budget else CONFIG.cost_manager.max_budget
logger.info(
f"Max budget: ${CONFIG.max_budget:.3f} | "
f"Max budget: ${max_budget:.3f} | "
f"prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}"
)
CONFIG.total_cost = self.total_cost

View file

@ -26,9 +26,9 @@ class OpenLLMCostManager(CostManager):
"""
self.total_prompt_tokens += prompt_tokens
self.total_completion_tokens += completion_tokens
max_budget = CONFIG.max_budget if CONFIG.max_budget else CONFIG.cost_manager.max_budget
logger.info(
f"Max budget: ${CONFIG.max_budget:.3f} | reference "
f"Max budget: ${max_budget:.3f} | reference "
f"prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}"
)

View file

@ -2,6 +2,7 @@
# -*- coding: utf-8 -*-
# @Desc : zhipu model api to support sync & async for invoke & sse_invoke
import json
import zhipuai
from zhipuai.model_api.api import InvokeType, ModelAPI
from zhipuai.utils.http_client import headers as zhipuai_default_headers
@ -51,7 +52,6 @@ class ZhiPuModelAPI(ModelAPI):
params=kwargs,
request_timeout=zhipuai.api_timeout_seconds,
)
return result
@classmethod
@ -61,6 +61,8 @@ class ZhiPuModelAPI(ModelAPI):
resp = await cls.arequest(
invoke_type=InvokeType.SYNC, stream=False, method="post", headers=headers, kwargs=kwargs
)
resp = resp.decode("utf-8")
resp = json.loads(resp)
return resp
@classmethod

View file

@ -38,12 +38,11 @@ class ZhiPuAILLM(BaseLLM):
From now, there is only one model named `chatglm_turbo`
"""
use_system_prompt: bool = False # zhipuai has no system prompt when use api
def __init__(self):
self.__init_zhipuai(CONFIG)
self.llm = ZhiPuModelAPI
self.model = "chatglm_turbo" # so far only one model, just use it
self.use_system_prompt: bool = False # zhipuai has no system prompt when use api
def __init_zhipuai(self, config: CONFIG):
assert config.zhipuai_api_key
@ -101,7 +100,6 @@ class ZhiPuAILLM(BaseLLM):
elif event.event == ZhiPuEvent.ERROR.value or event.event == ZhiPuEvent.INTERRUPTED.value:
content = event.data
logger.error(f"event error: {content}", end="")
collected_content.append([content])
elif event.event == ZhiPuEvent.FINISH.value:
"""
event.meta