Mirror of https://github.com/FoundationAgents/MetaGPT.git, synced 2026-05-03 21:02:38 +02:00
Merge remote-tracking branch 'origin/main' into dev/wtg

# Conflicts:
#	metagpt/config.py
#	metagpt/provider/openai_api.py

Commit ce6b8a95dd
61 changed files with 2567 additions and 382 deletions
metagpt/provider/openai_api.py:

@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
 @Time    : 2023/5/5 23:08
@@ -21,6 +20,7 @@ from metagpt.utils.token_counter import (
     TOKEN_COSTS,
     count_message_tokens,
     count_string_tokens,
+    get_max_completion_tokens,
 )

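The newly imported get_max_completion_tokens drives the dynamic max_tokens logic added at the bottom of this diff. As a rough sketch of what such a helper does, assuming a tiktoken-style count_message_tokens (from the same import block) and a hypothetical TOKEN_MAX table of context-window sizes; the real implementation lives in metagpt/utils/token_counter.py and may differ:

    from metagpt.utils.token_counter import count_message_tokens

    # Hypothetical {model: context window} table, for illustration only.
    TOKEN_MAX = {"gpt-3.5-turbo": 4096, "gpt-4": 8192}

    def get_max_completion_tokens(messages: list[dict], model: str, default: int) -> int:
        """Return how many completion tokens remain after the prompt, or a default."""
        if model not in TOKEN_MAX:
            return default
        # Subtract 1 for the assistant-reply priming token, as in tiktoken's examples.
        return TOKEN_MAX[model] - count_message_tokens(messages, model) - 1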
@@ -29,11 +29,13 @@ class RateLimiter:

     def __init__(self, rpm):
         self.last_call_time = 0
-        self.interval = 1.1 * 60 / rpm  # Here 1.1 is used because even if the calls are made strictly according to time, they will still be QOS'd; consider switching to simple error retry later
+        # Here 1.1 is used because even if the calls are made strictly according to time,
+        # they will still be QOS'd; consider switching to simple error retry later
+        self.interval = 1.1 * 60 / rpm
         self.rpm = rpm

     def split_batches(self, batch):
-        return [batch[i:i + self.rpm] for i in range(0, len(batch), self.rpm)]
+        return [batch[i : i + self.rpm] for i in range(0, len(batch), self.rpm)]

     async def wait_if_needed(self, num_requests):
         current_time = time.time()
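Taken together, split_batches chops a request list into rpm-sized groups and wait_if_needed sleeps long enough to keep the call rate under the limit. Only the first line of wait_if_needed appears in this hunk; a self-contained sketch of the whole pattern, with an assumed body for the rest of the method:

    import asyncio
    import time

    class RateLimiter:
        """Throttle batched calls to at most `rpm` requests per minute (sketch)."""

        def __init__(self, rpm):
            self.last_call_time = 0
            # 1.1 adds ~10% headroom so strictly timed calls still clear server-side QoS.
            self.interval = 1.1 * 60 / rpm
            self.rpm = rpm

        def split_batches(self, batch):
            return [batch[i : i + self.rpm] for i in range(0, len(batch), self.rpm)]

        async def wait_if_needed(self, num_requests):
            # Assumed body: sleep off whatever part of the rate window is still owed.
            current_time = time.time()
            elapsed = current_time - self.last_call_time
            if elapsed < self.interval * num_requests:
                await asyncio.sleep(self.interval * num_requests - elapsed)
            self.last_call_time = time.time()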
@@ -74,13 +76,12 @@ class CostManager(metaclass=Singleton):
         """
         self.total_prompt_tokens += prompt_tokens
         self.total_completion_tokens += completion_tokens
-        cost = (
-            prompt_tokens * TOKEN_COSTS[model]["prompt"]
-            + completion_tokens * TOKEN_COSTS[model]["completion"]
-        ) / 1000
+        cost = (prompt_tokens * TOKEN_COSTS[model]["prompt"] + completion_tokens * TOKEN_COSTS[model]["completion"]) / 1000
         self.total_cost += cost
-        logger.info(f"Total running cost: ${self.total_cost:.3f} | Max budget: ${CONFIG.max_budget:.3f} | "
-                    f"Current cost: ${cost:.3f}, {prompt_tokens=}, {completion_tokens=}")
+        logger.info(
+            f"Total running cost: ${self.total_cost:.3f} | Max budget: ${CONFIG.max_budget:.3f} | "
+            f"Current cost: ${cost:.3f}, prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}"
+        )
         CONFIG.total_cost = self.total_cost

     def get_total_prompt_tokens(self):
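TOKEN_COSTS stores per-1K-token prices, hence the division by 1000. A quick worked example with illustrative gpt-4-era prices ($0.03/1K prompt, $0.06/1K completion; the real numbers come from TOKEN_COSTS):

    prompt_tokens, completion_tokens = 1000, 500
    cost = (prompt_tokens * 0.03 + completion_tokens * 0.06) / 1000
    print(f"${cost:.3f}")  # $0.060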
@@ -133,6 +134,7 @@ class OpenAIGPTAPI(BaseGPTAPI, RateLimiter):
         self.__init_openai(CONFIG)
         self.llm = openai
         self.model = CONFIG.openai_api_model
+        self.auto_max_tokens = False
         self._cost_manager = CostManager()
         RateLimiter.__init__(self, rpm=self.rpm)

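Because CostManager is built on a Singleton metaclass (see the hunk above), every OpenAIGPTAPI instance receives the same CostManager object here, so token totals accumulate process-wide. For reference, a standard metaclass singleton looks like this (a sketch; the project's own definition may differ in detail):

    class Singleton(type):
        """Metaclass that caches one instance per class."""

        _instances = {}

        def __call__(cls, *args, **kwargs):
            if cls not in cls._instances:
                cls._instances[cls] = super().__call__(*args, **kwargs)
            return cls._instances[cls]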
@ -146,10 +148,7 @@ class OpenAIGPTAPI(BaseGPTAPI, RateLimiter):
|
|||
self.rpm = int(config.get("RPM", 10))
|
||||
|
||||
async def _achat_completion_stream(self, messages: list[dict]) -> str:
|
||||
response = await openai.ChatCompletion.acreate(
|
||||
**self._cons_kwargs(messages),
|
||||
stream=True
|
||||
)
|
||||
response = await openai.ChatCompletion.acreate(**self._cons_kwargs(messages), stream=True)
|
||||
|
||||
# create variables to collect the stream of chunks
|
||||
collected_chunks = []
|
||||
|
|
@ -157,42 +156,42 @@ class OpenAIGPTAPI(BaseGPTAPI, RateLimiter):
|
|||
# iterate through the stream of events
|
||||
async for chunk in response:
|
||||
collected_chunks.append(chunk) # save the event response
|
||||
chunk_message = chunk['choices'][0]['delta'] # extract the message
|
||||
chunk_message = chunk["choices"][0]["delta"] # extract the message
|
||||
collected_messages.append(chunk_message) # save the message
|
||||
if "content" in chunk_message:
|
||||
print(chunk_message["content"], end="")
|
||||
print()
|
||||
|
||||
full_reply_content = ''.join([m.get('content', '') for m in collected_messages])
|
||||
full_reply_content = "".join([m.get("content", "") for m in collected_messages])
|
||||
usage = self._calc_usage(messages, full_reply_content)
|
||||
self._update_costs(usage)
|
||||
return full_reply_content
|
||||
|
||||
def _cons_kwargs(self, messages: list[dict]) -> dict:
|
||||
if CONFIG.openai_api_type == 'azure':
|
||||
if CONFIG.openai_api_type == "azure":
|
||||
kwargs = {
|
||||
"deployment_id": CONFIG.deployment_id,
|
||||
"messages": messages,
|
||||
"max_tokens": CONFIG.max_tokens_rsp,
|
||||
"max_tokens": self.get_max_tokens(messages),
|
||||
"n": 1,
|
||||
"stop": None,
|
||||
"temperature": 0.3
|
||||
"temperature": 0.3,
|
||||
}
|
||||
else:
|
||||
kwargs = {
|
||||
"model": self.model,
|
||||
"messages": messages,
|
||||
"max_tokens": CONFIG.max_tokens_rsp,
|
||||
"max_tokens": self.get_max_tokens(messages),
|
||||
"n": 1,
|
||||
"stop": None,
|
||||
"temperature": 0.3
|
||||
"temperature": 0.3,
|
||||
}
|
||||
kwargs["timeout"] = 3
|
||||
return kwargs
|
||||
|
||||
async def _achat_completion(self, messages: list[dict]) -> dict:
|
||||
rsp = await self.llm.ChatCompletion.acreate(**self._cons_kwargs(messages))
|
||||
self._update_costs(rsp.get('usage'))
|
||||
self._update_costs(rsp.get("usage"))
|
||||
return rsp
|
||||
|
||||
def _chat_completion(self, messages: list[dict]) -> dict:
|
||||
|
|
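The streaming code above targets the pre-1.0 openai SDK, where ChatCompletion.acreate(stream=True) yields dict-like chunks and choices[0]["delta"] carries each content increment. A condensed, standalone sketch of the same collection loop, with the MetaGPT-specific pieces (_cons_kwargs, usage accounting) stripped out:

    import openai  # pre-1.0 SDK, matching the API used in this diff

    async def stream_chat(messages: list[dict], model: str = "gpt-3.5-turbo") -> str:
        response = await openai.ChatCompletion.acreate(model=model, messages=messages, stream=True)
        collected_messages = []
        async for chunk in response:
            delta = chunk["choices"][0]["delta"]  # incremental piece of the reply
            collected_messages.append(delta)
            if "content" in delta:
                print(delta["content"], end="")
        print()
        return "".join(m.get("content", "") for m in collected_messages)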
@ -267,3 +266,8 @@ class OpenAIGPTAPI(BaseGPTAPI, RateLimiter):
|
|||
|
||||
def get_costs(self) -> Costs:
|
||||
return self._cost_manager.get_costs()
|
||||
|
||||
def get_max_tokens(self, messages: list[dict]):
|
||||
if not self.auto_max_tokens:
|
||||
return CONFIG.max_tokens_rsp
|
||||
return get_max_completion_tokens(messages, self.model, CONFIG.max_tokens_rsp)
|
||||
|
|
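get_max_tokens ties the new pieces together: with auto_max_tokens at its default of False the response cap stays fixed at CONFIG.max_tokens_rsp, while enabling it delegates to get_max_completion_tokens so the cap grows to whatever still fits in the model's context window. A hypothetical usage sketch:

    # Hypothetical usage; OpenAIGPTAPI and CONFIG come from metagpt as in the diff.
    api = OpenAIGPTAPI()
    messages = [{"role": "user", "content": "Summarize PEP 8 naming rules."}]

    api.auto_max_tokens = False
    print(api.get_max_tokens(messages))  # fixed cap: CONFIG.max_tokens_rsp

    api.auto_max_tokens = True
    print(api.get_max_tokens(messages))  # remaining context budget for this prompt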