diff --git a/metagpt/provider/base_llm.py b/metagpt/provider/base_llm.py
index a1fbeccd0..6560fc7dd 100644
--- a/metagpt/provider/base_llm.py
+++ b/metagpt/provider/base_llm.py
@@ -142,6 +142,38 @@ class BaseLLM(ABC):
             return Costs(0, 0, 0, 0)
         return self.cost_manager.get_costs()
 
+    def mask_base64_data(self, msg: dict) -> dict:
+        """Process the base64 image data in the message, replacing it with placeholders for easier logging
+
+        Args:
+            msg (dict): A dictionary of messages in OpenAI format
+
+        Returns:
+            dict: This is the processed message dictionary with the image data replaced with placeholders
+        """
+        if not isinstance(msg, dict):
+            return msg
+
+        new_msg = msg.copy()
+        content = new_msg.get("content")
+        img_base64_prefix = "data:image/"
+
+        if isinstance(content, list):
+            # Handling multimodal content (like gpt-4v format)
+            new_content = []
+            for item in content:
+                if isinstance(item, dict) and item.get("type") == "image_url":
+                    image_url = item.get("image_url", {}).get("url", "")
+                    if image_url.startswith(img_base64_prefix):
+                        item = item.copy()
+                        item["image_url"] = {"url": ""}
+                new_content.append(item)
+            new_msg["content"] = new_content
+        elif isinstance(content, str) and img_base64_prefix in content:
+            # Process plain text messages containing base64 image data
+            new_msg["content"] = ""
+        return new_msg
+
     async def aask(
         self,
         msg: Union[str, list[dict[str, str]]],
@@ -165,7 +197,11 @@
             message.extend(msg)
         if stream is None:
             stream = self.config.stream
-        logger.debug(message)
+
+        # the image data is replaced with placeholders to avoid long output
+        masked_message = [self.mask_base64_data(m) for m in message]
+        logger.debug(masked_message)
+
         compressed_message = self.compress_messages(message, compress_type=self.config.compress_type)
         rsp = await self.acompletion_text(compressed_message, stream=stream, timeout=self.get_timeout(timeout))
         # rsp = await self.acompletion_text(message, stream=stream, timeout=self.get_timeout(timeout))
diff --git a/metagpt/provider/constant.py b/metagpt/provider/constant.py
index 1e372b07f..041063c4d 100644
--- a/metagpt/provider/constant.py
+++ b/metagpt/provider/constant.py
@@ -34,4 +34,12 @@ GENERAL_TOOL_CHOICE = {"type": "function", "function": {"name": "execute"}}
 MULTI_MODAL_MODELS = [
     "gpt-4o",
     "gpt-4o-mini",
+    "openai/gpt-4o",
+    "gemini-2.0-flash-exp",
+    "gemini-2.0-pro-exp-02-05",
+    "claude-3-5-sonnet-v2",
+    "google/gemini-2.0-flash-exp:free",
+    "google/gemini-2.0-pro-exp-02-05:free",
+    "anthropic/claude-3.5-sonnet",
+    "anthropic/claude-3.7-sonnet",
 ]