Optimize: Update certain prompt formats.

This commit is contained in:
shenchucheng 2023-08-09 20:28:31 +08:00
parent 2d8a3f1296
commit e43ae21d98
9 changed files with 43 additions and 49 deletions

View file

@@ -2,7 +2,7 @@
from __future__ import annotations
from typing import Generator, Optional
from urllib.parse import urljoin
from urllib.parse import urljoin, urlparse
from bs4 import BeautifulSoup
from pydantic import BaseModel
@@ -35,11 +35,11 @@ class WebPage(BaseModel):
def get_links(self) -> Generator[str, None, None]:
for i in self.soup.find_all("a", href=True):
url = i["href"]
if url.startswith("data:"):
continue
if not url.startswith(("http://", "https://")):
url = urljoin(self.url, url)
yield url
result = urlparse(url)
if not result.scheme and result.path:
yield urljoin(self.url, url)
elif url.startswith(("http://", "https://")):
yield urljoin(self.url, url)
def get_html_content(page: str, base: str):

View file

@@ -4,19 +4,19 @@ from metagpt.utils.token_counter import TOKEN_MAX, count_string_tokens
def reduce_message_length(msgs: Generator[str, None, None], model_name: str, system_text: str, reserved: int = 0,) -> str:
"""Reduce the length of messages to fit within the maximum token size.
"""Reduce the length of concatenated message segments to fit within the maximum token size.
Args:
msgs: A generator of strings representing the messages.
msgs: A generator of strings representing progressively shorter valid prompts.
model_name: The name of the encoding to use. (e.g., "gpt-3.5-turbo")
system_text: The system prompts.
reserved: The number of reserved tokens.
Returns:
The reduced message.
The concatenated message segments reduced to fit within the maximum token size.
Raises:
RuntimeError: If it fails to reduce the message length.
RuntimeError: If it fails to reduce the concatenated message length.
"""
max_token = TOKEN_MAX.get(model_name, 2048) - count_string_tokens(system_text, model_name) - reserved
for msg in msgs:

View file

@@ -96,7 +96,7 @@ def count_string_tokens(string: str, model_name: str) -> int:
return len(encoding.encode(string))
def get_max_completion_tokens(messages: list[dict], model: str):
def get_max_completion_tokens(messages: list[dict], model: str, default: int) -> int:
"""Calculate the maximum number of completion tokens for a given model and list of messages.
Args:
@@ -106,4 +106,6 @@ def get_max_completion_tokens(messages: list[dict], model: str):
Returns:
The maximum number of completion tokens.
"""
return TOKEN_MAX.get(model, 4096) - count_message_tokens(messages)
if model not in TOKEN_MAX:
return default
return TOKEN_MAX[model] - count_message_tokens(messages)