add gpt-4v support for aask and AN.fill

This commit is contained in:
better629 2024-01-30 11:59:53 +08:00
parent 9e49e2252d
commit 310687258e
8 changed files with 113 additions and 14 deletions

View file

@ -12,7 +12,9 @@
from __future__ import annotations
import ast
import base64
import contextlib
import csv
import importlib
import inspect
import json
@ -465,6 +467,29 @@ def write_json_file(json_file: str, data: list, encoding=None):
json.dump(data, fout, ensure_ascii=False, indent=4, default=to_jsonable_python)
def read_csv_to_list(curr_file: str, header=False, strip_trail=True):
    """Read a CSV file into a list of rows.

    Args:
        curr_file: path to the CSV file to read.
        header: when True, return a ``(header_row, data_rows)`` tuple instead
            of the flat row list.
        strip_trail: when True, strip surrounding whitespace from every cell.

    Returns:
        A list of rows (each row a list of str), or a
        ``(header_row, data_rows)`` tuple when ``header`` is True.
        An empty file yields ``[]`` (or ``([], [])`` with ``header=True``).
    """
    logger.debug(f"start read csv: {curr_file}")
    analysis_list = []
    with open(curr_file) as f_analysis_file:
        data_reader = csv.reader(f_analysis_file, delimiter=",")
        for row in data_reader:
            if strip_trail:
                row = [i.strip() for i in row]
            analysis_list.append(row)
    if not header:
        return analysis_list
    # Guard against an empty file: analysis_list[0] would raise IndexError.
    return (analysis_list[0], analysis_list[1:]) if analysis_list else ([], [])
def import_class(class_name: str, module_name: str) -> type:
    """Dynamically resolve and return the class *class_name* from *module_name*."""
    target_module = importlib.import_module(module_name)
    return getattr(target_module, class_name)
@ -573,3 +598,8 @@ def list_files(root: str | Path) -> List[Path]:
except Exception as e:
logger.error(f"Error: {e}")
return files
def encode_image(image_path: Path, encoding: str = "utf-8") -> str:
    """Read the file at *image_path* and return its contents as a base64 string."""
    with open(str(image_path), "rb") as fp:
        payload = fp.read()
    return base64.b64encode(payload).decode(encoding)

View file

@ -29,6 +29,7 @@ TOKEN_COSTS = {
"gpt-4-turbo-preview": {"prompt": 0.01, "completion": 0.03},
"gpt-4-0125-preview": {"prompt": 0.01, "completion": 0.03},
"gpt-4-1106-preview": {"prompt": 0.01, "completion": 0.03},
"gpt-4-vision-preview": {"prompt": 0.01, "completion": 0.03}, # TODO add extra image price calculator
"gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03},
"text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0},
"glm-3-turbo": {"prompt": 0.0, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens
@ -54,6 +55,7 @@ TOKEN_MAX = {
"gpt-4-turbo-preview": 128000,
"gpt-4-0125-preview": 128000,
"gpt-4-1106-preview": 128000,
"gpt-4-vision-preview": 128000,
"gpt-4-1106-vision-preview": 128000,
"text-embedding-ada-002": 8192,
"chatglm_turbo": 32768,
@ -82,6 +84,7 @@ def count_message_tokens(messages, model="gpt-3.5-turbo-0613"):
"gpt-4-turbo-preview",
"gpt-4-0125-preview",
"gpt-4-1106-preview",
"gpt-4-vision-preview",
"gpt-4-1106-vision-preview",
}:
tokens_per_message = 3 # # every reply is primed with <|start|>assistant<|message|>
@ -112,7 +115,13 @@ def count_message_tokens(messages, model="gpt-3.5-turbo-0613"):
for message in messages:
num_tokens += tokens_per_message
for key, value in message.items():
num_tokens += len(encoding.encode(value))
content = value
if isinstance(value, list):
# for gpt-4v
for item in value:
if isinstance(item, dict) and item.get("type") in ["text"]:
content = item.get("text", "")
num_tokens += len(encoding.encode(content))
if key == "name":
num_tokens += tokens_per_name
num_tokens += 3 # every reply is primed with <|start|>assistant<|message|>