Merge branch 'dev' into code_intepreter

This commit is contained in:
yzlin 2024-02-02 20:45:45 +08:00
commit 891e35b92f
108 changed files with 5271 additions and 408 deletions

View file

@ -13,7 +13,9 @@
from __future__ import annotations
import ast
import base64
import contextlib
import csv
import importlib
import inspect
import json
@ -23,11 +25,14 @@ import re
import sys
import traceback
import typing
from io import BytesIO
from pathlib import Path
from typing import Any, List, Tuple, Union
from typing import Any, Callable, List, Tuple, Union
import aiofiles
import loguru
import requests
from PIL import Image
from pydantic_core import to_jsonable_python
from tenacity import RetryCallState, RetryError, _utils
@ -339,6 +344,14 @@ def print_members(module, indent=0):
print(f"{prefix}Method: {name}")
def get_function_schema(func: Callable) -> dict[str, Union[dict, Any, str]]:
    """Build a lightweight schema describing *func*.

    Returns a dict with the parameter-name -> annotation mapping
    ("input_params"), the return annotation ("return_type"), the raw
    docstring ("func_desc"), and the callable itself ("func").
    """
    signature = inspect.signature(func)
    param_schema = {}
    for param_name, param in signature.parameters.items():
        param_schema[param_name] = param.annotation
    return {
        "input_params": param_schema,
        "return_type": signature.return_annotation,
        "func_desc": func.__doc__,
        "func": func,
    }
def parse_recipient(text):
# FIXME: use ActionNode instead.
pattern = r"## Send To:\s*([A-Za-z]+)\s*?" # hard code for now
@ -494,6 +507,29 @@ def write_json_file(json_file: str, data: list, encoding: str = None, indent: in
json.dump(data, fout, ensure_ascii=False, indent=indent, default=to_jsonable_python)
def read_csv_to_list(curr_file: str, header=False, strip_trail=True):
    """Read a csv file into a list of rows (each row a list of str).

    ARGS:
        curr_file: path to the current csv file.
        header: when True, return a (header_row, data_rows) tuple instead
            of the plain row list.
        strip_trail: when True, strip leading/trailing whitespace from
            every cell.
    RETURNS:
        List of list where the component lists are the rows of the file,
        or (header_row, data_rows) when header is True.
    """
    logger.debug(f"start read csv: {curr_file}")
    analysis_list = []
    # newline="" is required by the csv module for correct handling of
    # quoted fields that contain embedded newlines
    with open(curr_file, newline="") as f_analysis_file:
        data_reader = csv.reader(f_analysis_file, delimiter=",")
        for row in data_reader:
            if strip_trail:
                row = [i.strip() for i in row]
            analysis_list.append(row)
    if not header:
        return analysis_list
    # NOTE(review): raises IndexError on an empty file when header=True,
    # same as the original behavior — confirm callers never pass one.
    return analysis_list[0], analysis_list[1:]
def import_class(class_name: str, module_name: str) -> type:
module = importlib.import_module(module_name)
a_class = getattr(module, class_name)
@ -602,3 +638,45 @@ def list_files(root: str | Path) -> List[Path]:
except Exception as e:
logger.error(f"Error: {e}")
return files
def is_coroutine_func(func: Callable) -> bool:
    """Return True when *func* was defined with ``async def``."""
    return inspect.iscoroutinefunction(func)
def load_mc_skills_code(skill_names: list[str] = None, skills_dir: Path = None) -> list[str]:
    """Load Minecraft skill source code from .js files.

    Args:
        skill_names: skill names (file stems) to load; when None, every
            ``*.js`` file found in ``skills_dir`` is loaded.
        skills_dir: directory holding the skill files; defaults to the
            directory containing this module.

    Returns:
        The file contents, one string per skill.
    """
    if not skills_dir:
        skills_dir = Path(__file__).parent.absolute()
    if skill_names is None:
        # discover every JavaScript skill file in the directory
        skill_names = [path.stem for path in skills_dir.glob("*.js")]
    return [skills_dir.joinpath(f"{skill_name}.js").read_text() for skill_name in skill_names]
def encode_image(image_path_or_pil: Union[Path, Image.Image], encoding: str = "utf-8") -> str:
    """Encode an image (file path or in-memory PIL image) into a base64 string.

    Args:
        image_path_or_pil: path to an image file, or a ``PIL.Image.Image``
            instance (the original annotation named the ``Image`` module,
            not the class).
        encoding: text encoding used to decode the base64 bytes into str.

    Returns:
        The base64-encoded image data.

    Raises:
        FileNotFoundError: when a path is given and the file does not exist.
    """
    if isinstance(image_path_or_pil, Image.Image):
        # NOTE(review): JPEG cannot store an alpha channel — saving an RGBA
        # image raises here; convert("RGB") first if that case matters.
        buffer = BytesIO()
        image_path_or_pil.save(buffer, format="JPEG")
        bytes_data = buffer.getvalue()
    else:
        if not image_path_or_pil.exists():
            raise FileNotFoundError(f"{image_path_or_pil} not exists")
        bytes_data = image_path_or_pil.read_bytes()
    return base64.b64encode(bytes_data).decode(encoding)
def decode_image(img_url_or_b64: str) -> Image.Image:
    """Decode an image from an http(s) URL or base64 data into a PIL image.

    Args:
        img_url_or_b64: an ``http(s)`` URL, or base64 image data optionally
            prefixed with a ``data:image/...;base64,`` header.

    Returns:
        The decoded ``PIL.Image.Image`` (the original annotation named the
        ``Image`` module, not the class).
    """
    if img_url_or_b64.startswith("http"):
        # image http(s) url; bound the wait so a dead server cannot hang the
        # caller indefinitely, and surface HTTP errors instead of handing an
        # error page's body to PIL
        resp = requests.get(img_url_or_b64, timeout=60)
        resp.raise_for_status()
        img = Image.open(BytesIO(resp.content))
    else:
        # image b64_json: strip an optional data-URI header, then decode
        b64_data = re.sub("^data:image/.+;base64,", "", img_url_or_b64)
        img_data = BytesIO(base64.b64decode(b64_data))
        img = Image.open(img_data)
    return img

View file

@ -60,23 +60,22 @@ class DependencyFile:
root = self._filename.parent
try:
key = Path(filename).relative_to(root)
key = Path(filename).relative_to(root).as_posix()
except ValueError:
key = filename
skey = re.sub(r"\\+", "/", str(key)) # Compatible with windows path
key = str(key)
if dependencies:
relative_paths = []
for i in dependencies:
try:
s = str(Path(i).relative_to(root))
s = str(Path(i).relative_to(root).as_posix())
except ValueError:
s = str(i)
s = re.sub(r"\\+", "/", s) # Compatible with windows path
relative_paths.append(s)
self._dependencies[skey] = relative_paths
elif skey in self._dependencies:
del self._dependencies[skey]
self._dependencies[key] = relative_paths
elif key in self._dependencies:
del self._dependencies[key]
if persist:
await self.save()
@ -93,7 +92,7 @@ class DependencyFile:
root = self._filename.parent
try:
key = Path(filename).relative_to(root)
key = Path(filename).relative_to(root).as_posix()
except ValueError:
key = filename
return set(self._dependencies.get(str(key), {}))

View file

@ -29,6 +29,7 @@ TOKEN_COSTS = {
"gpt-4-turbo-preview": {"prompt": 0.01, "completion": 0.03},
"gpt-4-0125-preview": {"prompt": 0.01, "completion": 0.03},
"gpt-4-1106-preview": {"prompt": 0.01, "completion": 0.03},
"gpt-4-vision-preview": {"prompt": 0.01, "completion": 0.03}, # TODO add extra image price calculator
"gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03},
"text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0},
"glm-3-turbo": {"prompt": 0.0, "completion": 0.0007}, # 128k version, prompt + completion tokens=0.005¥/k-tokens
@ -54,6 +55,7 @@ TOKEN_MAX = {
"gpt-4-turbo-preview": 128000,
"gpt-4-0125-preview": 128000,
"gpt-4-1106-preview": 128000,
"gpt-4-vision-preview": 128000,
"gpt-4-1106-vision-preview": 128000,
"text-embedding-ada-002": 8192,
"chatglm_turbo": 32768,
@ -82,6 +84,7 @@ def count_message_tokens(messages, model="gpt-3.5-turbo-0613"):
"gpt-4-turbo-preview",
"gpt-4-0125-preview",
"gpt-4-1106-preview",
"gpt-4-vision-preview",
"gpt-4-1106-vision-preview",
}:
tokens_per_message = 3 # # every reply is primed with <|start|>assistant<|message|>
@ -112,7 +115,13 @@ def count_message_tokens(messages, model="gpt-3.5-turbo-0613"):
for message in messages:
num_tokens += tokens_per_message
for key, value in message.items():
num_tokens += len(encoding.encode(value))
content = value
if isinstance(value, list):
# for gpt-4v
for item in value:
if isinstance(item, dict) and item.get("type") in ["text"]:
content = item.get("text", "")
num_tokens += len(encoding.encode(content))
if key == "name":
num_tokens += tokens_per_name
num_tokens += 3 # every reply is primed with <|start|>assistant<|message|>