feat: merge geekan:main

This commit is contained in:
莘权 马 2024-03-05 10:59:34 +08:00
commit e22a28215d
102 changed files with 1766 additions and 756 deletions

View file

@ -14,6 +14,7 @@ import re
import uuid
from typing import Callable
import aiohttp.web
import pytest
from metagpt.const import DEFAULT_WORKSPACE_ROOT, TEST_DATA_PATH
@ -171,9 +172,8 @@ def new_filename(mocker):
yield mocker
@pytest.fixture(scope="session")
def search_rsp_cache():
rsp_cache_file_path = TEST_DATA_PATH / "search_rsp_cache.json" # read repo-provided
def _rsp_cache(name):
rsp_cache_file_path = TEST_DATA_PATH / f"{name}.json" # read repo-provided
if os.path.exists(rsp_cache_file_path):
with open(rsp_cache_file_path, "r") as f1:
rsp_cache_json = json.load(f1)
@ -184,6 +184,16 @@ def search_rsp_cache():
json.dump(rsp_cache_json, f2, indent=4, ensure_ascii=False)
@pytest.fixture(scope="session")
def search_rsp_cache():
    """Session-scoped response cache for the search-engine tests.

    Delegates entirely to ``_rsp_cache("search_rsp_cache")``; whatever that
    generator yields is what dependent fixtures and tests receive.
    """
    cache_gen = _rsp_cache("search_rsp_cache")
    yield from cache_gen
@pytest.fixture(scope="session")
def mermaid_rsp_cache():
    """Session-scoped response cache for the mermaid tests.

    Delegates entirely to ``_rsp_cache("mermaid_rsp_cache")``; whatever that
    generator yields is what dependent fixtures and tests receive.
    """
    cache_gen = _rsp_cache("mermaid_rsp_cache")
    yield from cache_gen
@pytest.fixture
def aiohttp_mocker(mocker):
MockResponse = type("MockResponse", (MockAioResponse,), {})
@ -231,3 +241,32 @@ def search_engine_mocker(aiohttp_mocker, curl_cffi_mocker, httplib2_mocker, sear
aiohttp_mocker.rsp_cache = httplib2_mocker.rsp_cache = curl_cffi_mocker.rsp_cache = search_rsp_cache
aiohttp_mocker.check_funcs = httplib2_mocker.check_funcs = curl_cffi_mocker.check_funcs = check_funcs
yield check_funcs
@pytest.fixture
def http_server():
    """Provide a coroutine function that starts a small local HTTP server.

    The fixture starts nothing by itself; it returns ``start``. Awaiting
    ``start()`` binds an aiohttp server to an OS-assigned port on
    ``localhost`` and returns ``(site, url)``: the running
    ``aiohttp.web.TCPSite`` and the base URL it can be reached at. Every
    request is answered with the same static HTML page titled "MetaGPT".

    Nothing in this fixture stops the server; the returned ``site`` is the
    handle the caller has for doing so.
    """

    async def handler(request):
        # The same page is served regardless of the request.
        return aiohttp.web.Response(
            text="""<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8">
<title>MetaGPT</title></head><body><h1>MetaGPT</h1></body></html>""",
            content_type="text/html",
        )

    async def start():
        server = aiohttp.web.Server(handler)
        runner = aiohttp.web.ServerRunner(server)
        await runner.setup()
        # Port 0 lets the OS pick a free port.
        site = aiohttp.web.TCPSite(runner, "localhost", 0)
        await site.start()
        # "localhost" may resolve to an IPv6 address, in which case
        # getsockname() is a 4-tuple (host, port, flowinfo, scope_id).
        # Keep only host and port so the unpacking cannot fail, and read the
        # address through the runner's public API instead of site._server.
        host, port = runner.addresses[0][:2]
        if ":" in host:
            # IPv6 literals must be bracketed inside a URL.
            host = f"[{host}]"
        return site, f"http://{host}:{port}"

    return start
@pytest.fixture
def mermaid_mocker(aiohttp_mocker, mermaid_rsp_cache):
    """Wire the aiohttp mock to the mermaid response cache.

    Sets ``aiohttp_mocker.rsp_cache`` to ``mermaid_rsp_cache`` and
    ``aiohttp_mocker.check_funcs`` to a fresh, empty dict, then yields that
    dict so a test can register entries in it.
    """
    # NOTE(review): the key is a pair of strings and the value maps a dict to
    # a string; the exact meaning of each is defined by the mock — confirm there.
    registered_checks: dict[tuple[str, str], Callable[[dict], str]] = {}
    aiohttp_mocker.rsp_cache = mermaid_rsp_cache
    aiohttp_mocker.check_funcs = registered_checks
    yield registered_checks

File diff suppressed because one or more lines are too long

View file

@ -355,7 +355,7 @@
"code": "print(data.head())"
},
"\n # Context:\n user: run analysis on sklearn iris dataset\n # Task:\n Based on the context, write a plan or modify an existing plan of what you should do to achieve the goal. A plan consists of one to 5 tasks.\n If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes. Give the whole plan unless instructed to modify only one task of the plan.\n If you encounter errors on the current task, revise and output the current single task only.\n Output a list of jsons following the format:\n ```json\n [\n {\n \"task_id\": str = \"unique identifier for a task in plan, can be an ordinal\",\n \"dependent_task_ids\": list[str] = \"ids of tasks prerequisite to this task\",\n \"instruction\": \"what you should do in this task, one short phrase or sentence\",\n },\n ...\n ]\n ```\n ": "```json\n[\n {\n \"task_id\": \"1\",\n \"dependent_task_ids\": [],\n \"instruction\": \"Import the Iris dataset from sklearn.datasets\"\n },\n {\n \"task_id\": \"2\",\n \"dependent_task_ids\": [\"1\"],\n \"instruction\": \"Perform exploratory data analysis to understand the dataset\"\n },\n {\n \"task_id\": \"3\",\n \"dependent_task_ids\": [\"2\"],\n \"instruction\": \"Preprocess the data to prepare it for modeling\"\n },\n {\n \"task_id\": \"4\",\n \"dependent_task_ids\": [\"3\"],\n \"instruction\": \"Split the dataset into training and testing sets\"\n },\n {\n \"task_id\": \"5\",\n \"dependent_task_ids\": [\"4\"],\n \"instruction\": \"Train a classifier using the training set and evaluate it using the test set\"\n }\n]\n```",
"[{\"role\": \"user\", \"content\": \"\\nPlease assign a task type to each task in the list below from the given categories:\\nTask 1: Import the Iris dataset from sklearn.datasets\\nTask 2: Perform exploratory data analysis to understand the dataset\\nTask 3: Preprocess the data to prepare it for modeling\\nTask 4: Split the dataset into training and testing sets\\nTask 5: Train a classifier using the training set and evaluate it using the test set\\n\\n## All Task Type:\\n- **eda**: For performing exploratory data analysis\\n- **data_preprocess**: Only for changing value inplace.\\n- **feature_engineering**: Only for creating new columns for input data.\\n- **model_train**: Only for training model.\\n- **model_evaluate**: Only for evaluating model.\\n- **stable_diffusion**: Related to text2image, image2image using stable diffusion model.\\n- **image2webpage**: For converting image into webpage code.\\n- **web_scraping**: For scraping data from web pages.\\n- **other**: Any tools not in the defined categories\\n\"}]": {
"[{\"role\": \"user\", \"content\": \"\\nPlease assign a task type to each task in the list below from the given categories:\\nTask 1: Import the Iris dataset from sklearn.datasets\\nTask 2: Perform exploratory data analysis to understand the dataset\\nTask 3: Preprocess the data to prepare it for modeling\\nTask 4: Split the dataset into training and testing sets\\nTask 5: Train a classifier using the training set and evaluate it using the test set\\n\\n## All Task Type:\\n- **eda**: For performing exploratory data analysis\\n- **data_preprocess**: Only for changing value inplace.\\n- **email_login**: For logging to an email.\\n- **feature_engineering**: Only for creating new columns for input data.\\n- **model_train**: Only for training model.\\n- **model_evaluate**: Only for evaluating model.\\n- **stable_diffusion**: Related to text2image, image2image using stable diffusion model.\\n- **image2webpage**: For converting image into webpage code.\\n- **web_scraping**: For scraping data from web pages.\\n- **other**: Any tools not in the defined categories\\n\"}]": {
"task_type": [
"other",
"eda",
@ -366,24 +366,27 @@
]
},
"\n # Context:\n user: \n## User Requirement\nRun data analysis on sklearn Iris dataset, include a plot\n## Context\n\n## Current Plan\n[]\n## Current Task\n{}\n\n # Task:\n Based on the context, write a plan or modify an existing plan of what you should do to achieve the goal. A plan consists of one to 3 tasks.\n If you are modifying an existing plan, carefully follow the instruction, don't make unnecessary changes. Give the whole plan unless instructed to modify only one task of the plan.\n If you encounter errors on the current task, revise and output the current single task only.\n Output a list of jsons following the format:\n ```json\n [\n {\n \"task_id\": str = \"unique identifier for a task in plan, can be an ordinal\",\n \"dependent_task_ids\": list[str] = \"ids of tasks prerequisite to this task\",\n \"instruction\": \"what you should do in this task, one short phrase or sentence\",\n },\n ...\n ]\n ```\n ": "```json\n[\n {\n \"task_id\": \"1\",\n \"dependent_task_ids\": [],\n \"instruction\": \"Load the sklearn Iris dataset.\"\n },\n {\n \"task_id\": \"2\",\n \"dependent_task_ids\": [\"1\"],\n \"instruction\": \"Perform exploratory data analysis on the Iris dataset.\"\n },\n {\n \"task_id\": \"3\",\n \"dependent_task_ids\": [\"2\"],\n \"instruction\": \"Create a plot visualizing the Iris dataset.\"\n }\n]\n```",
"[{\"role\": \"user\", \"content\": \"\\nPlease assign a task type to each task in the list below from the given categories:\\nTask 1: Load the sklearn Iris dataset.\\nTask 2: Perform exploratory data analysis on the Iris dataset.\\nTask 3: Create a plot visualizing the Iris dataset.\\n\\n## All Task Type:\\n- **eda**: For performing exploratory data analysis\\n- **data_preprocess**: Only for changing value inplace.\\n- **feature_engineering**: Only for creating new columns for input data.\\n- **model_train**: Only for training model.\\n- **model_evaluate**: Only for evaluating model.\\n- **stable_diffusion**: Related to text2image, image2image using stable diffusion model.\\n- **image2webpage**: For converting image into webpage code.\\n- **web_scraping**: For scraping data from web pages.\\n- **other**: Any tools not in the defined categories\\n\"}]": {
"[{\"role\": \"user\", \"content\": \"\\nPlease assign a task type to each task in the list below from the given categories:\\nTask 1: Load the sklearn Iris dataset.\\nTask 2: Perform exploratory data analysis on the Iris dataset.\\nTask 3: Create a plot visualizing the Iris dataset.\\n\\n## All Task Type:\\n- **eda**: For performing exploratory data analysis\\n- **data_preprocess**: Only for changing value inplace.\\n- **email_login**: For logging to an email.\\n- **feature_engineering**: Only for creating new columns for input data.\\n- **model_train**: Only for training model.\\n- **model_evaluate**: Only for evaluating model.\\n- **stable_diffusion**: Related to text2image, image2image using stable diffusion model.\\n- **image2webpage**: For converting image into webpage code.\\n- **web_scraping**: For scraping data from web pages.\\n- **other**: Any tools not in the defined categories\\n\"}]": {
"task_type": [
"other",
"data_preprocess",
"eda",
"other"
]
},
"[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n {\\n \\\"task_id\\\": \\\"1\\\",\\n \\\"dependent_task_ids\\\": [],\\n \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n },\\n {\\n \\\"task_id\\\": \\\"2\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"1\\\"\\n ],\\n \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"eda\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n },\\n {\\n \\\"task_id\\\": \\\"3\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"2\\\"\\n ],\\n \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"1\\\",\\\"dependent_task_ids\\\":[],\\\"instruction\\\":\\\"Load the sklearn Iris 
dataset.\\\",\\\"task_type\\\":\\\"other\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": {
"code": "from sklearn.datasets import load_iris\niris_data = load_iris()"
"[{\"role\": \"user\", \"content\": \"\\n## User Requirement:\\nLoad the sklearn Iris dataset.\\n\\n## Task\\nRecommend up to five tools from 'Available Tools' that can help solve the 'User Requirement'. \\n\\n## Available Tools:\\n{'FillMissingValue': 'Completing missing values with simple strategies.', 'MinMaxScale': 'Transform features by scaling each feature to a range, which is (0, 1).', 'StandardScale': 'Standardize features by removing the mean and scaling to unit variance.', 'MaxAbsScale': 'Scale each feature by its maximum absolute value.', 'RobustScale': 'Apply the RobustScaler to scale features using statistics that are robust to outliers.', 'OrdinalEncode': 'Encode categorical features as ordinal integers.', 'OneHotEncode': 'Apply one-hot encoding to specified categorical columns, the original columns will be dropped.', 'LabelEncode': 'Apply label encoding to specified categorical columns in-place.'}\\n\\n## Tool Selection and Instructions:\\n- Select tools most relevant to completing the 'User Requirement'.\\n- If you believe that no tools are suitable, indicate with an empty list.\\n- Only list the names of the tools, not the full schema of each tool.\\n- Ensure selected tools are listed in 'Available Tools'.\\n\"}]": {
"recommend_tools": []
},
"[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n {\\n \\\"task_id\\\": \\\"1\\\",\\n \\\"dependent_task_ids\\\": [],\\n \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"2\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"1\\\"\\n ],\\n \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"eda\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n },\\n {\\n \\\"task_id\\\": \\\"3\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"2\\\"\\n ],\\n \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"2\\\",\\\"dependent_task_ids\\\":[\\\"1\\\"],\\\"instruction\\\":\\\"Perform exploratory data analysis on the Iris 
dataset.\\\",\\\"task_type\\\":\\\"eda\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": {
"code": "from sklearn import datasets\nimport pandas as pd\n\n# Load Iris dataset\niris = datasets.load_iris()\ndf_iris = pd.DataFrame(data=iris.data, columns=iris.feature_names)\ndf_iris['target'] = iris.target\n\n# Display basic information about the dataset\nprint(df_iris.info())\n\n# Display statistical summary of the dataset\nprint(df_iris.describe())\n\n# Display the first few rows of the dataset\nprint(df_iris.head())\n\n# Display the distribution of the target variable\ntarget_counts = df_iris['target'].value_counts()\nprint(target_counts)"
"[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n {\\n \\\"task_id\\\": \\\"1\\\",\\n \\\"dependent_task_ids\\\": [],\\n \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n \\\"task_type\\\": \\\"data_preprocess\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n },\\n {\\n \\\"task_id\\\": \\\"2\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"1\\\"\\n ],\\n \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"eda\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n },\\n {\\n \\\"task_id\\\": \\\"3\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"2\\\"\\n ],\\n \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"1\\\",\\\"dependent_task_ids\\\":[],\\\"instruction\\\":\\\"Load the sklearn Iris 
dataset.\\\",\\\"task_type\\\":\\\"data_preprocess\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\nThe current task is about data preprocessing, please note the following:\\n- Monitor data types per column, applying appropriate methods.\\n- Ensure operations are on existing dataset columns.\\n- Avoid writing processed data to files.\\n- Avoid any change to label column, such as standardization, etc.\\n- Prefer alternatives to one-hot encoding for categorical data.\\n- Only encode or scale necessary columns to allow for potential feature-specific engineering tasks (like time_extract, binning, extraction, etc.) later.\\n- Each step do data preprocessing to train, must do same for test separately at the same time.\\n\\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": {
"code": "from sklearn.datasets import load_iris\niris_data = load_iris()\nX, y = iris_data.data, iris_data.target"
},
"[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n {\\n \\\"task_id\\\": \\\"1\\\",\\n \\\"dependent_task_ids\\\": [],\\n \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"2\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"1\\\"\\n ],\\n \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"eda\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"3\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"2\\\"\\n ],\\n \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"3\\\",\\\"dependent_task_ids\\\":[\\\"2\\\"],\\\"instruction\\\":\\\"Create a plot visualizing the Iris 
dataset.\\\",\\\"task_type\\\":\\\"other\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": {
"code": "from sklearn import datasets\nimport matplotlib.pyplot as plt\nimport pandas as pd\n\n# Load Iris dataset\niris = datasets.load_iris()\niris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)\niris_df['target'] = iris.target\niris_df['target_name'] = iris_df['target'].apply(lambda x: iris.target_names[x])\n\n# Plotting\nfig, ax = plt.subplots(figsize=(12, 8))\nfor target, target_name in zip(iris.target_names, iris.target_names):\n subset = iris_df[iris_df['target_name'] == target_name]\n ax.scatter(subset[iris.feature_names[0]], subset[iris.feature_names[1]], label=target_name)\n\nax.set_xlabel(iris.feature_names[0])\nax.set_ylabel(iris.feature_names[1])\nax.legend()\nplt.show()"
"[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n {\\n \\\"task_id\\\": \\\"1\\\",\\n \\\"dependent_task_ids\\\": [],\\n \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n \\\"task_type\\\": \\\"data_preprocess\\\",\\n \\\"code\\\": \\\"from sklearn.datasets import load_iris\\\\niris_data = load_iris()\\\\nX, y = iris_data.data, iris_data.target\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"2\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"1\\\"\\n ],\\n \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"eda\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n },\\n {\\n \\\"task_id\\\": \\\"3\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"2\\\"\\n ],\\n \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n }\\n]\\n## Current 
Task\\n{\\\"task_id\\\":\\\"2\\\",\\\"dependent_task_ids\\\":[\\\"1\\\"],\\\"instruction\\\":\\\"Perform exploratory data analysis on the Iris dataset.\\\",\\\"task_type\\\":\\\"eda\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\nThe current task is about exploratory data analysis, please note the following:\\n- Distinguish column types with `select_dtypes` for tailored analysis and visualization, such as correlation.\\n- Remember to `import numpy as np` before using Numpy functions.\\n\\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": {
"code": "import numpy as np\nimport pandas as pd\n\n# Convert the sklearn dataset to a pandas DataFrame for easier manipulation\nfeature_names = iris_data.feature_names\niris_df = pd.DataFrame(X, columns=feature_names)\n\n# Add target column to the DataFrame\niris_df['species'] = pd.Categorical.from_codes(iris_data.target, iris_data.target_names)\n\n# Display basic information about the dataset\niris_info = iris_df.info()\n\n# Display basic statistics about the dataset\niris_description = iris_df.describe()\n\n# Check for missing values\nmissing_values = iris_df.isnull().sum()\n\n# Distinguish column types\nnumerical_cols = iris_df.select_dtypes(include=[np.number]).columns.tolist()\ncategorical_cols = iris_df.select_dtypes(include=['category']).columns.tolist()\n\n# Display the first few rows of the DataFrame\nhead = iris_df.head()\n\n# Output the results\n(iris_info, iris_description, missing_values, numerical_cols, categorical_cols, head)"
},
"[{\"role\": \"user\", \"content\": \"\\n# Background\\nAs a data scientist, you need to help user to achieve their goal [This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: 'tests/data/ml_datasets/titanic/split_train.csv', eval data path: 'tests/data/ml_datasets/titanic/split_eval.csv'.] step-by-step in an continuous Jupyter notebook.\\n\\n## Done Tasks\\n```python\\n\\n```end\\n\\n## Current Task\\nPerform exploratory data analysis on the train dataset to understand the features and target variable.\\n\\n# Latest Data Info\\nLatest data info after previous tasks:\\n\\n\\n# Task\\nWrite complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Output Example:\\nwhen current task is \\\"train a lightgbm model on training data\\\", the code can be like:\\n```python\\n# Step 1: check data type and convert to numeric\\nobj_cols = train.select_dtypes(include='object').columns.tolist()\\n\\nfor col in obj_cols:\\n encoder = LabelEncoder()\\n train[col] = encoder.fit_transform(train[col].unique().tolist() + ['unknown'])\\n test[col] = test[col].apply(lambda x: x if x in encoder.classes_ else 'unknown')\\n test[col] = encoder.transform(test[col])\\n\\n# Step 2: train lightgbm model\\nmodel = LGBMClassifier()\\nmodel.fit(train, y_train)\\n```end\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n\"}]": {
"code": "# Perform exploratory data analysis on the train dataset\ndf_train = pd.read_csv('tests/data/ml_datasets/titanic/split_train.csv')\n\n# Display the first few rows of the dataset\ndisplay(df_train.head())\n\n# Summary statistics for numerical features\ndisplay(df_train.describe())\n\n# Summary information about the dataset including the data types and number of non-null values\ndisplay(df_train.info())\n\n# Distribution of the target variable 'Survived'\nsurvival_counts = df_train['Survived'].value_counts()\nprint(\"Survival counts:\\n\", survival_counts)\n\n# Visualizations\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n\n# Distribution of the target variable\nsns.countplot(x='Survived', data=df_train)\nplt.title('Distribution of Survival')\nplt.show()\n\n# Correlation matrix heatmap to understand the relationship between features\nplt.figure(figsize=(10, 8))\nsns.heatmap(df_train.corr(), annot=True, fmt='.2f')\nplt.title('Correlation Matrix')\nplt.show()\n\n# Pairplot to visualize the pairwise relationships between features\nsns.pairplot(df_train, hue='Survived')\nplt.title('Pairplot of Features')\nplt.show()"
"[{\"role\": \"system\", \"content\": \"You are Code Interpreter, a world-class programmer that can complete any goal by executing code. Strictly follow the plan and generate code step by step. Each step of the code will be executed on the user's machine, and the user will provide the code execution results to you.**Notice: The code for the next step depends on the code for the previous step. Must reuse variables in the lastest other code directly, dont creat it again, it is very import for you. Use !pip install in a standalone block to install missing packages.Usually the libraries you need are already installed.Dont check if packages already imported.**\"}, {\"role\": \"user\", \"content\": \"\\n## User Requirement\\nRun data analysis on sklearn Iris dataset, include a plot\\n## Context\\n\\n## Current Plan\\n[\\n {\\n \\\"task_id\\\": \\\"1\\\",\\n \\\"dependent_task_ids\\\": [],\\n \\\"instruction\\\": \\\"Load the sklearn Iris dataset.\\\",\\n \\\"task_type\\\": \\\"data_preprocess\\\",\\n \\\"code\\\": \\\"from sklearn.datasets import load_iris\\\\niris_data = load_iris()\\\\nX, y = iris_data.data, iris_data.target\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"2\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"1\\\"\\n ],\\n \\\"instruction\\\": \\\"Perform exploratory data analysis on the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"eda\\\",\\n \\\"code\\\": \\\"import numpy as np\\\\nimport pandas as pd\\\\n\\\\n# Convert the sklearn dataset to a pandas DataFrame for easier manipulation\\\\nfeature_names = iris_data.feature_names\\\\niris_df = pd.DataFrame(X, columns=feature_names)\\\\n\\\\n# Add target column to the DataFrame\\\\niris_df['species'] = pd.Categorical.from_codes(iris_data.target, iris_data.target_names)\\\\n\\\\n# Display basic information about the dataset\\\\niris_info = iris_df.info()\\\\n\\\\n# Display basic statistics about the 
dataset\\\\niris_description = iris_df.describe()\\\\n\\\\n# Check for missing values\\\\nmissing_values = iris_df.isnull().sum()\\\\n\\\\n# Distinguish column types\\\\nnumerical_cols = iris_df.select_dtypes(include=[np.number]).columns.tolist()\\\\ncategorical_cols = iris_df.select_dtypes(include=['category']).columns.tolist()\\\\n\\\\n# Display the first few rows of the DataFrame\\\\nhead = iris_df.head()\\\\n\\\\n# Output the results\\\\n(iris_info, iris_description, missing_values, numerical_cols, categorical_cols, head)\\\",\\n \\\"result\\\": \\\"a successful run\\\",\\n \\\"is_success\\\": true,\\n \\\"is_finished\\\": true\\n },\\n {\\n \\\"task_id\\\": \\\"3\\\",\\n \\\"dependent_task_ids\\\": [\\n \\\"2\\\"\\n ],\\n \\\"instruction\\\": \\\"Create a plot visualizing the Iris dataset.\\\",\\n \\\"task_type\\\": \\\"other\\\",\\n \\\"code\\\": \\\"\\\",\\n \\\"result\\\": \\\"\\\",\\n \\\"is_success\\\": false,\\n \\\"is_finished\\\": false\\n }\\n]\\n## Current Task\\n{\\\"task_id\\\":\\\"3\\\",\\\"dependent_task_ids\\\":[\\\"2\\\"],\\\"instruction\\\":\\\"Create a plot visualizing the Iris dataset.\\\",\\\"task_type\\\":\\\"other\\\",\\\"code\\\":\\\"\\\",\\\"result\\\":\\\"\\\",\\\"is_success\\\":false,\\\"is_finished\\\":false}\\n\"}, {\"role\": \"user\", \"content\": \"\\n# Instruction\\nWrite complete code for 'Current Task'. And avoid duplicating code from finished tasks, such as repeated import of packages, reading data, etc.\\nSpecifically, \\n\\n# Capabilities\\n- You can utilize pre-defined tools in any code lines from 'Available Tools' in the form of Python Class.\\n- You can freely combine the use of any other public packages, like sklearn, numpy, pandas, etc..\\n\\n# Available Tools (can be empty):\\nEach Class tool is described in JSON format. 
When you call a tool, import the tool first.\\n{}\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n- Always prioritize using pre-defined tools for the same functionality.\\n\"}]": {
"code": "import matplotlib.pyplot as plt\nimport seaborn as sns\n\n# Set the style of seaborn\nsns.set(style='whitegrid')\n\n# Create a pairplot to visualize the relationships between the features\npairplot = sns.pairplot(iris_df, hue='species')\nplt.show()"
},
"[{\"role\": \"user\", \"content\": \"\\n# Background\\nAs a data scientist, you need to help user to achieve their goal [This is a titanic passenger survival dataset, your goal is to predict passenger survival outcome. The target column is Survived. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report accuracy on the eval data. Train data path: 'tests/data/ml_datasets/titanic/split_train.csv', eval data path: 'tests/data/ml_datasets/titanic/split_eval.csv'.] step-by-step in an continuous Jupyter notebook.\\n\\n## Done Tasks\\n```python\\n\\n```end\\n\\n## Current Task\\nPerform exploratory data analysis on the train dataset to understand the features and target variable.\\n\\n# Latest Data Info\\nLatest data info after previous tasks:\\n\\n\\n# Task\\nWrite complete code for 'Current Task'. And avoid duplicating code from 'Done Tasks', such as repeated import of packages, reading data, etc.\\nSpecifically, \\nThe current task is about exploratory data analysis, please note the following:\\n- Distinguish column types with `select_dtypes` for tailored analysis and visualization, such as correlation.\\n- Remember to `import numpy as np` before using Numpy functions.\\n\\n\\n# Output Example:\\nwhen current task is \\\"train a lightgbm model on training data\\\", the code can be like:\\n```python\\n# Step 1: check data type and convert to numeric\\nobj_cols = train.select_dtypes(include='object').columns.tolist()\\n\\nfor col in obj_cols:\\n encoder = LabelEncoder()\\n train[col] = encoder.fit_transform(train[col].unique().tolist() + ['unknown'])\\n test[col] = test[col].apply(lambda x: x if x in encoder.classes_ else 'unknown')\\n test[col] = encoder.transform(test[col])\\n\\n# Step 2: train lightgbm model\\nmodel = LGBMClassifier()\\nmodel.fit(train, y_train)\\n```end\\n\\n# Constraints:\\n- Ensure the output new code is executable in the same Jupyter notebook with previous tasks code have been executed.\\n\"}]": 
{
"code": "# Perform exploratory data analysis on the train dataset\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n\n# Read the train dataset\ntrain_data = pd.read_csv('tests/data/ml_datasets/titanic/split_train.csv')\n\n# Display the first few rows of the dataset\ndisplay(train_data.head())\n\n# Summary statistics for numerical features\nprint(train_data.describe())\n\n# Summary statistics for categorical features\nprint(train_data.describe(include=['O']))\n\n# Check for missing values\nprint(train_data.isnull().sum())\n\n# Distribution of the target variable\nsns.countplot(x='Survived', data=train_data)\nplt.title('Distribution of Survival on the Titanic')\nplt.show()\n\n# Correlation matrix for numerical features\nnumerical_features = train_data.select_dtypes(include=[np.number])\ncorrelation_matrix = numerical_features.corr()\nplt.figure(figsize=(10, 8))\nsns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0)\nplt.title('Correlation Matrix for Numerical Features')\nplt.show()\n\n# Pairplot for selected features\nselected_features = ['Survived', 'Pclass', 'Age', 'SibSp', 'Parch', 'Fare']\nsns.pairplot(train_data[selected_features], hue='Survived')\nplt.show()\n\n# Boxplot for categorical features vs Survived\nfor column in train_data.select_dtypes(include=['O']).columns:\n if column != 'Survived':\n plt.figure(figsize=(10, 5))\n sns.boxplot(x='Survived', y=column, data=train_data)\n plt.title(f'Survived vs {column}')\n plt.show()\n"
},
"[{\"role\": \"system\", \"content\": \"You are an AI Python assistant. You will be given your previous implementation code of a task, runtime error results, and a hint to change the implementation appropriately. Write your full implementation \"}, {\"role\": \"user\", \"content\": \"\\nHere is an example for you.\\n\\nExample 1:\\n[previous impl]:\\n```python\\ndef add(a: int, b: int) -> int:\\n \\\"\\\"\\\"\\n Given integers a and b, return the total value of a and b.\\n \\\"\\\"\\\"\\n return a - b\\n```\\n\\n[runtime Error]:\\nTested passed:\\n\\nTests failed:\\nassert add(1, 2) == 3 # output: -1\\nassert add(1, 2) == 4 # output: -1\\n\\n[reflection on previous impl]:\\nThe implementation failed the test cases where the input integers are 1 and 2. The issue arises because the code does not add the two integers together, but instead subtracts the second integer from the first. To fix this issue, we should change the operator from `-` to `+` in the return statement. This will ensure that the function returns the correct output for the given input.\\n\\n[improved impl]:\\n```python\\ndef add(a: int, b: int) -> int:\\n \\\"\\\"\\\"\\n Given integers a and b, return the total value of a and b.\\n \\\"\\\"\\\"\\n return a + b\\n```\\n\\n[context]\\nSolve the problem in Python:\\ndef sort_array(arr):\\n \\\"\\\"\\\"\\n In this Kata, you have to sort an array of non-negative integers according to\\n number of ones in their binary representation in ascending order.\\n For similar number of ones, sort based on decimal value.\\n\\n It must be implemented like this:\\n >>> sort_array([1, 5, 2, 3, 4]) == [1, 2, 3, 4, 5]\\n >>> sort_array([-2, -3, -4, -5, -6]) == [-6, -5, -4, -3, -2]\\n >>> sort_array([1, 0, 2, 3, 4]) [0, 1, 2, 3, 4]\\n \\\"\\\"\\\"\\n\\n\\n[previous impl]\\n\\ndef sort_array(arr):\\n # Helper function to count the number of ones in the binary representation\\n def count_ones(n):\\n return bin(n).count('1')\\n \\n # Sort the array using a custom key 
function\\n # The key function returns a tuple (number of ones, value) for each element\\n # This ensures that if two elements have the same number of ones, they are sorted by their value\\n sorted_arr = sorted(arr, key=lambda x: (count_ones(x), x))\\n \\n return sorted_arr\\n```\\n\\n[runtime Error]\\n[user: Tested passed:\\n\\nTests failed:\\nassert sort_array([1, 5, 2, 3, 4]) == [1, 2, 3, 4, 5] # output: [1, 2, 4, 3, 5]\\n]\\n\\nAnalysis the error step by step, provide me improve method and code. Remember to follow [context] requirement. Don't forget write code for steps behind the error step.\\n[reflection on previous impl]:\\nxxx\\n\"}]": {
"reflection": "The implementation failed the test case where the input array is [1, 5, 2, 3, 4]. The issue arises because the code does not handle the sorting of elements with the same number of ones in their binary representation correctly. According to the context, for similar number of ones, the elements should be sorted based on their decimal value in ascending order. However, the output [1, 2, 4, 3, 5] shows that 3 and 4 are not in the correct order, indicating that the sorting by decimal value is not functioning as intended. To fix this issue, we should ensure that the sorting is stable, so that when two elements have the same number of ones, they maintain their relative order if their decimal values are already in ascending order.",

View file

@ -1,6 +1,6 @@
import pytest
from metagpt.actions.ci.ask_review import AskReview
from metagpt.actions.mi.ask_review import AskReview
@pytest.mark.asyncio

View file

@ -5,7 +5,7 @@
import pytest
from metagpt.actions.ci.debug_code import DebugCode
from metagpt.actions.mi.debug_code import DebugCode
from metagpt.schema import Message
ErrorStr = """Tested passed:

View file

@ -1,6 +1,6 @@
import pytest
from metagpt.actions.ci.execute_nb_code import ExecuteNbCode, truncate
from metagpt.actions.mi.execute_nb_code import ExecuteNbCode, truncate
@pytest.mark.asyncio

View file

@ -1,6 +1,6 @@
import pytest
from metagpt.actions.ci.ml_action import WriteCodeWithToolsML
from metagpt.actions.mi.ml_action import WriteCodeWithToolsML
from metagpt.schema import Plan, Task

View file

@ -2,8 +2,8 @@ import asyncio
import pytest
from metagpt.actions.ci.execute_nb_code import ExecuteNbCode
from metagpt.actions.ci.write_analysis_code import (
from metagpt.actions.mi.execute_nb_code import ExecuteNbCode
from metagpt.actions.mi.write_analysis_code import (
WriteCodeWithoutTools,
WriteCodeWithTools,
)

View file

@ -1,6 +1,6 @@
import pytest
from metagpt.actions.ci.write_plan import (
from metagpt.actions.mi.write_plan import (
Plan,
Task,
WritePlan,

View file

@ -14,6 +14,7 @@ from metagpt.actions.rebuild_class_view import RebuildClassView
from metagpt.llm import LLM
@pytest.mark.skip
@pytest.mark.asyncio
async def test_rebuild(context):
action = RebuildClassView(

View file

@ -6,6 +6,9 @@
@File : test_faiss_store.py
"""
from typing import Optional
import numpy as np
import pytest
from metagpt.const import EXAMPLE_PATH
@ -14,8 +17,17 @@ from metagpt.logs import logger
from metagpt.roles import Sales
def mock_openai_embed_documents(self, texts: list[str], chunk_size: Optional[int] = 0) -> list[list[float]]:
    """Offline stand-in for ``OpenAIEmbeddings.embed_documents``.

    Produces one random, column-standardised vector per input text so the
    tests never have to call the real embedding service.

    Args:
        self: The embeddings instance this function is patched onto (unused).
        texts: Documents to embed; only their count is used.
        chunk_size: Accepted for signature compatibility and ignored.

    Returns:
        A ``numpy.ndarray`` of shape ``(len(texts), 1536)`` holding finite floats
        (an array, not nested lists, despite the annotation).
    """
    num = len(texts)
    embeds = np.random.randint(1, 100, size=(num, 1536)).astype(float)  # 1536: openai embedding dim
    if num == 0:
        # Nothing to standardise; mean/std over an empty axis would only emit NaN warnings.
        return embeds
    std = embeds.std(axis=0)
    # A column is constant whenever there is a single text (and, by chance, with only a
    # few texts). Dividing by its zero std would turn the column into NaN, so such
    # columns are left at 0 after centring.
    std[std == 0] = 1.0
    return (embeds - embeds.mean(axis=0)) / std
@pytest.mark.asyncio
async def test_search_json():
async def test_search_json(mocker):
mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents)
store = FaissStore(EXAMPLE_PATH / "example.json")
role = Sales(profile="Sales", store=store)
query = "Which facial cleanser is good for oily skin?"
@ -24,7 +36,9 @@ async def test_search_json():
@pytest.mark.asyncio
async def test_search_xlsx():
async def test_search_xlsx(mocker):
mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents)
store = FaissStore(EXAMPLE_PATH / "example.xlsx")
role = Sales(profile="Sales", store=store)
query = "Which facial cleanser is good for oily skin?"
@ -33,7 +47,9 @@ async def test_search_xlsx():
@pytest.mark.asyncio
async def test_write():
async def test_write(mocker):
mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents)
store = FaissStore(EXAMPLE_PATH / "example.xlsx", meta_col="Answer", content_col="Question")
_faiss_store = store.write()
assert _faiss_store.docstore

View file

@ -0,0 +1,33 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Desc :
from typing import Optional
import numpy as np
dim = 1536  # openai embedding dim

# Fixture table: every text the memory tests embed, paired with a canned vector.
# Each entry uses either the all-zeros or the all-ones vector of shape [1, dim].
text_embed_arr = [
    {"text": "Write a cli snake game", "embed": np.zeros(shape=[1, dim])},  # mock data, same as below
    {"text": "Write a game of cli snake", "embed": np.zeros(shape=[1, dim])},
    {"text": "Write a 2048 web game", "embed": np.ones(shape=[1, dim])},
    {"text": "Write a Battle City", "embed": np.ones(shape=[1, dim])},
    {
        "text": "The user has requested the creation of a command-line interface (CLI) snake game",
        "embed": np.zeros(shape=[1, dim]),
    },
    {"text": "The request is command-line interface (CLI) snake game", "embed": np.zeros(shape=[1, dim])},
    {
        "text": "Incorporate basic features of a snake game such as scoring and increasing difficulty",
        "embed": np.ones(shape=[1, dim]),
    },
]

# Reverse lookup: text -> position in `text_embed_arr`.
text_idx_dict = {item["text"]: idx for idx, item in enumerate(text_embed_arr)}


def mock_openai_embed_documents(self, texts: list[str], chunk_size: Optional[int] = 0) -> list[list[float]]:
    """Offline stand-in for ``OpenAIEmbeddings.embed_documents`` backed by the fixture table.

    Args:
        self: The embeddings instance this function is patched onto (unused).
        texts: Texts to embed; each must be registered in ``text_embed_arr``.
        chunk_size: Accepted for signature compatibility and ignored.

    Returns:
        A ``numpy.ndarray`` of shape ``(len(texts), dim)`` with one row per text,
        in input order.

    Raises:
        KeyError: If a text has no registered mock embedding.
    """
    if not texts:
        return np.zeros(shape=[0, dim])
    rows = []
    for text in texts:
        idx = text_idx_dict.get(text)
        if idx is None:
            # Fail with the offending text instead of an opaque "list indices" TypeError.
            raise KeyError(f"no mock embedding registered for text: {text!r}")
        rows.append(text_embed_arr[idx]["embed"])
    # Stack the [1, dim] rows so that a batch of N texts yields N embeddings; this also
    # hands back a fresh array rather than the shared fixture object.
    return np.concatenate(rows, axis=0)

View file

@ -4,20 +4,22 @@
@Desc : unittest of `metagpt/memory/longterm_memory.py`
"""
import os
import pytest
from metagpt.actions import UserRequirement
from metagpt.config2 import config
from metagpt.memory.longterm_memory import LongTermMemory
from metagpt.roles.role import RoleContext
from metagpt.schema import Message
os.environ.setdefault("OPENAI_API_KEY", config.get_openai_llm().api_key)
from tests.metagpt.memory.mock_text_embed import (
mock_openai_embed_documents,
text_embed_arr,
)
def test_ltm_search():
def test_ltm_search(mocker):
mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents)
role_id = "UTUserLtm(Product Manager)"
from metagpt.environment import Environment
@ -27,20 +29,20 @@ def test_ltm_search():
ltm = LongTermMemory()
ltm.recover_memory(role_id, rc)
idea = "Write a cli snake game"
idea = text_embed_arr[0].get("text", "Write a cli snake game")
message = Message(role="User", content=idea, cause_by=UserRequirement)
news = ltm.find_news([message])
assert len(news) == 1
ltm.add(message)
sim_idea = "Write a game of cli snake"
sim_idea = text_embed_arr[1].get("text", "Write a game of cli snake")
sim_message = Message(role="User", content=sim_idea, cause_by=UserRequirement)
news = ltm.find_news([sim_message])
assert len(news) == 0
ltm.add(sim_message)
new_idea = "Write a 2048 web game"
new_idea = text_embed_arr[2].get("text", "Write a 2048 web game")
new_message = Message(role="User", content=new_idea, cause_by=UserRequirement)
news = ltm.find_news([new_message])
assert len(news) == 1
@ -56,7 +58,7 @@ def test_ltm_search():
news = ltm_new.find_news([sim_message])
assert len(news) == 0
new_idea = "Write a Battle City"
new_idea = text_embed_arr[3].get("text", "Write a Battle City")
new_message = Message(role="User", content=new_idea, cause_by=UserRequirement)
news = ltm_new.find_news([new_message])
assert len(news) == 1

View file

@ -4,23 +4,25 @@
@Desc : the unittests of metagpt/memory/memory_storage.py
"""
import os
import shutil
from pathlib import Path
from typing import List
from metagpt.actions import UserRequirement, WritePRD
from metagpt.actions.action_node import ActionNode
from metagpt.config2 import config
from metagpt.const import DATA_PATH
from metagpt.memory.memory_storage import MemoryStorage
from metagpt.schema import Message
os.environ.setdefault("OPENAI_API_KEY", config.get_openai_llm().api_key)
from tests.metagpt.memory.mock_text_embed import (
mock_openai_embed_documents,
text_embed_arr,
)
def test_idea_message():
idea = "Write a cli snake game"
def test_idea_message(mocker):
mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents)
idea = text_embed_arr[0].get("text", "Write a cli snake game")
role_id = "UTUser1(Product Manager)"
message = Message(role="User", content=idea, cause_by=UserRequirement)
@ -33,12 +35,12 @@ def test_idea_message():
memory_storage.add(message)
assert memory_storage.is_initialized is True
sim_idea = "Write a game of cli snake"
sim_idea = text_embed_arr[1].get("text", "Write a game of cli snake")
sim_message = Message(role="User", content=sim_idea, cause_by=UserRequirement)
new_messages = memory_storage.search_dissimilar(sim_message)
assert len(new_messages) == 0 # similar, return []
new_idea = "Write a 2048 web game"
new_idea = text_embed_arr[2].get("text", "Write a 2048 web game")
new_message = Message(role="User", content=new_idea, cause_by=UserRequirement)
new_messages = memory_storage.search_dissimilar(new_message)
assert new_messages[0].content == message.content
@ -47,13 +49,17 @@ def test_idea_message():
assert memory_storage.is_initialized is False
def test_actionout_message():
def test_actionout_message(mocker):
mocker.patch("langchain_community.embeddings.openai.OpenAIEmbeddings.embed_documents", mock_openai_embed_documents)
out_mapping = {"field1": (str, ...), "field2": (List[str], ...)}
out_data = {"field1": "field1 value", "field2": ["field2 value1", "field2 value2"]}
ic_obj = ActionNode.create_model_class("prd", out_mapping)
role_id = "UTUser2(Architect)"
content = "The user has requested the creation of a command-line interface (CLI) snake game"
content = text_embed_arr[4].get(
"text", "The user has requested the creation of a command-line interface (CLI) snake game"
)
message = Message(
content=content, instruct_content=ic_obj(**out_data), role="user", cause_by=WritePRD
) # WritePRD as test action
@ -67,12 +73,14 @@ def test_actionout_message():
memory_storage.add(message)
assert memory_storage.is_initialized is True
sim_conent = "The request is command-line interface (CLI) snake game"
sim_conent = text_embed_arr[5].get("text", "The request is command-line interface (CLI) snake game")
sim_message = Message(content=sim_conent, instruct_content=ic_obj(**out_data), role="user", cause_by=WritePRD)
new_messages = memory_storage.search_dissimilar(sim_message)
assert len(new_messages) == 0 # similar, return []
new_conent = "Incorporate basic features of a snake game such as scoring and increasing difficulty"
new_conent = text_embed_arr[6].get(
"text", "Incorporate basic features of a snake game such as scoring and increasing difficulty"
)
new_message = Message(content=new_conent, instruct_content=ic_obj(**out_data), role="user", cause_by=WritePRD)
new_messages = memory_storage.search_dissimilar(new_message)
assert new_messages[0].content == message.content

View file

@ -42,3 +42,17 @@ mock_llm_config_zhipu = LLMConfig(
model="mock_zhipu_model",
proxy="http://localhost:8080",
)
# Mock configuration for the "spark" api type; every credential is a placeholder.
mock_llm_config_spark = LLMConfig(
    api_type="spark",
    app_id="xxx",
    api_key="xxx",
    api_secret="xxx",
    domain="generalv2",
    base_url="wss://spark-api.xf-yun.com/v3.1/chat",
)

# Mock configuration for the "qianfan" api type.
mock_llm_config_qianfan = LLMConfig(
    api_type="qianfan",
    access_key="xxx",
    secret_key="xxx",
    model="ERNIE-Bot-turbo",
)

# Mock configuration for the "dashscope" api type.
mock_llm_config_dashscope = LLMConfig(
    api_type="dashscope",
    api_key="xxx",
    model="qwen-max",
)

View file

@ -0,0 +1,145 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Desc : default request & response data for provider unittest
from dashscope.api_entities.dashscope_response import (
DashScopeAPIResponse,
GenerationOutput,
GenerationResponse,
GenerationUsage,
)
from openai.types.chat.chat_completion import (
ChatCompletion,
ChatCompletionMessage,
Choice,
)
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
from openai.types.chat.chat_completion_chunk import Choice as AChoice
from openai.types.chat.chat_completion_chunk import ChoiceDelta
from openai.types.completion_usage import CompletionUsage
from qianfan.resources.typing import QfResponse
from metagpt.provider.base_llm import BaseLLM
# Shared request fixtures: one user prompt and the chat-style message list built from it.
prompt = "who are you?"
messages = [dict(role="user", content=prompt)]

# Template for mocked replies; `default_resp_cont` is the template rendered for "GPT".
resp_cont_tmpl = "I'm {name}"
default_resp_cont = resp_cont_tmpl.format(name="GPT")
# part of whole ChatCompletion of openai like structure
def get_part_chat_completion(name: str) -> dict:
    """Build a reduced OpenAI-style completion dict holding only ``choices`` and ``usage``.

    Args:
        name: Value substituted into the shared response template for the assistant reply.

    Returns:
        A dict with a single finished assistant choice and fixed token counts.
    """
    assistant_message = {"role": "assistant", "content": resp_cont_tmpl.format(name=name)}
    only_choice = {"index": 0, "message": assistant_message, "finish_reason": "stop"}
    token_usage = {"completion_tokens": 22, "prompt_tokens": 19, "total_tokens": 41}
    return {"choices": [only_choice], "usage": token_usage}
def get_openai_chat_completion(name: str) -> ChatCompletion:
    """Build a full ``ChatCompletion`` whose single choice is the templated assistant reply.

    Args:
        name: Value substituted into the shared response template.

    Returns:
        A ``ChatCompletion`` with fixed id, model, timestamp and token usage.
    """
    reply = ChatCompletionMessage(role="assistant", content=resp_cont_tmpl.format(name=name))
    choice = Choice(finish_reason="stop", index=0, message=reply, logprobs=None)
    token_usage = CompletionUsage(completion_tokens=110, prompt_tokens=92, total_tokens=202)
    return ChatCompletion(
        id="cmpl-a6652c1bb181caae8dd19ad8",
        model="xx/xxx",
        object="chat.completion",
        created=1703300855,
        choices=[choice],
        usage=token_usage,
    )
def get_openai_chat_completion_chunk(name: str, usage_as_dict: bool = False) -> ChatCompletionChunk:
    """Build a single streaming chunk carrying the templated assistant reply.

    Args:
        name: Value substituted into the shared response template.
        usage_as_dict: When true, pass the usage to the chunk as a plain dict
            (``model_dump()``) instead of a ``CompletionUsage`` object.

    Returns:
        A ``ChatCompletionChunk`` with one finished choice and fixed token usage.
    """
    usage = CompletionUsage(completion_tokens=110, prompt_tokens=92, total_tokens=202)
    if usage_as_dict:
        usage = usage.model_dump()

    delta = ChoiceDelta(role="assistant", content=resp_cont_tmpl.format(name=name))
    chunk_choice = AChoice(delta=delta, finish_reason="stop", index=0, logprobs=None)
    return ChatCompletionChunk(
        id="cmpl-a6652c1bb181caae8dd19ad8",
        model="xx/xxx",
        object="chat.completion.chunk",
        created=1703300855,
        choices=[chunk_choice],
        usage=usage,
    )
# For gemini: the same user prompt, but the text is stored under "parts" rather than "content"
gemini_messages = [{"role": "user", "parts": prompt}]
# For QianFan
# Template body of a QianFan chat response; "result" is filled in per response.
qf_jsonbody_dict = {
    "id": "as-4v1h587fyv",
    "object": "chat.completion",
    "created": 1695021339,
    "result": "",
    "is_truncated": False,
    "need_clear_history": False,
    "usage": {"prompt_tokens": 7, "completion_tokens": 15, "total_tokens": 22},
}


def get_qianfan_response(name: str) -> QfResponse:
    """Build a successful (code 200) ``QfResponse`` whose ``result`` is the templated reply.

    Args:
        name: Value substituted into the shared response template.

    Returns:
        A ``QfResponse`` with its own body dict based on ``qf_jsonbody_dict``.
    """
    # Build a fresh body per call instead of writing into the module-level template:
    # mutating the shared dict leaked the last requested name into the template and
    # into any body object handed out earlier.
    body = {
        **qf_jsonbody_dict,
        "result": resp_cont_tmpl.format(name=name),
        "usage": dict(qf_jsonbody_dict["usage"]),
    }
    return QfResponse(code=200, body=body)
# For DashScope
def get_dashscope_response(name: str) -> GenerationResponse:
    """Build a successful (status 200) DashScope ``GenerationResponse`` with the templated reply.

    Args:
        name: Value substituted into the shared response template.

    Returns:
        The ``GenerationResponse`` derived from a mocked ``DashScopeAPIResponse``.
    """
    assistant_choice = {
        "finish_reason": "stop",
        "message": {"role": "assistant", "content": resp_cont_tmpl.format(name=name)},
    }
    output = GenerationOutput(text="", finish_reason="", choices=[assistant_choice])
    usage = GenerationUsage(input_tokens=12, output_tokens=98, total_tokens=110)
    api_response = DashScopeAPIResponse(status_code=200, output=output, usage=usage)
    return GenerationResponse.from_api_response(api_response)
# For llm general chat functions call
async def llm_general_chat_funcs_test(llm: BaseLLM, prompt: str, messages: list[dict], resp_cont: str):
    """Assert that the common chat entry points of ``llm`` all return ``resp_cont``.

    Args:
        llm: The provider under test (its network layer is expected to be mocked by the caller).
        prompt: Prompt passed to ``aask``.
        messages: Message list passed to ``acompletion_text``.
        resp_cont: The reply every call must return.
    """
    # aask: explicitly non-streaming first, then with its default stream setting
    assert await llm.aask(prompt, stream=False) == resp_cont
    assert await llm.aask(prompt) == resp_cont

    # acompletion_text: non-streaming, then streaming
    for stream in (False, True):
        assert await llm.acompletion_text(messages, stream=stream) == resp_cont

View file

@ -8,25 +8,25 @@ from anthropic.resources.completions import Completion
from metagpt.provider.anthropic_api import Claude2
from tests.metagpt.provider.mock_llm_config import mock_llm_config
from tests.metagpt.provider.req_resp_const import prompt, resp_cont_tmpl
prompt = "who are you"
resp = "I'am Claude2"
resp_cont = resp_cont_tmpl.format(name="Claude")
def mock_anthropic_completions_create(self, model: str, prompt: str, max_tokens_to_sample: int) -> Completion:
return Completion(id="xx", completion=resp, model="claude-2", stop_reason="stop_sequence", type="completion")
return Completion(id="xx", completion=resp_cont, model="claude-2", stop_reason="stop_sequence", type="completion")
async def mock_anthropic_acompletions_create(self, model: str, prompt: str, max_tokens_to_sample: int) -> Completion:
return Completion(id="xx", completion=resp, model="claude-2", stop_reason="stop_sequence", type="completion")
return Completion(id="xx", completion=resp_cont, model="claude-2", stop_reason="stop_sequence", type="completion")
def test_claude2_ask(mocker):
mocker.patch("anthropic.resources.completions.Completions.create", mock_anthropic_completions_create)
assert resp == Claude2(mock_llm_config).ask(prompt)
assert resp_cont == Claude2(mock_llm_config).ask(prompt)
@pytest.mark.asyncio
async def test_claude2_aask(mocker):
mocker.patch("anthropic.resources.completions.AsyncCompletions.create", mock_anthropic_acompletions_create)
assert resp == await Claude2(mock_llm_config).aask(prompt)
assert resp_cont == await Claude2(mock_llm_config).aask(prompt)

View file

@ -11,21 +11,13 @@ import pytest
from metagpt.configs.llm_config import LLMConfig
from metagpt.provider.base_llm import BaseLLM
from metagpt.schema import Message
from tests.metagpt.provider.req_resp_const import (
default_resp_cont,
get_part_chat_completion,
prompt,
)
default_chat_resp = {
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "I'am GPT",
},
"finish_reason": "stop",
}
]
}
prompt_msg = "who are you"
resp_content = default_chat_resp["choices"][0]["message"]["content"]
name = "GPT"
class MockBaseLLM(BaseLLM):
@ -33,16 +25,13 @@ class MockBaseLLM(BaseLLM):
pass
def completion(self, messages: list[dict], timeout=3):
return default_chat_resp
return get_part_chat_completion(name)
async def acompletion(self, messages: list[dict], timeout=3):
return default_chat_resp
return get_part_chat_completion(name)
async def acompletion_text(self, messages: list[dict], stream=False, timeout=3) -> str:
return resp_content
async def close(self):
return default_chat_resp
return default_resp_cont
def test_base_llm():
@ -86,25 +75,25 @@ def test_base_llm():
choice_text = base_llm.get_choice_text(openai_funccall_resp)
assert choice_text == openai_funccall_resp["choices"][0]["message"]["content"]
# resp = base_llm.ask(prompt_msg)
# assert resp == resp_content
# resp = base_llm.ask(prompt)
# assert resp == default_resp_cont
# resp = base_llm.ask_batch([prompt_msg])
# assert resp == resp_content
# resp = base_llm.ask_batch([prompt])
# assert resp == default_resp_cont
# resp = base_llm.ask_code([prompt_msg])
# assert resp == resp_content
# resp = base_llm.ask_code([prompt])
# assert resp == default_resp_cont
@pytest.mark.asyncio
async def test_async_base_llm():
base_llm = MockBaseLLM()
resp = await base_llm.aask(prompt_msg)
assert resp == resp_content
resp = await base_llm.aask(prompt)
assert resp == default_resp_cont
resp = await base_llm.aask_batch([prompt_msg])
assert resp == resp_content
resp = await base_llm.aask_batch([prompt])
assert resp == default_resp_cont
# resp = await base_llm.aask_code([prompt_msg])
# assert resp == resp_content
# resp = await base_llm.aask_code([prompt])
# assert resp == default_resp_cont

View file

@ -0,0 +1,73 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Desc : the unittest of DashScopeLLM
from typing import AsyncGenerator, Union
import pytest
from dashscope.api_entities.dashscope_response import GenerationResponse
from metagpt.provider.dashscope_api import DashScopeLLM
from tests.metagpt.provider.mock_llm_config import mock_llm_config_dashscope
from tests.metagpt.provider.req_resp_const import (
get_dashscope_response,
llm_general_chat_funcs_test,
messages,
prompt,
resp_cont_tmpl,
)
# Name substituted into the shared response template; `resp_cont` is the rendered reply
# that the assertions in this module compare against.
name = "qwen-max"
resp_cont = resp_cont_tmpl.format(name=name)
@classmethod
def mock_dashscope_call(
    cls,
    messages: list[dict],
    model: str,
    api_key: str,
    result_format: str,
    incremental_output: bool = True,
    stream: bool = False,
) -> GenerationResponse:
    """Synchronous replacement for the patched ``call``: ignore the request, return the canned response."""
    canned = get_dashscope_response(name)
    return canned
@classmethod
async def mock_dashscope_acall(
    cls,
    messages: list[dict],
    model: str,
    api_key: str,
    result_format: str,
    incremental_output: bool = True,
    stream: bool = False,
) -> Union[AsyncGenerator[GenerationResponse, None], GenerationResponse]:
    """Async replacement for the patched ``acall``.

    Returns the canned response directly, or, when ``stream`` is true, an async
    generator that yields that response exactly once. All request arguments
    other than ``stream`` are ignored.
    """
    canned = get_dashscope_response(name)
    if not stream:
        return canned

    async def one_shot_stream():
        # Streaming mode: the whole reply arrives as a single chunk.
        yield canned

    return one_shot_stream()
@pytest.mark.asyncio
async def test_dashscope_acompletion(mocker):
    """Check DashScopeLLM sync/async completion and the general chat helpers against the mocks."""
    # Replace both the sync and the async DashScope entry points with the canned mocks.
    replacements = {
        "dashscope.aigc.generation.Generation.call": mock_dashscope_call,
        "metagpt.provider.dashscope_api.AGeneration.acall": mock_dashscope_acall,
    }
    for target, replacement in replacements.items():
        mocker.patch(target, replacement)

    dashscope_llm = DashScopeLLM(mock_llm_config_dashscope)

    sync_resp = dashscope_llm.completion(messages)
    assert sync_resp.choices[0]["message"]["content"] == resp_cont

    async_resp = await dashscope_llm.acompletion(messages)
    assert async_resp.choices[0]["message"]["content"] == resp_cont

    await llm_general_chat_funcs_test(dashscope_llm, prompt, messages, resp_cont)

View file

@ -1,114 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Desc : the unittest of fireworks api
import pytest
from openai.types.chat.chat_completion import (
ChatCompletion,
ChatCompletionMessage,
Choice,
)
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
from openai.types.chat.chat_completion_chunk import Choice as AChoice
from openai.types.chat.chat_completion_chunk import ChoiceDelta
from openai.types.completion_usage import CompletionUsage
from metagpt.provider.fireworks_api import (
MODEL_GRADE_TOKEN_COSTS,
FireworksCostManager,
FireworksLLM,
)
from metagpt.utils.cost_manager import Costs
from tests.metagpt.provider.mock_llm_config import mock_llm_config
# Request and expected-reply fixtures shared by the tests below.
resp_content = "I'm fireworks"
prompt_msg = "who are you"
messages = [{"role": "user", "content": prompt_msg}]

# Canned non-streaming completion.
default_resp = ChatCompletion(
    id="cmpl-a6652c1bb181caae8dd19ad8",
    model="accounts/fireworks/models/llama-v2-13b-chat",
    object="chat.completion",
    created=1703300855,
    choices=[
        Choice(
            finish_reason="stop",
            index=0,
            message=ChatCompletionMessage(role="assistant", content=resp_content),
            logprobs=None,
        )
    ],
    usage=CompletionUsage(completion_tokens=110, prompt_tokens=92, total_tokens=202),
)

# Canned streaming chunk; id, model, timestamp and usage are taken from `default_resp`,
# with the usage passed as a plain dict.
default_resp_chunk = ChatCompletionChunk(
    id=default_resp.id,
    model=default_resp.model,
    object="chat.completion.chunk",
    created=default_resp.created,
    choices=[
        AChoice(
            delta=ChoiceDelta(content=resp_content, role="assistant"),
            finish_reason="stop",
            index=0,
            logprobs=None,
        )
    ],
    usage=dict(default_resp.usage),
)
def test_fireworks_costmanager():
    """Check model-name to cost-grade resolution and cost accumulation of FireworksCostManager."""
    cost_manager = FireworksCostManager()

    # (expected grade key in MODEL_GRADE_TOKEN_COSTS, model name handed to the manager)
    expected_grades = [
        ("-1", "test"),
        ("-1", "xxx-81b-chat"),
        ("16", "llama-v2-13b-chat"),
        ("16", "xxx-15.5b-chat"),
        ("16", "xxx-16b-chat"),
        ("80", "xxx-80b-chat"),
        ("mixtral-8x7b", "mixtral-8x7b-chat"),
    ]
    for grade, model in expected_grades:
        assert MODEL_GRADE_TOKEN_COSTS[grade] == cost_manager.model_grade_token_costs(model)

    cost_manager.update_cost(prompt_tokens=500000, completion_tokens=500000, model="llama-v2-13b-chat")
    assert cost_manager.total_cost == 0.5
async def mock_openai_acompletions_create(self, stream: bool = False, **kwargs) -> ChatCompletionChunk:
    """Replacement for the patched async ``create``.

    Returns ``default_resp`` for a non-streaming call; for a streaming call returns
    an object whose async iteration yields ``default_resp_chunk`` once. Extra keyword
    arguments are accepted and ignored.
    """
    if not stream:
        return default_resp

    class SingleChunkStream(object):
        # __aiter__ written as an async generator: iterating yields the one canned chunk.
        async def __aiter__(self):
            yield default_resp_chunk

    return SingleChunkStream()
@pytest.mark.asyncio
async def test_fireworks_acompletion(mocker):
    """Check FireworksLLM cost bookkeeping and its chat entry points against the mocked client."""
    mocker.patch("openai.resources.chat.completions.AsyncCompletions.create", mock_openai_acompletions_create)

    fireworks_gpt = FireworksLLM(mock_llm_config)
    fireworks_gpt.model = "llama-v2-13b-chat"

    # Cost bookkeeping: feed a known usage and compare the accumulated totals.
    fireworks_gpt._update_costs(
        usage=CompletionUsage(prompt_tokens=500000, completion_tokens=500000, total_tokens=1000000)
    )
    assert fireworks_gpt.get_costs() == Costs(
        total_prompt_tokens=500000, total_completion_tokens=500000, total_cost=0.5, total_budget=0
    )

    resp = await fireworks_gpt.acompletion(messages)
    # Exact match, like every other assertion here; the previous substring check (`in`)
    # would also have accepted an empty or truncated reply.
    assert resp.choices[0].message.content == resp_content

    resp = await fireworks_gpt.aask(prompt_msg, stream=False)
    assert resp == resp_content

    resp = await fireworks_gpt.acompletion_text(messages, stream=False)
    assert resp == resp_content

    resp = await fireworks_gpt.acompletion_text(messages, stream=True)
    assert resp == resp_content

    resp = await fireworks_gpt.aask(prompt_msg)
    assert resp == resp_content

View file

@ -11,6 +11,12 @@ from google.generativeai.types import content_types
from metagpt.provider.google_gemini_api import GeminiLLM
from tests.metagpt.provider.mock_llm_config import mock_llm_config
from tests.metagpt.provider.req_resp_const import (
gemini_messages,
llm_general_chat_funcs_test,
prompt,
resp_cont_tmpl,
)
@dataclass
@ -18,10 +24,8 @@ class MockGeminiResponse(ABC):
text: str
prompt_msg = "who are you"
messages = [{"role": "user", "parts": prompt_msg}]
resp_content = "I'm gemini from google"
default_resp = MockGeminiResponse(text=resp_content)
resp_cont = resp_cont_tmpl.format(name="gemini")
default_resp = MockGeminiResponse(text=resp_cont)
def mock_gemini_count_tokens(self, contents: content_types.ContentsType) -> glm.CountTokensResponse:
@ -60,28 +64,18 @@ async def test_gemini_acompletion(mocker):
mock_gemini_generate_content_async,
)
gemini_gpt = GeminiLLM(mock_llm_config)
gemini_llm = GeminiLLM(mock_llm_config)
assert gemini_gpt._user_msg(prompt_msg) == {"role": "user", "parts": [prompt_msg]}
assert gemini_gpt._assistant_msg(prompt_msg) == {"role": "model", "parts": [prompt_msg]}
assert gemini_llm._user_msg(prompt) == {"role": "user", "parts": [prompt]}
assert gemini_llm._assistant_msg(prompt) == {"role": "model", "parts": [prompt]}
usage = gemini_gpt.get_usage(messages, resp_content)
usage = gemini_llm.get_usage(gemini_messages, resp_cont)
assert usage == {"prompt_tokens": 20, "completion_tokens": 20}
resp = gemini_gpt.completion(messages)
resp = gemini_llm.completion(gemini_messages)
assert resp == default_resp
resp = await gemini_gpt.acompletion(messages)
resp = await gemini_llm.acompletion(gemini_messages)
assert resp.text == default_resp.text
resp = await gemini_gpt.aask(prompt_msg, stream=False)
assert resp == resp_content
resp = await gemini_gpt.acompletion_text(messages, stream=False)
assert resp == resp_content
resp = await gemini_gpt.acompletion_text(messages, stream=True)
assert resp == resp_content
resp = await gemini_gpt.aask(prompt_msg)
assert resp == resp_content
await llm_general_chat_funcs_test(gemini_llm, prompt, gemini_messages, resp_cont)

View file

@ -9,12 +9,15 @@ import pytest
from metagpt.provider.ollama_api import OllamaLLM
from tests.metagpt.provider.mock_llm_config import mock_llm_config
from tests.metagpt.provider.req_resp_const import (
llm_general_chat_funcs_test,
messages,
prompt,
resp_cont_tmpl,
)
prompt_msg = "who are you"
messages = [{"role": "user", "content": prompt_msg}]
resp_content = "I'm ollama"
default_resp = {"message": {"role": "assistant", "content": resp_content}}
resp_cont = resp_cont_tmpl.format(name="ollama")
default_resp = {"message": {"role": "assistant", "content": resp_cont}}
async def mock_ollama_arequest(self, stream: bool = False, **kwargs) -> Tuple[Any, Any, bool]:
@ -41,19 +44,12 @@ async def mock_ollama_arequest(self, stream: bool = False, **kwargs) -> Tuple[An
async def test_gemini_acompletion(mocker):
mocker.patch("metagpt.provider.general_api_requestor.GeneralAPIRequestor.arequest", mock_ollama_arequest)
ollama_gpt = OllamaLLM(mock_llm_config)
ollama_llm = OllamaLLM(mock_llm_config)
resp = await ollama_gpt.acompletion(messages)
resp = await ollama_llm.acompletion(messages)
assert resp["message"]["content"] == default_resp["message"]["content"]
resp = await ollama_gpt.aask(prompt_msg, stream=False)
assert resp == resp_content
resp = await ollama_llm.aask(prompt, stream=False)
assert resp == resp_cont
resp = await ollama_gpt.acompletion_text(messages, stream=False)
assert resp == resp_content
resp = await ollama_gpt.acompletion_text(messages, stream=True)
assert resp == resp_content
resp = await ollama_gpt.aask(prompt_msg)
assert resp == resp_content
await llm_general_chat_funcs_test(ollama_llm, prompt, messages, resp_cont)

View file

@ -1,92 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Desc :
import pytest
from openai.types.chat.chat_completion import (
ChatCompletion,
ChatCompletionMessage,
Choice,
)
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
from openai.types.chat.chat_completion_chunk import Choice as AChoice
from openai.types.chat.chat_completion_chunk import ChoiceDelta
from openai.types.completion_usage import CompletionUsage
from metagpt.provider.open_llm_api import OpenLLM
from metagpt.utils.cost_manager import Costs
from tests.metagpt.provider.mock_llm_config import mock_llm_config
# Request side of the shared fixtures: one user prompt and its chat-message form.
prompt_msg = "who are you"
messages = [{"role": "user", "content": prompt_msg}]

# Reply text that both canned responses below carry.
resp_content = "I'm llama2"

# Canned non-streaming completion.
_resp_choice = Choice(
    finish_reason="stop",
    index=0,
    message=ChatCompletionMessage(role="assistant", content=resp_content),
    logprobs=None,
)
default_resp = ChatCompletion(
    id="cmpl-a6652c1bb181caae8dd19ad8",
    model="llama-v2-13b-chat",
    object="chat.completion",
    created=1703302755,
    choices=[_resp_choice],
)

# Canned streaming chunk; id, model and creation time are copied from default_resp.
_chunk_choice = AChoice(
    delta=ChoiceDelta(content=resp_content, role="assistant"),
    finish_reason="stop",
    index=0,
    logprobs=None,
)
default_resp_chunk = ChatCompletionChunk(
    id=default_resp.id,
    model=default_resp.model,
    object="chat.completion.chunk",
    created=default_resp.created,
    choices=[_chunk_choice],
)
async def mock_openai_acompletions_create(self, stream: bool = False, **kwargs) -> ChatCompletionChunk:
    """Replacement for ``AsyncCompletions.create`` that never touches the network.

    With ``stream`` false it returns ``default_resp``. Otherwise it returns an
    async-iterable object that yields ``default_resp_chunk`` once. Any other
    keyword arguments are ignored.
    """
    if not stream:
        return default_resp

    class Iterator(object):
        # Async-generator ``__aiter__``: iterating an instance produces the single canned chunk.
        async def __aiter__(self):
            yield default_resp_chunk

    return Iterator()
@pytest.mark.asyncio
async def test_openllm_acompletion(mocker):
    """Check OpenLLM cost accounting and chat helpers against the mocked OpenAI client.

    ``AsyncCompletions.create`` is patched with ``mock_openai_acompletions_create``,
    so every call is answered with the canned ``default_resp`` / ``default_resp_chunk``.
    """
    mocker.patch("openai.resources.chat.completions.AsyncCompletions.create", mock_openai_acompletions_create)
    openllm_gpt = OpenLLM(mock_llm_config)
    openllm_gpt.model = "llama-v2-13b-chat"

    # Token counts must accumulate; this test expects the recorded cost to stay 0.
    openllm_gpt._update_costs(usage=CompletionUsage(prompt_tokens=100, completion_tokens=100, total_tokens=200))
    assert openllm_gpt.get_costs() == Costs(
        total_prompt_tokens=100, total_completion_tokens=100, total_cost=0, total_budget=0
    )

    resp = await openllm_gpt.acompletion(messages)
    # Compare for equality: a substring check would also accept an empty or truncated reply.
    assert resp.choices[0].message.content == resp_content

    resp = await openllm_gpt.aask(prompt_msg, stream=False)
    assert resp == resp_content

    resp = await openllm_gpt.acompletion_text(messages, stream=False)
    assert resp == resp_content

    resp = await openllm_gpt.acompletion_text(messages, stream=True)
    assert resp == resp_content

    resp = await openllm_gpt.aask(prompt_msg)
    assert resp == resp_content

View file

@ -1,12 +1,11 @@
import json
import pytest
from openai.types.chat import (
ChatCompletion,
ChatCompletionChunk,
ChatCompletionMessage,
ChatCompletionMessageToolCall,
)
from openai.types.chat.chat_completion import Choice
from openai.types.chat.chat_completion import Choice, CompletionUsage
from openai.types.chat.chat_completion_message_tool_call import Function
from PIL import Image
@ -18,6 +17,22 @@ from tests.metagpt.provider.mock_llm_config import (
mock_llm_config,
mock_llm_config_proxy,
)
from tests.metagpt.provider.req_resp_const import (
get_openai_chat_completion,
get_openai_chat_completion_chunk,
llm_general_chat_funcs_test,
messages,
prompt,
resp_cont_tmpl,
)
# Canned OpenAI fixtures for this module, all derived from the same `name`.
name = "AI assistant"
resp_cont = resp_cont_tmpl.format(name=name)
default_resp = get_openai_chat_completion(name)
default_resp_chunk = get_openai_chat_completion_chunk(name, usage_as_dict=True)
# Usage value that test_openai_acompletion compares against the mocked completion's `usage`.
usage = CompletionUsage(completion_tokens=110, prompt_tokens=92, total_tokens=202)
@pytest.mark.asyncio
@ -106,9 +121,11 @@ class TestOpenAI:
def test_aask_code_json_decode_error(self, json_decode_error):
instance = OpenAILLM(mock_llm_config)
with pytest.raises(json.decoder.JSONDecodeError) as e:
instance.get_choice_function_arguments(json_decode_error)
assert "JSONDecodeError" in str(e)
code = instance.get_choice_function_arguments(json_decode_error)
assert "code" in code
assert "language" in code
assert "hello world" in code["code"]
logger.info(f'code is : {code["code"]}')
@pytest.mark.asyncio
@ -121,3 +138,29 @@ async def test_gen_image():
images: list[Image] = await llm.gen_image(model=model, prompt=prompt, resp_format="b64_json")
assert images[0].size == (1024, 1024)
async def mock_openai_acompletions_create(self, stream: bool = False, **kwargs) -> ChatCompletionChunk:
    """Patched-in substitute for ``AsyncCompletions.create``.

    Returns ``default_resp`` when ``stream`` is false. When ``stream`` is true,
    returns an object that yields ``default_resp_chunk`` once under ``async for``.
    Remaining keyword arguments are ignored.
    """
    if not stream:
        return default_resp

    class Iterator(object):
        # Defined as an async generator so an instance can be consumed with ``async for``.
        async def __aiter__(self):
            yield default_resp_chunk

    return Iterator()
@pytest.mark.asyncio
async def test_openai_acompletion(mocker):
    """Check OpenAILLM.acompletion and the shared chat helpers against the mocked OpenAI client."""
    mocker.patch("openai.resources.chat.completions.AsyncCompletions.create", mock_openai_acompletions_create)
    llm = OpenAILLM(mock_llm_config)

    completion = await llm.acompletion(messages)
    first_choice = completion.choices[0]
    assert first_choice.finish_reason == "stop"
    assert first_choice.message.content == resp_cont
    assert completion.usage == usage

    # Shared checks for the remaining chat entry points.
    await llm_general_chat_funcs_test(llm, prompt, messages, resp_cont)

View file

@ -0,0 +1,56 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Desc : the unittest of qianfan api
from typing import AsyncIterator, Union
import pytest
from qianfan.resources.typing import JsonBody, QfResponse
from metagpt.provider.qianfan_api import QianFanLLM
from tests.metagpt.provider.mock_llm_config import mock_llm_config_qianfan
from tests.metagpt.provider.req_resp_const import (
get_qianfan_response,
llm_general_chat_funcs_test,
messages,
prompt,
resp_cont_tmpl,
)
# `name` is passed to get_qianfan_response by the mocks below; `resp_cont` is the reply text the test expects.
name = "ERNIE-Bot-turbo"
resp_cont = resp_cont_tmpl.format(name=name)
def mock_qianfan_do(self, messages: list[dict], model: str, stream: bool = False, system: str = None) -> QfResponse:
    """Fake ``ChatCompletion.do``: ignore the request and return the canned response built for ``name``."""
    canned_response = get_qianfan_response(name=name)
    return canned_response
async def mock_qianfan_ado(
    self, messages: list[dict], model: str, stream: bool = True, system: str = None
) -> Union[QfResponse, AsyncIterator[QfResponse]]:
    """Fake ``ChatCompletion.ado``.

    Builds one canned response for ``name``. With ``stream`` false that
    response is returned directly; otherwise an async generator yielding it
    once is returned. The request arguments are not inspected.
    """
    canned_response = get_qianfan_response(name=name)
    if not stream:
        return canned_response

    async def aresp_iterator(resps: list[JsonBody]):
        for resp in resps:
            yield resp

    return aresp_iterator([canned_response])
@pytest.mark.asyncio
async def test_qianfan_acompletion(mocker):
    """Check QianFanLLM sync/async completion and the shared chat helpers with the SDK calls mocked."""
    mocker.patch("qianfan.resources.llm.chat_completion.ChatCompletion.do", mock_qianfan_do)
    mocker.patch("qianfan.resources.llm.chat_completion.ChatCompletion.ado", mock_qianfan_ado)
    llm = QianFanLLM(mock_llm_config_qianfan)

    sync_resp = llm.completion(messages)
    assert sync_resp.get("result") == resp_cont

    async_resp = await llm.acompletion(messages)
    assert async_resp.get("result") == resp_cont

    # Shared checks for the remaining chat entry points.
    await llm_general_chat_funcs_test(llm, prompt, messages, resp_cont)

View file

@ -4,12 +4,18 @@
import pytest
from metagpt.config2 import Config
from metagpt.provider.spark_api import GetMessageFromWeb, SparkLLM
from tests.metagpt.provider.mock_llm_config import mock_llm_config
from tests.metagpt.provider.mock_llm_config import (
mock_llm_config,
mock_llm_config_spark,
)
from tests.metagpt.provider.req_resp_const import (
llm_general_chat_funcs_test,
prompt,
resp_cont_tmpl,
)
prompt_msg = "who are you"
resp_content = "I'm Spark"
resp_cont = resp_cont_tmpl.format(name="Spark")
class MockWebSocketApp(object):
@ -23,7 +29,7 @@ class MockWebSocketApp(object):
def test_get_msg_from_web(mocker):
mocker.patch("websocket.WebSocketApp", MockWebSocketApp)
get_msg_from_web = GetMessageFromWeb(prompt_msg, mock_llm_config)
get_msg_from_web = GetMessageFromWeb(prompt, mock_llm_config)
assert get_msg_from_web.gen_params()["parameter"]["chat"]["domain"] == "mock_domain"
ret = get_msg_from_web.run()
@ -31,34 +37,26 @@ def test_get_msg_from_web(mocker):
def mock_spark_get_msg_from_web_run(self) -> str:
return resp_content
return resp_cont
@pytest.mark.asyncio
async def test_spark_aask():
llm = SparkLLM(Config.from_home("spark.yaml").llm)
async def test_spark_aask(mocker):
mocker.patch("metagpt.provider.spark_api.GetMessageFromWeb.run", mock_spark_get_msg_from_web_run)
llm = SparkLLM(mock_llm_config_spark)
resp = await llm.aask("Hello!")
print(resp)
assert resp == resp_cont
@pytest.mark.asyncio
async def test_spark_acompletion(mocker):
mocker.patch("metagpt.provider.spark_api.GetMessageFromWeb.run", mock_spark_get_msg_from_web_run)
spark_gpt = SparkLLM(mock_llm_config)
spark_llm = SparkLLM(mock_llm_config)
resp = await spark_gpt.acompletion([])
assert resp == resp_content
resp = await spark_llm.acompletion([])
assert resp == resp_cont
resp = await spark_gpt.aask(prompt_msg, stream=False)
assert resp == resp_content
resp = await spark_gpt.acompletion_text([], stream=False)
assert resp == resp_content
resp = await spark_gpt.acompletion_text([], stream=True)
assert resp == resp_content
resp = await spark_gpt.aask(prompt_msg)
assert resp == resp_content
await llm_general_chat_funcs_test(spark_llm, prompt, prompt, resp_cont)

View file

@ -6,22 +6,24 @@ import pytest
from metagpt.provider.zhipuai_api import ZhiPuAILLM
from tests.metagpt.provider.mock_llm_config import mock_llm_config_zhipu
from tests.metagpt.provider.req_resp_const import (
get_part_chat_completion,
llm_general_chat_funcs_test,
messages,
prompt,
resp_cont_tmpl,
)
prompt_msg = "who are you"
messages = [{"role": "user", "content": prompt_msg}]
resp_content = "I'm chatglm-turbo"
default_resp = {
"choices": [{"finish_reason": "stop", "index": 0, "message": {"content": resp_content, "role": "assistant"}}],
"usage": {"completion_tokens": 22, "prompt_tokens": 19, "total_tokens": 41},
}
name = "ChatGLM-4"
resp_cont = resp_cont_tmpl.format(name=name)
default_resp = get_part_chat_completion(name)
async def mock_zhipuai_acreate_stream(**kwargs):
async def mock_zhipuai_acreate_stream(self, **kwargs):
class MockResponse(object):
async def _aread(self):
class Iterator(object):
events = [{"choices": [{"index": 0, "delta": {"content": resp_content, "role": "assistant"}}]}]
events = [{"choices": [{"index": 0, "delta": {"content": resp_cont, "role": "assistant"}}]}]
async def __aiter__(self):
for event in self.events:
@ -37,7 +39,7 @@ async def mock_zhipuai_acreate_stream(**kwargs):
return MockResponse()
async def mock_zhipuai_acreate(**kwargs) -> dict:
async def mock_zhipuai_acreate(self, **kwargs) -> dict:
return default_resp
@ -46,22 +48,12 @@ async def test_zhipuai_acompletion(mocker):
mocker.patch("metagpt.provider.zhipuai.zhipu_model_api.ZhiPuModelAPI.acreate", mock_zhipuai_acreate)
mocker.patch("metagpt.provider.zhipuai.zhipu_model_api.ZhiPuModelAPI.acreate_stream", mock_zhipuai_acreate_stream)
zhipu_gpt = ZhiPuAILLM(mock_llm_config_zhipu)
zhipu_llm = ZhiPuAILLM(mock_llm_config_zhipu)
resp = await zhipu_gpt.acompletion(messages)
assert resp["choices"][0]["message"]["content"] == resp_content
resp = await zhipu_llm.acompletion(messages)
assert resp["choices"][0]["message"]["content"] == resp_cont
resp = await zhipu_gpt.aask(prompt_msg, stream=False)
assert resp == resp_content
resp = await zhipu_gpt.acompletion_text(messages, stream=False)
assert resp == resp_content
resp = await zhipu_gpt.acompletion_text(messages, stream=True)
assert resp == resp_content
resp = await zhipu_gpt.aask(prompt_msg)
assert resp == resp_content
await llm_general_chat_funcs_test(zhipu_llm, prompt, messages, resp_cont)
def test_zhipuai_proxy():

View file

@ -1,19 +0,0 @@
import pytest
from metagpt.logs import logger
from metagpt.roles.ci.code_interpreter import CodeInterpreter
@pytest.mark.asyncio
@pytest.mark.parametrize("auto_run", [(True), (False)])
async def test_code_interpreter(mocker, auto_run):
    """Run CodeInterpreter once per ``auto_run`` value and require a non-empty reply.

    ``ExecuteNbCode.run`` is patched to return a fixed ``("a successful run", True)``
    result, and ``builtins.input`` is patched to answer ``"confirm"``.
    """
    mocker.patch("metagpt.actions.ci.execute_nb_code.ExecuteNbCode.run", return_value=("a successful run", True))
    mocker.patch("builtins.input", return_value="confirm")

    ci = CodeInterpreter(auto_run=auto_run, use_tools=True, tools=[])
    rsp = await ci.run("Run data analysis on sklearn Iris dataset, include a plot")
    logger.info(rsp)
    assert len(rsp.content) > 0

View file

@ -0,0 +1,23 @@
import pytest
from metagpt.logs import logger
from metagpt.roles.mi.interpreter import Interpreter
@pytest.mark.asyncio
@pytest.mark.parametrize("auto_run", [(True), (False)])
async def test_interpreter(mocker, auto_run):
    """Run Interpreter once per ``auto_run`` value and check its reply and recorded task code.

    ``ExecuteNbCode.run`` is patched to return a fixed ``("a successful run", True)``
    result, and ``builtins.input`` is patched to answer ``"confirm"``.
    """
    mocker.patch("metagpt.actions.mi.execute_nb_code.ExecuteNbCode.run", return_value=("a successful run", True))
    mocker.patch("builtins.input", return_value="confirm")

    mi = Interpreter(auto_run=auto_run, use_tools=True, tools=[])
    rsp = await mi.run("Run data analysis on sklearn Iris dataset, include a plot")
    logger.info(rsp)
    assert len(rsp.content) > 0

    finished_tasks = mi.planner.plan.get_finished_tasks()
    assert len(finished_tasks) > 0
    first_task = finished_tasks[0]
    assert len(first_task.code) > 0  # check one task to see if code is recorded

View file

@ -1,16 +1,16 @@
import pytest
from metagpt.actions.ci.execute_nb_code import ExecuteNbCode
from metagpt.actions.mi.execute_nb_code import ExecuteNbCode
from metagpt.logs import logger
from metagpt.roles.ci.ml_engineer import MLEngineer
from metagpt.roles.mi.ml_engineer import MLEngineer
from metagpt.schema import Message, Plan, Task
from metagpt.tools.tool_type import ToolType
from tests.metagpt.actions.ci.test_debug_code import CODE, DebugContext, ErrorStr
from tests.metagpt.actions.mi.test_debug_code import CODE, DebugContext, ErrorStr
def test_mle_init():
ci = MLEngineer(goal="test", auto_run=True, use_tools=True, tools=["tool1", "tool2"])
assert ci.tools == []
mle = MLEngineer(goal="test", auto_run=True, use_tools=True, tools=["tool1", "tool2"])
assert mle.tools == []
MockPlan = Plan(

View file

@ -0,0 +1,7 @@
from metagpt.tools.libs.email_login import email_login_imap
def test_email_login(mocker):
    """Smoke test: call ``email_login_imap`` with ``MailBox.login`` patched out."""
    patched_login = mocker.patch("metagpt.tools.libs.email_login.MailBox.login")
    # NOTE(review): this configures a `.login` attribute on the mock that already replaces
    # `MailBox.login`; presumably the return value of the patched method was meant - confirm.
    patched_login.login.return_value = mocker.Mock()
    email_login_imap("test@outlook.com", "test_password")

View file

@ -14,7 +14,7 @@ from metagpt.utils.mermaid import MMC1, mermaid_to_file
@pytest.mark.asyncio
@pytest.mark.parametrize("engine", ["nodejs", "ink"]) # TODO: playwright and pyppeteer
async def test_mermaid(engine, context):
async def test_mermaid(engine, context, mermaid_mocker):
# nodejs prerequisites: npm install -g @mermaid-js/mermaid-cli
# ink prerequisites: connected to internet
# playwright prerequisites: playwright install --with-deps chromium

View file

@ -211,6 +211,11 @@ value
output = repair_invalid_json(output, "Expecting ',' delimiter: line 4 column 1")
assert output == target_output
raw_output = '{"key": "url "http" \\"https\\" "}'
target_output = '{"key": "url \\"http\\" \\"https\\" "}'
output = repair_invalid_json(raw_output, "Expecting ',' delimiter: line 1 column 15 (char 14)")
assert output == target_output
def test_retry_parse_json_text():
from metagpt.utils.repair_llm_raw_output import retry_parse_json_text

View file

@ -6,7 +6,7 @@
import nbformat
import pytest
from metagpt.actions.ci.execute_nb_code import ExecuteNbCode
from metagpt.actions.mi.execute_nb_code import ExecuteNbCode
from metagpt.utils.common import read_json_file
from metagpt.utils.save_code import DATA_PATH, save_code_file

View file

@ -42,6 +42,7 @@ def test_reduce_message_length(msgs, model_name, system_text, reserved, expected
(" ".join("Hello World." for _ in range(1000)), "Prompt: {}", "gpt-3.5-turbo-16k", "System", 3000, 1),
(" ".join("Hello World." for _ in range(4000)), "Prompt: {}", "gpt-4", "System", 2000, 2),
(" ".join("Hello World." for _ in range(8000)), "Prompt: {}", "gpt-4-32k", "System", 4000, 1),
(" ".join("Hello World" for _ in range(8000)), "Prompt: {}", "gpt-3.5-turbo", "System", 1000, 8),
],
)
def test_generate_prompt_chunk(text, prompt_template, model_name, system_text, reserved, expected):

View file

@ -10,6 +10,7 @@ class MockAioResponse:
check_funcs: dict[tuple[str, str], Callable[[dict], str]] = {}
rsp_cache: dict[str, str] = {}
name = "aiohttp"
status = 200
def __init__(self, session, method, url, **kwargs) -> None:
fn = self.check_funcs.get((method, url))
@ -22,6 +23,7 @@ class MockAioResponse:
async def __aenter__(self):
    """Enter the wrapped context and return this mock.

    If a response object is already set, it is entered and its ``status`` is
    copied onto the mock. Otherwise, if a context manager is held in ``mng``,
    entering it supplies the response.
    """
    if self.response:
        await self.response.__aenter__()
        self.status = self.response.status
        return self
    if self.mng:
        # Status is not copied on this path.
        self.response = await self.mng.__aenter__()
    return self
@ -41,6 +43,17 @@ class MockAioResponse:
self.rsp_cache[self.key] = data
return data
@property
def content(self):
    # Returns the mock itself, so `.content.read()` on the mock resolves to this class's own `read`.
    return self
async def read(self):
    """Return the response body, replaying it from ``rsp_cache`` when possible.

    On a cache hit the stored text for ``self.key`` is parsed back into a
    Python literal. On a miss the body is read from the real response, its
    ``str()`` form is stored under ``self.key``, and the body is returned.

    Raises:
        ValueError, SyntaxError: if the cached text is not a plain Python literal.
    """
    import ast  # local import: only this method needs it

    if self.key in self.rsp_cache:
        # literal_eval parses the stored literal (presumably a bytes repr such as "b'...'")
        # without executing code, unlike eval() on text loaded from a cache file.
        return ast.literal_eval(self.rsp_cache[self.key])
    data = await self.response.content.read()
    self.rsp_cache[self.key] = str(data)
    return data
def raise_for_status(self):
if self.response:
self.response.raise_for_status()

View file

@ -1,7 +0,0 @@
llm:
api_type: "spark"
app_id: "xxx"
api_key: "xxx"
api_secret: "xxx"
domain: "generalv2"
base_url: "wss://spark-api.xf-yun.com/v3.1/chat"