mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-21 14:05:17 +02:00
Update README.md add aide setup and run
This commit is contained in:
parent
e3fccce73d
commit
af41f1f1cf
1 changed files with 145 additions and 1 deletions
146
expo/README.md
146
expo/README.md
|
|
@ -57,7 +57,151 @@ ### DS Agent
|
|||
|
||||
|
||||
### AIDE
|
||||
提供 GitHub 链接,并说明使用的命令以及参数设置
|
||||
|
||||
#### Setup
|
||||
|
||||
```
|
||||
git clone https://github.com/WecoAI/aideml.git
|
||||
```
|
||||
|
||||
将 `aideml/aide/utils/config.yaml` 的内容修改如下:
|
||||
|
||||
```yaml
|
||||
# path to the task data directory
|
||||
data_dir: null
|
||||
|
||||
# either provide a path to a plaintext file describing the task
|
||||
desc_file: null
|
||||
# or provide the task goal (and optionally evaluation information) as arguments
|
||||
goal: null
|
||||
eval: null
|
||||
|
||||
log_dir: logs
|
||||
workspace_dir: workspaces
|
||||
|
||||
# whether to unzip any archives in the data directory
|
||||
preprocess_data: True
|
||||
# whether to copy the data to the workspace directory (otherwise it will be symlinked)
|
||||
# copying is recommended to prevent the agent from accidentally modifying the original data
|
||||
copy_data: True
|
||||
|
||||
exp_name: null # a random experiment name will be generated if not provided
|
||||
|
||||
# settings for code execution
|
||||
exec:
|
||||
timeout: 3600
|
||||
agent_file_name: runfile.py
|
||||
format_tb_ipython: False
|
||||
|
||||
# agent hyperparams
|
||||
agent:
|
||||
# how many improvement iterations to run
|
||||
steps: 10
|
||||
# whether to instruct the agent to use CV (set to 1 to disable)
|
||||
k_fold_validation: 1
|
||||
# whether to instruct the agent to generate a prediction function
|
||||
expose_prediction: False
|
||||
# whether to provide the agent with a preview of the data
|
||||
data_preview: True
|
||||
|
||||
# LLM settings for coding
|
||||
code:
|
||||
model: deepseek-coder
|
||||
temp: 0.5
|
||||
|
||||
# LLM settings for evaluating program output / tracebacks
|
||||
feedback:
|
||||
model: deepseek-coder
|
||||
temp: 0.5
|
||||
|
||||
# hyperparameters for the tree search
|
||||
search:
|
||||
max_debug_depth: 3
|
||||
debug_prob: 0.5
|
||||
num_drafts: 5
|
||||
```
|
||||
|
||||
由于 deepseek 完全兼容 OpenAI 的 API,只需将 `base_url` 设置为自己的 url、`api_key` 设置为自己的 key 即可(通过下面的环境变量配置):
|
||||
|
||||
```
|
||||
export OPENAI_API_KEY="自己的key"
|
||||
export OPENAI_BASE_URL="自己的url"
|
||||
```
|
||||
|
||||
将 `aideml/aide/backend/__init__.py` 第 30 行处的内容修改如下:
|
||||
|
||||
```python
|
||||
model_kwargs = model_kwargs | {
|
||||
"model": model,
|
||||
"temperature": temperature,
|
||||
"max_tokens": max_tokens,
|
||||
}
|
||||
if "claude-" in model:
|
||||
query_func = backend_anthropic.query
|
||||
else:
|
||||
query_func = backend_openai.query
|
||||
```
|
||||
|
||||
由于 deepseek V2.5 不再支持在带有 system message 的情况下使用 function call,需将 `aideml/aide/agent.py` 第 312 行处的内容修改如下(将 `system_message` 置为 `None`,提示词改由 `user_message` 传入):
|
||||
|
||||
```python
|
||||
response = cast(
|
||||
dict,
|
||||
query(
|
||||
system_message=None,
|
||||
user_message=prompt,
|
||||
func_spec=review_func_spec,
|
||||
model=self.acfg.feedback.model,
|
||||
temperature=self.acfg.feedback.temp,
|
||||
),
|
||||
)
|
||||
```
|
||||
|
||||
修改完成后,在 `aideml` 目录下以可编辑模式安装:
|
||||
|
||||
```
|
||||
cd aideml
|
||||
pip install -e .
|
||||
```
|
||||
|
||||
#### Run
|
||||
|
||||
运行下面的脚本获取运行结果,在当前目录下将生成一个 `logs` 文件夹以及一个 `workspaces` 文件夹(分别对应 `config.yaml` 中的 `log_dir` 与 `workspace_dir`)
|
||||
`logs` 文件夹中将包含实验使用的配置以及生成方案的记录,`workspaces` 文件夹下将保存 aide 最后生成的结果文件
|
||||
|
||||
```python
|
||||
import aide
|
||||
import os
|
||||
import time
|
||||
|
||||
os.environ["OPENAI_API_KEY"] = "sk-xxx"
|
||||
os.environ["OPENAI_BASE_URL"] = "your url"
|
||||
start_time = time.time()
|
||||
data_dir = "xxx/data/titanic"
|
||||
goal = f"""
|
||||
# User requirement
|
||||
({data_dir}, 'This is a 04_titanic dataset. Your goal is to predict the target column `Survived`.\nPerform data analysis, data preprocessing, feature engineering, and modeling to predict the target. \nReport f1 on the eval data. Do not plot or make any visualizations.\n')
|
||||
|
||||
# Data dir
|
||||
training (with labels): train.csv
|
||||
testing (without labels): test.csv
|
||||
dataset description: dataset_info.json (You can use this file to get additional information about the dataset)"""
|
||||
|
||||
exp = aide.Experiment(
|
||||
data_dir=data_dir, # replace this with your own directory
|
||||
goal=goal,
|
||||
eval="f1", # replace with your own evaluation metric
|
||||
)
|
||||
|
||||
best_solution = exp.run(steps=10)
|
||||
|
||||
print(f"Best solution has validation metric: {best_solution.valid_metric}")
|
||||
print(f"Best solution code: {best_solution.code}")
|
||||
end_time = time.time()
|
||||
execution_time = end_time - start_time
|
||||
|
||||
print(f"run time : {execution_time} seconds")
|
||||
```
|
||||
|
||||
### Autogluon
|
||||
#### Setup
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue