From 62ffa730e0ac6144aead1cfb49f42acae2263420 Mon Sep 17 00:00:00 2001 From: didi <84363704+didiforgithub@users.noreply.github.com> Date: Tue, 10 Sep 2024 10:57:21 +0800 Subject: [PATCH] update humaneval baseline & hotpotqa baseline --- .../ags/experiments/baselines/cot_hotpotqa.py | 0 .../experiments/baselines/cot_humaneval.py | 50 +++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 examples/ags/experiments/baselines/cot_hotpotqa.py create mode 100644 examples/ags/experiments/baselines/cot_humaneval.py diff --git a/examples/ags/experiments/baselines/cot_hotpotqa.py b/examples/ags/experiments/baselines/cot_hotpotqa.py new file mode 100644 index 000000000..e69de29bb diff --git a/examples/ags/experiments/baselines/cot_humaneval.py b/examples/ags/experiments/baselines/cot_humaneval.py new file mode 100644 index 000000000..8f0db1066 --- /dev/null +++ b/examples/ags/experiments/baselines/cot_humaneval.py @@ -0,0 +1,50 @@ +from examples.ags.scripts.operator import Operator +from examples.ags.scripts.graph import SolveGraph +from examples.ags.benchmark.humaneval import humaneval_evaluation +from examples.ags.scripts.operator_an import GenerateOp +from metagpt.actions.action_node import ActionNode +from metagpt.configs.models_config import ModelsConfig +from metagpt.llm import LLM +from pydantic import BaseModel, Field + +HUMANEVAL_PROMPT_GPT = """ +{question}\nPlease reason step by step, and put your python function in the end. +""" + +class GenerateOp(BaseModel): + solution: str = Field(default="", description="问题的Python函数实现") + +class CoTGenerate(Operator): + def __init__(self, llm: LLM, name: str = "Generate"): + super().__init__(name, llm) + + async def __call__(self, problem, mode: str = None): + prompt = HUMANEVAL_PROMPT_GPT.format(question=problem) + fill_kwargs = {"context": prompt, "llm": self.llm} + if mode: + fill_kwargs["mode"] = mode + node = await ActionNode.from_pydantic(GenerateOp).fill(**fill_kwargs) + response = node.instruct_content.model_dump() + return response + +class CoTSolveGraph(SolveGraph): + def __init__(self, name: str, llm_config, dataset: str): + super().__init__(name, llm_config, dataset) + self.cot_generate = CoTGenerate(self.llm) + + async def __call__(self, problem): + solution = await self.cot_generate(problem, mode="code_fill") + return solution["solution"] + +if __name__ == "__main__": + async def main(): + llm_config = ModelsConfig.default().get("gpt-35-turbo-1106") + graph = CoTSolveGraph(name="CoT", llm_config=llm_config, dataset="HumanEval") + file_path = "examples/ags/data/human-eval-new.jsonl" + samples = 1 # 33/131 + path = "examples/ags/data/baselines/general" + score = await humaneval_evaluation(graph, file_path, samples, path) + return score + + import asyncio + asyncio.run(main())