Mirror of https://github.com/FoundationAgents/MetaGPT.git, synced 2026-05-02 20:32:38 +02:00
Update HumanEval Eval

parent c7f44e956d
commit 6a84a9d49b

166 changed files with 1352 additions and 6851 deletions
cot_humaneval.py (new file, 53 lines)

@@ -0,0 +1,53 @@
from typing import Optional

from examples.ags.scripts.operator import Operator
from examples.ags.scripts.graph import SolveGraph
from examples.ags.benchmark.humaneval import humaneval_evaluation
from metagpt.actions.action_node import ActionNode
from metagpt.configs.models_config import ModelsConfig
from metagpt.llm import LLM
from pydantic import BaseModel, Field

HUMANEVAL_PROMPT_GPT = """
{question}\nPlease provide a step-by-step explanation in text, followed by your Python function without any additional text or test cases.
"""

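# Structured output schema: ActionNode.from_pydantic parses the LLM reply into
# this model, so only the `solution` field is extracted from the response.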
class GenerateOp(BaseModel):
    solution: str = Field(default="", description="Python solution for this question.")

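
# Chain-of-thought generation operator: formats the problem into the CoT
# prompt above and fills the GenerateOp schema through an ActionNode call.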
class CoTGenerate(Operator):
    def __init__(self, llm: LLM, name: str = "Generate"):
        super().__init__(name, llm)

    async def __call__(self, problem, function_name, mode: Optional[str] = None):
        prompt = HUMANEVAL_PROMPT_GPT.format(question=problem)
        fill_kwargs = {"context": prompt, "llm": self.llm, "function_name": function_name}
        if mode:
            fill_kwargs["mode"] = mode
        node = await ActionNode.from_pydantic(GenerateOp).fill(**fill_kwargs)
        response = node.instruct_content.model_dump()
        return response

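
# Single-operator solve graph: one CoT generation pass per problem, returning
# the solution string together with the accumulated LLM cost.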
class CoTSolveGraph(SolveGraph):
    def __init__(self, name: str, llm_config, dataset: str):
        super().__init__(name, llm_config, dataset)
        self.cot_generate = CoTGenerate(self.llm)

    async def __call__(self, problem, function_name):
        solution = await self.cot_generate(problem, function_name, mode="code_fill")
        return solution["solution"], self.llm.cost_manager.total_cost

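
# Script entry point: evaluates the CoT baseline on a HumanEval subset
# (33 of the 131 problems in this split) and returns the benchmark score.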
if __name__ == "__main__":

    async def main():
        # llm_config = ModelsConfig.default().get("gpt-4o-mini")
        # llm_config = ModelsConfig.default().get("gpt-35-turbo-1106")
        llm_config = ModelsConfig.default().get("deepseek-chat")
        # llm_config = ModelsConfig.default().get("gpt-4o")
        graph = CoTSolveGraph(name="CoT", llm_config=llm_config, dataset="HumanEval")
        file_path = "examples/ags/data/baseline_data/human-eval.jsonl"
        samples = 33  # 33/131
        path = "examples/ags/data/baselines/general/humaneval"
        score = await humaneval_evaluation(graph, file_path, samples, path, test=True)
        return score

    import asyncio

    asyncio.run(main())
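
For orientation, a minimal sketch of driving the graph on a single problem outside the benchmark harness. The problem text and function name below are illustrative placeholders, not part of the commit, and the model config mirrors the one chosen in main():

import asyncio

async def smoke_test():
    llm_config = ModelsConfig.default().get("deepseek-chat")
    graph = CoTSolveGraph(name="CoT", llm_config=llm_config, dataset="HumanEval")
    # Hypothetical stand-in for a real HumanEval prompt.
    problem = 'def add(a: int, b: int) -> int:\n    """Return the sum of a and b."""'
    solution, cost = await graph(problem, function_name="add")
    print(solution)  # the generated Python function
    print(cost)      # cumulative LLM spend from the cost manager

asyncio.run(smoke_test())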