mirror of
https://github.com/FoundationAgents/MetaGPT.git
synced 2026-05-04 13:22:39 +02:00
feat: + pic2txt
This commit is contained in:
parent
5f55590a57
commit
dcb76de45e
6 changed files with 55 additions and 19 deletions
|
|
@ -20,7 +20,7 @@ from metagpt.utils.common import encode_image, general_after_log, to_markdown_co
|
|||
@register_tool(include_functions=["run"])
|
||||
class Pic2Txt(Action):
|
||||
"""Pic2Txt deal with the following situations:
|
||||
1. Given a picture about the user requirements, write out the textual user requirements.
|
||||
Given some pictures depicting user requirements alongside contextual description, write out the intact textual user requirements.
|
||||
"""
|
||||
|
||||
async def run(
|
||||
|
|
@ -28,11 +28,36 @@ class Pic2Txt(Action):
|
|||
*,
|
||||
image_paths: List[str],
|
||||
textual_user_requirement: str = "",
|
||||
acknowledge: str = "",
|
||||
legacy_output: str = "",
|
||||
evaluation_conclusion: str = "",
|
||||
additional_technical_requirements: str = "",
|
||||
) -> str:
|
||||
"""
|
||||
Given some pictures depicting user requirements alongside contextual description, write out the intact textual user requirements
|
||||
|
||||
Args:
|
||||
image_paths (List[str]): A list of file paths to the input image(s) depicting user requirements.
|
||||
textual_user_requirement (str, optional): Textual user requirement that accompanies the given images, if any.
|
||||
legacy_output (str, optional): The intact textual user requirements generated by you last time, if any.
|
||||
evaluation_conclusion (str, optional): Conclusion or evaluation based on the processed requirements.
|
||||
additional_technical_requirements (str, optional): Any supplementary technical details relevant to the process.
|
||||
|
||||
Returns:
|
||||
str: Textual representation of user requirements extracted from the provided image(s).
|
||||
|
||||
Raises:
|
||||
ValueError: If image_paths list is empty.
|
||||
OSError: If there is an issue accessing or reading the image files.
|
||||
|
||||
Example:
|
||||
>>> images = ["requirements/pic/1.png", "requirements/pic/2.png", "requirements/pic/3.png"]
|
||||
>>> textual_user_requirements = "User requirement paragraph 1 ..., . paragraph 2......"
|
||||
>>> action = Pic2Txt()
|
||||
>>> intact_textual_user_requirements = await action.run(image_paths=images, textual_user_requirement=textual_user_requirements)
|
||||
>>> print(intact_textual_user_requirements)
|
||||
"User requirement paragraph 1 ...,  This picture describes... paragraph 2......"
|
||||
|
||||
"""
|
||||
descriptions = {}
|
||||
for i in image_paths:
|
||||
filename = Path(i)
|
||||
|
|
|
|||
|
|
@ -40,8 +40,17 @@ from metagpt.utils.token_counter import (
|
|||
)
|
||||
|
||||
|
||||
@register_provider([LLMType.OPENAI, LLMType.FIREWORKS, LLMType.OPEN_LLM, LLMType.MOONSHOT, LLMType.MISTRAL, LLMType.YI,
|
||||
LLMType.OPEN_ROUTER])
|
||||
@register_provider(
|
||||
[
|
||||
LLMType.OPENAI,
|
||||
LLMType.FIREWORKS,
|
||||
LLMType.OPEN_LLM,
|
||||
LLMType.MOONSHOT,
|
||||
LLMType.MISTRAL,
|
||||
LLMType.YI,
|
||||
LLMType.OPEN_ROUTER,
|
||||
]
|
||||
)
|
||||
class OpenAILLM(BaseLLM):
|
||||
"""Check https://platform.openai.com/examples for examples"""
|
||||
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ class DataAnalyst(DataInterpreter):
|
|||
# print(*context, sep="\n" + "*" * 5 + "\n")
|
||||
async with ThoughtReporter(enable_llm_stream=True):
|
||||
rsp = await self.llm.aask(context)
|
||||
self.commands = json.loads(CodeParser.parse_code(block=None, lang='json', text=rsp))
|
||||
self.commands = json.loads(CodeParser.parse_code(block=None, lang="json", text=rsp))
|
||||
self.rc.working_memory.add(Message(content=rsp, role="assistant"))
|
||||
|
||||
await run_commands(self, self.commands, self.rc.working_memory)
|
||||
|
|
|
|||
|
|
@ -11,7 +11,11 @@ from pydantic import model_validator
|
|||
from metagpt.actions import Action
|
||||
from metagpt.actions.di.run_command import RunCommand
|
||||
from metagpt.logs import logger
|
||||
from metagpt.prompts.di.role_zero import CMD_PROMPT, ROLE_INSTRUCTION, JSON_REPAIR_PROMPT
|
||||
from metagpt.prompts.di.role_zero import (
|
||||
CMD_PROMPT,
|
||||
JSON_REPAIR_PROMPT,
|
||||
ROLE_INSTRUCTION,
|
||||
)
|
||||
from metagpt.roles import Role
|
||||
from metagpt.schema import AIMessage, Message, UserMessage
|
||||
from metagpt.strategy.experience_retriever import DummyExpRetriever, ExpRetriever
|
||||
|
|
@ -21,8 +25,8 @@ from metagpt.tools.libs.editor import Editor
|
|||
from metagpt.tools.tool_recommend import BM25ToolRecommender, ToolRecommender
|
||||
from metagpt.tools.tool_registry import register_tool
|
||||
from metagpt.utils.common import CodeParser
|
||||
from metagpt.utils.repair_llm_raw_output import RepairType, repair_llm_raw_output
|
||||
from metagpt.utils.report import ThoughtReporter
|
||||
from metagpt.utils.repair_llm_raw_output import repair_llm_raw_output, RepairType
|
||||
|
||||
|
||||
@register_tool(include_functions=["ask_human", "reply_to_human"])
|
||||
|
|
@ -166,7 +170,7 @@ class RoleZero(Role):
|
|||
try:
|
||||
commands = CodeParser.parse_code(block=None, lang="json", text=self.command_rsp)
|
||||
commands = json.loads(repair_llm_raw_output(output=commands, req_keys=[None], repair_type=RepairType.JSON))
|
||||
except json.JSONDecodeError as e:
|
||||
except json.JSONDecodeError:
|
||||
commands = await self.llm.aask(msg=JSON_REPAIR_PROMPT.format(json_data=self.command_rsp))
|
||||
commands = json.loads(CodeParser.parse_code(block=None, lang="json", text=commands))
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -9,9 +9,10 @@
|
|||
|
||||
from metagpt.actions import UserRequirement, WritePRD
|
||||
from metagpt.actions.prepare_documents import PrepareDocuments
|
||||
from metagpt.actions.requirement_analysis.requirement.pic2txt import Pic2Txt
|
||||
from metagpt.roles.di.role_zero import RoleZero
|
||||
from metagpt.roles.role import RoleReactMode
|
||||
from metagpt.utils.common import any_to_name, any_to_str
|
||||
from metagpt.utils.common import any_to_name, any_to_str, tool2name
|
||||
from metagpt.utils.git_repository import GitRepository
|
||||
|
||||
|
||||
|
|
@ -32,9 +33,9 @@ class ProductManager(RoleZero):
|
|||
constraints: str = "utilize the same language as the user requirements for seamless communication"
|
||||
todo_action: str = any_to_name(WritePRD)
|
||||
|
||||
instruction: str = """Use WritePRD tool to write PRD"""
|
||||
instruction: str = """Use WritePRD tool to write PRD if a PRD is required; Use `Pic2Txt` tool to write out an intact textual user requirements if an intact textual user requiremnt is required given some images alongside the contextual textual descriptions;"""
|
||||
max_react_loop: int = 1 # FIXME: Read and edit files requires more steps, consider later
|
||||
tools: list[str] = ["Editor:write,read,write_content", "RoleZero", "WritePRD"]
|
||||
tools: list[str] = ["Editor:write,read,write_content", "RoleZero", "WritePRD", Pic2Txt.__name__]
|
||||
|
||||
def __init__(self, **kwargs) -> None:
|
||||
super().__init__(**kwargs)
|
||||
|
|
@ -47,12 +48,9 @@ class ProductManager(RoleZero):
|
|||
|
||||
def _update_tool_execution(self):
|
||||
wp = WritePRD()
|
||||
self.tool_execution_map.update(
|
||||
{
|
||||
"WritePRD.run": wp.run,
|
||||
"WritePRD": wp.run, # alias
|
||||
}
|
||||
)
|
||||
self.tool_execution_map.update(tool2name(WritePRD, ["run"], wp.run))
|
||||
pic2txt = Pic2Txt()
|
||||
self.tool_execution_map.update(tool2name(Pic2Txt, ["run"], pic2txt.run))
|
||||
|
||||
async def _think(self) -> bool:
|
||||
"""Decide what to do"""
|
||||
|
|
|
|||
|
|
@ -13,11 +13,11 @@ async def test_pic2txt(context):
|
|||
TEST_DATA_PATH / "requirements/pic/3.png",
|
||||
]
|
||||
textual_user_requirements = await aread(filename=TEST_DATA_PATH / "requirements/1.original_requirement.txt")
|
||||
acknowledge = await aread(filename=TEST_DATA_PATH / "requirements/1.acknowledge.md")
|
||||
|
||||
action = Pic2Txt(context=context)
|
||||
rsp = await action.run(
|
||||
image_paths=images, textual_user_requirement=textual_user_requirements, acknowledge=acknowledge
|
||||
image_paths=images,
|
||||
textual_user_requirement=textual_user_requirements,
|
||||
)
|
||||
assert rsp
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue