diff --git a/examples/build_customized_agent.py b/examples/build_customized_agent.py
index 87d7a9c76..c7069b768 100644
--- a/examples/build_customized_agent.py
+++ b/examples/build_customized_agent.py
@@ -75,10 +75,9 @@ class SimpleCoder(Role):
         logger.info(f"{self._setting}: ready to {self._rc.todo}")
         todo = self._rc.todo
 
-        msg = self._rc.memory.get()[-1] # retrieve the latest memory
-        instruction = msg.content
+        msg = self.get_memories(k=1)[0] # find the most recent messages
 
-        code_text = await SimpleWriteCode().run(instruction)
+        code_text = await SimpleWriteCode().run(msg.content)
         msg = Message(content=code_text, role=self.profile, cause_by=todo)
 
         return msg
@@ -92,43 +91,20 @@ class RunnableCoder(Role):
     ):
         super().__init__(name, profile, **kwargs)
         self._init_actions([SimpleWriteCode, SimpleRunCode])
-
-    async def _think(self) -> None:
-        if self._rc.todo is None:
-            self._set_state(0)
-            return
-
-        if self._rc.state + 1 < len(self._states):
-            self._set_state(self._rc.state + 1)
-        else:
-            self._rc.todo = None
+        self._set_react_mode(react_mode="by_order")
 
     async def _act(self) -> Message:
         logger.info(f"{self._setting}: ready to {self._rc.todo}")
         todo = self._rc.todo
-        msg = self._rc.memory.get()[-1]
 
-        if isinstance(todo, SimpleWriteCode):
-            instruction = msg.content
-            result = await SimpleWriteCode().run(instruction)
+        msg = self.get_memories(k=1)[0] # find the most k recent messages
+        result = await todo.run(msg.content)
 
-        elif isinstance(todo, SimpleRunCode):
-            code_text = msg.content
-            result = await SimpleRunCode().run(code_text)
-
-        msg = Message(content=result, role=self.profile, cause_by=todo)
+        msg = Message(content=result, role=self.profile, cause_by=type(todo))
         self._rc.memory.add(msg)
         return msg
 
-    async def _react(self) -> Message:
-        while True:
-            await self._think()
-            if self._rc.todo is None:
-                break
-            await self._act()
-        return Message(content="All job done", role=self.profile)
-
-def main(msg="write a function that calculates the sum of a list"):
+def main(msg="write a function that calculates the product of a list and run it"):
     # role = SimpleCoder()
     role = RunnableCoder()
     logger.info(msg)
diff --git a/metagpt/roles/invoice_ocr_assistant.py b/metagpt/roles/invoice_ocr_assistant.py
index c307b20c0..15f831c97 100644
--- a/metagpt/roles/invoice_ocr_assistant.py
+++ b/metagpt/roles/invoice_ocr_assistant.py
@@ -42,17 +42,7 @@ class InvoiceOCRAssistant(Role):
         self.filename = ""
         self.origin_query = ""
         self.orc_data = None
-
-    async def _think(self) -> None:
-        """Determine the next action to be taken by the role."""
-        if self._rc.todo is None:
-            self._set_state(0)
-            return
-
-        if self._rc.state + 1 < len(self._states):
-            self._set_state(self._rc.state + 1)
-        else:
-            self._rc.todo = None
+        self._set_react_mode(react_mode="by_order")
 
     async def _act(self) -> Message:
         """Perform an action as determined by the role.
@@ -94,17 +84,3 @@ class InvoiceOCRAssistant(Role):
         msg = Message(content=content, instruct_content=resp)
         self._rc.memory.add(msg)
         return msg
-
-    async def _react(self) -> Message:
-        """Execute the invoice ocr assistant's think and actions.
-
-        Returns:
-            A message containing the final result of the assistant's actions.
-        """
-        while True:
-            await self._think()
-            if self._rc.todo is None:
-                break
-            msg = await self._act()
-        return msg
-
diff --git a/metagpt/roles/researcher.py b/metagpt/roles/researcher.py
index acb46c718..c5512121a 100644
--- a/metagpt/roles/researcher.py
+++ b/metagpt/roles/researcher.py
@@ -31,20 +31,11 @@ class Researcher(Role):
     ):
         super().__init__(name, profile, goal, constraints, **kwargs)
         self._init_actions([CollectLinks(name), WebBrowseAndSummarize(name), ConductResearch(name)])
+        self._set_react_mode(react_mode="by_order")
         self.language = language
         if language not in ("en-us", "zh-cn"):
             logger.warning(f"The language `{language}` has not been tested, it may not work.")
 
-    async def _think(self) -> None:
-        if self._rc.todo is None:
-            self._set_state(0)
-            return
-
-        if self._rc.state + 1 < len(self._states):
-            self._set_state(self._rc.state + 1)
-        else:
-            self._rc.todo = None
-
     async def _act(self) -> Message:
         logger.info(f"{self._setting}: ready to {self._rc.todo}")
         todo = self._rc.todo
@@ -73,12 +64,8 @@ class Researcher(Role):
         self._rc.memory.add(ret)
         return ret
 
-    async def _react(self) -> Message:
-        while True:
-            await self._think()
-            if self._rc.todo is None:
-                break
-            msg = await self._act()
+    async def react(self) -> Message:
+        msg = await super().react()
         report = msg.instruct_content
         self.write_report(report.topic, report.content)
         return msg
diff --git a/metagpt/roles/role.py b/metagpt/roles/role.py
index 44bb3e976..0251176f7 100644
--- a/metagpt/roles/role.py
+++ b/metagpt/roles/role.py
@@ -7,7 +7,8 @@
 """
 from __future__ import annotations
 
-from typing import Iterable, Type
+from typing import Iterable, Type, Union
+from enum import Enum
 
 from pydantic import BaseModel, Field
 
@@ -27,12 +28,14 @@ Please note that only the text between the first and second "===" is information
 {history}
 ===
 
-You can now choose one of the following stages to decide the stage you need to go in the next step:
+Your previous stage: {previous_state}
+
+Now choose one of the following stages you need to go to in the next step:
 {states}
 
 Just answer a number between 0-{n_states}, choose the most suitable stage according to the understanding of the conversation.
 Please note that the answer only needs a number, no need to add any other text.
-If there is no conversation record, choose 0.
+If you think you have completed your goal and don't need to go to any of the stages, return -1.
 Do not answer anything else, and do not add any other information in your answer.
 """
 
@@ -46,6 +49,14 @@ ROLE_TEMPLATE = """Your response should be based on the previous conversation hi
 {name}: {result}
 """
 
+class RoleReactMode(str, Enum):
+    REACT = "react"
+    BY_ORDER = "by_order"
+    PLAN_AND_ACT = "plan_and_act"
+
+    @classmethod
+    def values(cls):
+        return [item.value for item in cls]
 
 class RoleSetting(BaseModel):
     """Role Settings"""
@@ -67,10 +78,12 @@ class RoleContext(BaseModel):
     env: 'Environment' = Field(default=None)
     memory: Memory = Field(default_factory=Memory)
     long_term_memory: LongTermMemory = Field(default_factory=LongTermMemory)
-    state: int = Field(default=0)
+    state: int = Field(default=-1) # -1 indicates initial or termination state where todo is None
     todo: Action = Field(default=None)
     watch: set[Type[Action]] = Field(default_factory=set)
     news: list[Type[Message]] = Field(default=[])
+    react_mode: RoleReactMode = RoleReactMode.REACT # see `Role._set_react_mode` for definitions of the following two attributes
+    max_react_loop: int = 1
 
     class Config:
         arbitrary_types_allowed = True
@@ -116,17 +129,38 @@ class Role:
             self._actions.append(i)
             self._states.append(f"{idx}. {action}")
 
+    def _set_react_mode(self, react_mode: str, max_react_loop: int = 1):
+        """Set strategy of the Role reacting to observed Message. Variation lies in how
+        this Role elects action to perform during the _think stage, especially if it is capable of multiple Actions.
+
+        Args:
+            react_mode (str): Mode for choosing action during the _think stage, can be one of:
+                        "react": standard think-act loop in the ReAct paper, alternating thinking and acting to solve the task, i.e. _think -> _act -> _think -> _act -> ...
+                                 Use llm to select actions in _think dynamically;
+                        "by_order": switch action each time by order defined in _init_actions, i.e. _act (Action1) -> _act (Action2) -> ...;
+                        "plan_and_act": first plan, then execute an action sequence, i.e. _think (of a plan) -> _act -> _act -> ...
+                                        Use llm to come up with the plan dynamically.
+                        Defaults to "react".
+            max_react_loop (int): Maximum react cycles to execute, used to prevent the agent from reacting forever.
+                                  Take effect only when react_mode is react, in which we use llm to choose actions, including termination.
+                                  Defaults to 1, i.e. _think -> _act (-> return result and end)
+        """
+        assert react_mode in RoleReactMode.values(), f"react_mode must be one of {RoleReactMode.values()}"
+        self._rc.react_mode = react_mode
+        if react_mode == RoleReactMode.REACT:
+            self._rc.max_react_loop = max_react_loop
+
     def _watch(self, actions: Iterable[Type[Action]]):
         """Listen to the corresponding behaviors"""
         self._rc.watch.update(actions)
         # check RoleContext after adding watch actions
         self._rc.check(self._role_id)
 
-    def _set_state(self, state):
+    def _set_state(self, state: int):
         """Update the current state."""
         self._rc.state = state
         logger.debug(self._actions)
-        self._rc.todo = self._actions[self._rc.state]
+        self._rc.todo = self._actions[self._rc.state] if state >= 0 else None
 
     def set_env(self, env: 'Environment'):
         """Set the environment in which the role works. The role can talk to the environment and can also receive messages by observing."""
@@ -151,13 +185,19 @@ class Role:
             return
         prompt = self._get_prefix()
         prompt += STATE_TEMPLATE.format(history=self._rc.history, states="\n".join(self._states),
-                                        n_states=len(self._states) - 1)
+                                        n_states=len(self._states) - 1, previous_state=self._rc.state)
+        # print(prompt)
         next_state = await self._llm.aask(prompt)
         logger.debug(f"{prompt=}")
-        if not next_state.isdigit() or int(next_state) not in range(len(self._states)):
-            logger.warning(f'Invalid answer of state, {next_state=}')
-            next_state = "0"
-        self._set_state(int(next_state))
+        if (not next_state.isdigit() and next_state != "-1") \
+            or int(next_state) not in range(-1, len(self._states)):
+            logger.warning(f'Invalid answer of state, {next_state=}, will be set to -1')
+            next_state = -1
+        else:
+            next_state = int(next_state)
+            if next_state == -1:
+                logger.info(f"End actions with {next_state=}")
+        self._set_state(next_state)
 
     async def _act(self) -> Message:
         # prompt = self.get_prefix()
@@ -203,10 +243,45 @@ class Role:
         self._rc.env.publish_message(msg)
 
     async def _react(self) -> Message:
-        """Think first, then act"""
-        await self._think()
-        logger.debug(f"{self._setting}: {self._rc.state=}, will do {self._rc.todo}")
-        return await self._act()
+        """Think first, then act, until the Role _think it is time to stop and requires no more todo.
+        This is the standard think-act loop in the ReAct paper, which alternates thinking and acting in task solving, i.e. _think -> _act -> _think -> _act -> ... 
+        Use llm to select actions in _think dynamically
+        """
+        actions_taken = 0
+        rsp = Message("No actions taken yet") # will be overwritten after Role _act
+        while actions_taken < self._rc.max_react_loop:
+            # think
+            await self._think()
+            if self._rc.todo is None:
+                break
+            # act
+            logger.debug(f"{self._setting}: {self._rc.state=}, will do {self._rc.todo}")
+            rsp = await self._act()
+            actions_taken += 1
+        return rsp # return output from the last action
+
+    async def _act_by_order(self) -> Message:
+        """switch action each time by order defined in _init_actions, i.e. _act (Action1) -> _act (Action2) -> ..."""
+        for i in range(len(self._states)):
+            self._set_state(i)
+            rsp = await self._act()
+        return rsp # return output from the last action
+
+    async def _plan_and_act(self) -> Message:
+        """first plan, then execute an action sequence, i.e. _think (of a plan) -> _act -> _act -> ... Use llm to come up with the plan dynamically."""
+        # TODO: to be implemented
+        return Message("")
+
+    async def react(self) -> Message:
+        """Entry to one of three strategies by which Role reacts to the observed Message"""
+        if self._rc.react_mode == RoleReactMode.REACT:
+            rsp = await self._react()
+        elif self._rc.react_mode == RoleReactMode.BY_ORDER:
+            rsp = await self._act_by_order()
+        elif self._rc.react_mode == RoleReactMode.PLAN_AND_ACT:
+            rsp = await self._plan_and_act()
+        self._set_state(state=-1) # current reaction is complete, reset state to -1 and todo back to None
+        return rsp
 
     def recv(self, message: Message) -> None:
         """add message to history."""
@@ -223,6 +298,10 @@ class Role:
 
         return await self._react()
 
+    def get_memories(self, k=0) -> list[Message]:
+        """A wrapper to return the most recent k memories of this role, return all when k=0"""
+        return self._rc.memory.get(k=k)
+
     async def run(self, message=None):
         """Observe, and think and act based on the results of the observation"""
         if message:
@@ -237,7 +316,7 @@ class Role:
             logger.debug(f"{self._setting}: no news. waiting.")
             return
 
-        rsp = await self._react()
+        rsp = await self.react()
         # Publish the reply to the environment, waiting for the next subscriber to process
         self._publish_message(rsp)
         return rsp