Update to enable knowledge extraction using the agent framework (#439)

* Implement KG extraction agent (kg-extract-agent) * Using ReAct framework (agent-manager-react) * ReAct manager had an issue when emitting JSON, which conflicts which ReAct manager's own JSON messages, so refactored ReAct manager to use traditional ReAct messages, non-JSON structure. * Minor refactor to take the prompt template client out of prompt-template so it can be more readily used by other modules. kg-extract-agent uses this framework.
2026-04-26 00:46:22 +02:00 · 2025-07-21 14:31:57 +01:00 · 2025-07-21 14:31:57 +01:00 · d83e4e3d59
commit d83e4e3d59
parent 1fe4ed5226
30 changed files with 3192 additions and 799 deletions
--- a/tests/integration/test_agent_manager_integration.py
+++ b/tests/integration/test_agent_manager_integration.py
@ -28,11 +28,11 @@ class TestAgentManagerIntegration:
        
        # Mock prompt client
        prompt_client = AsyncMock()
-        prompt_client.agent_react.return_value = {
-            "thought": "I need to search for information about machine learning",
-            "action": "knowledge_query",
-            "arguments": {"question": "What is machine learning?"}
-        }
+        prompt_client.agent_react.return_value = """Thought: I need to search for information about machine learning
+Action: knowledge_query
+Args: {
+    "question": "What is machine learning?"
+}"""
        
        # Mock graph RAG client
        graph_rag_client = AsyncMock()
@ -147,10 +147,8 @@ class TestAgentManagerIntegration:
    async def test_agent_manager_final_answer(self, agent_manager, mock_flow_context):
        """Test agent manager returning final answer"""
        # Arrange
-        mock_flow_context("prompt-request").agent_react.return_value = {
-            "thought": "I have enough information to answer the question",
-            "final-answer": "Machine learning is a field of AI that enables computers to learn from data."
-        }
+        mock_flow_context("prompt-request").agent_react.return_value = """Thought: I have enough information to answer the question
+Final Answer: Machine learning is a field of AI that enables computers to learn from data."""
        
        question = "What is machine learning?"
        history = []
@ -195,10 +193,8 @@ class TestAgentManagerIntegration:
    async def test_agent_manager_react_with_final_answer(self, agent_manager, mock_flow_context):
        """Test ReAct cycle ending with final answer"""
        # Arrange
-        mock_flow_context("prompt-request").agent_react.return_value = {
-            "thought": "I can provide a direct answer",
-            "final-answer": "Machine learning is a branch of artificial intelligence."
-        }
+        mock_flow_context("prompt-request").agent_react.return_value = """Thought: I can provide a direct answer
+Final Answer: Machine learning is a branch of artificial intelligence."""
        
        question = "What is machine learning?"
        history = []
@ -258,11 +254,11 @@ class TestAgentManagerIntegration:

        for tool_name, expected_service in tool_scenarios:
            # Arrange
-            mock_flow_context("prompt-request").agent_react.return_value = {
-                "thought": f"I need to use {tool_name}",
-                "action": tool_name,
-                "arguments": {"question": "test question"}
-            }
+            mock_flow_context("prompt-request").agent_react.return_value = f"""Thought: I need to use {tool_name}
+Action: {tool_name}
+Args: {{
+    "question": "test question"
+}}"""
            
            think_callback = AsyncMock()
            observe_callback = AsyncMock()
@ -288,11 +284,11 @@ class TestAgentManagerIntegration:
    async def test_agent_manager_unknown_tool_error(self, agent_manager, mock_flow_context):
        """Test agent manager error handling for unknown tool"""
        # Arrange
-        mock_flow_context("prompt-request").agent_react.return_value = {
-            "thought": "I need to use an unknown tool",
-            "action": "unknown_tool",
-            "arguments": {"param": "value"}
-        }
+        mock_flow_context("prompt-request").agent_react.return_value = """Thought: I need to use an unknown tool
+Action: unknown_tool
+Args: {
+    "param": "value"
+}"""
        
        think_callback = AsyncMock()
        observe_callback = AsyncMock()
@ -325,11 +321,11 @@ class TestAgentManagerIntegration:
        question = "Find information about AI and summarize it"
        
        # Mock multi-step reasoning
-        mock_flow_context("prompt-request").agent_react.return_value = {
-            "thought": "I need to search for AI information first",
-            "action": "knowledge_query",
-            "arguments": {"question": "What is artificial intelligence?"}
-        }
+        mock_flow_context("prompt-request").agent_react.return_value = """Thought: I need to search for AI information first
+Action: knowledge_query
+Args: {
+    "question": "What is artificial intelligence?"
+}"""

        # Act
        action = await agent_manager.reason(question, [], mock_flow_context)
@ -373,11 +369,12 @@ class TestAgentManagerIntegration:

        for test_case in test_cases:
            # Arrange
-            mock_flow_context("prompt-request").agent_react.return_value = {
-                "thought": f"Using {test_case['action']}",
-                "action": test_case['action'],
-                "arguments": test_case['arguments']
-            }
+            # Format arguments as JSON
+            import json
+            args_json = json.dumps(test_case['arguments'], indent=4)
+            mock_flow_context("prompt-request").agent_react.return_value = f"""Thought: Using {test_case['action']}
+Action: {test_case['action']}
+Args: {args_json}"""
            
            think_callback = AsyncMock()
            observe_callback = AsyncMock()
@ -465,6 +462,193 @@ class TestAgentManagerIntegration:
            # Reset mocks
            mock_flow_context("graph-rag-request").reset_mock()

+    @pytest.mark.asyncio
+    async def test_agent_manager_malformed_response_handling(self, agent_manager, mock_flow_context):
+        """Test agent manager handling of malformed text responses"""
+        # Test cases with expected error messages
+        test_cases = [
+            # Missing action/final answer
+            {
+                "response": "Thought: I need to do something",
+                "error_contains": "Response has thought but no action or final answer"
+            },
+            # Invalid JSON in Args
+            {
+                "response": """Thought: I need to search
+Action: knowledge_query
+Args: {invalid json}""",
+                "error_contains": "Invalid JSON in Args"
+            },
+            # Empty response
+            {
+                "response": "",
+                "error_contains": "Could not parse response"
+            },
+            # Only whitespace
+            {
+                "response": "   \n\t  ",
+                "error_contains": "Could not parse response"
+            },
+            # Missing Args for action (should create empty args dict)
+            {
+                "response": """Thought: I need to search
+Action: knowledge_query""",
+                "error_contains": None  # This should actually succeed with empty args
+            },
+            # Incomplete JSON
+            {
+                "response": """Thought: I need to search
+Action: knowledge_query
+Args: {
+    "question": "test"
+""",
+                "error_contains": "Invalid JSON in Args"
+            },
+        ]
+        
+        for test_case in test_cases:
+            mock_flow_context("prompt-request").agent_react.return_value = test_case["response"]
+            
+            if test_case["error_contains"]:
+                # Should raise an error
+                with pytest.raises(RuntimeError) as exc_info:
+                    await agent_manager.reason("test question", [], mock_flow_context)
+                
+                assert "Failed to parse agent response" in str(exc_info.value)
+                assert test_case["error_contains"] in str(exc_info.value)
+            else:
+                # Should succeed
+                action = await agent_manager.reason("test question", [], mock_flow_context)
+                assert isinstance(action, Action)
+                assert action.name == "knowledge_query"
+                assert action.arguments == {}
+
+    @pytest.mark.asyncio
+    async def test_agent_manager_text_parsing_edge_cases(self, agent_manager, mock_flow_context):
+        """Test edge cases in text parsing"""
+        # Test response with markdown code blocks
+        mock_flow_context("prompt-request").agent_react.return_value = """```
+Thought: I need to search for information
+Action: knowledge_query
+Args: {
+    "question": "What is AI?"
+}
+```"""
+        
+        action = await agent_manager.reason("test", [], mock_flow_context)
+        assert isinstance(action, Action)
+        assert action.thought == "I need to search for information"
+        assert action.name == "knowledge_query"
+        
+        # Test response with extra whitespace
+        mock_flow_context("prompt-request").agent_react.return_value = """
+
+Thought:    I need to think about this    
+Action:   knowledge_query   
+Args:    {
+    "question": "test"
+}
+
+"""
+        
+        action = await agent_manager.reason("test", [], mock_flow_context)
+        assert isinstance(action, Action)
+        assert action.thought == "I need to think about this"
+        assert action.name == "knowledge_query"
+
+    @pytest.mark.asyncio
+    async def test_agent_manager_multiline_content(self, agent_manager, mock_flow_context):
+        """Test handling of multi-line thoughts and final answers"""
+        # Multi-line thought
+        mock_flow_context("prompt-request").agent_react.return_value = """Thought: I need to consider multiple factors:
+1. The user's question is complex
+2. I should search for comprehensive information
+3. This requires using the knowledge query tool
+Action: knowledge_query
+Args: {
+    "question": "complex query"
+}"""
+        
+        action = await agent_manager.reason("test", [], mock_flow_context)
+        assert isinstance(action, Action)
+        assert "multiple factors" in action.thought
+        assert "knowledge query tool" in action.thought
+        
+        # Multi-line final answer
+        mock_flow_context("prompt-request").agent_react.return_value = """Thought: I have gathered enough information
+Final Answer: Here is a comprehensive answer:
+1. First point about the topic
+2. Second point with details
+3. Final conclusion
+
+This covers all aspects of the question."""
+        
+        action = await agent_manager.reason("test", [], mock_flow_context)
+        assert isinstance(action, Final)
+        assert "First point" in action.final
+        assert "Final conclusion" in action.final
+        assert "all aspects" in action.final
+
+    @pytest.mark.asyncio
+    async def test_agent_manager_json_args_special_characters(self, agent_manager, mock_flow_context):
+        """Test JSON arguments with special characters and edge cases"""
+        # Test with special characters in JSON (properly escaped)
+        mock_flow_context("prompt-request").agent_react.return_value = """Thought: Processing special characters
+Action: knowledge_query
+Args: {
+    "question": "What about \\"quotes\\" and 'apostrophes'?",
+    "context": "Line 1\\nLine 2\\tTabbed",
+    "special": "Symbols: @#$%^&*()_+-=[]{}|;':,.<>?"
+}"""
+        
+        action = await agent_manager.reason("test", [], mock_flow_context)
+        assert isinstance(action, Action)
+        assert action.arguments["question"] == 'What about "quotes" and \'apostrophes\'?'
+        assert action.arguments["context"] == "Line 1\nLine 2\tTabbed"
+        assert "@#$%^&*" in action.arguments["special"]
+        
+        # Test with nested JSON
+        mock_flow_context("prompt-request").agent_react.return_value = """Thought: Complex arguments
+Action: web_search
+Args: {
+    "query": "test",
+    "options": {
+        "limit": 10,
+        "filters": ["recent", "relevant"],
+        "metadata": {
+            "source": "user",
+            "timestamp": "2024-01-01"
+        }
+    }
+}"""
+        
+        action = await agent_manager.reason("test", [], mock_flow_context)
+        assert isinstance(action, Action)
+        assert action.arguments["options"]["limit"] == 10
+        assert "recent" in action.arguments["options"]["filters"]
+        assert action.arguments["options"]["metadata"]["source"] == "user"
+
+    @pytest.mark.asyncio
+    async def test_agent_manager_final_answer_json_format(self, agent_manager, mock_flow_context):
+        """Test final answers that contain JSON-like content"""
+        # Final answer with JSON content
+        mock_flow_context("prompt-request").agent_react.return_value = """Thought: I can provide the data in JSON format
+Final Answer: {
+    "result": "success",
+    "data": {
+        "name": "Machine Learning",
+        "type": "AI Technology",
+        "applications": ["NLP", "Computer Vision", "Robotics"]
+    },
+    "confidence": 0.95
+}"""
+        
+        action = await agent_manager.reason("test", [], mock_flow_context)
+        assert isinstance(action, Final)
+        # The final answer should preserve the JSON structure as a string
+        assert '"result": "success"' in action.final
+        assert '"applications":' in action.final
+
    @pytest.mark.asyncio
    @pytest.mark.slow
    async def test_agent_manager_performance_with_large_history(self, agent_manager, mock_flow_context):