Update to enable knowledge extraction using the agent framework (#439)

* Implement KG extraction agent (kg-extract-agent)

* Using ReAct framework (agent-manager-react)
 
* ReAct manager had an issue when emitting JSON, which conflicts which ReAct manager's own JSON messages, so refactored ReAct manager to use traditional ReAct messages, non-JSON structure.
 
* Minor refactor to take the prompt template client out of prompt-template so it can be more readily used by other modules. kg-extract-agent uses this framework.
This commit is contained in:
cybermaggedon 2025-07-21 14:31:57 +01:00 committed by GitHub
parent 1fe4ed5226
commit d83e4e3d59
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
30 changed files with 3192 additions and 799 deletions

View file

@ -28,11 +28,11 @@ class TestAgentManagerIntegration:
# Mock prompt client
prompt_client = AsyncMock()
prompt_client.agent_react.return_value = {
"thought": "I need to search for information about machine learning",
"action": "knowledge_query",
"arguments": {"question": "What is machine learning?"}
}
prompt_client.agent_react.return_value = """Thought: I need to search for information about machine learning
Action: knowledge_query
Args: {
"question": "What is machine learning?"
}"""
# Mock graph RAG client
graph_rag_client = AsyncMock()
@ -147,10 +147,8 @@ class TestAgentManagerIntegration:
async def test_agent_manager_final_answer(self, agent_manager, mock_flow_context):
"""Test agent manager returning final answer"""
# Arrange
mock_flow_context("prompt-request").agent_react.return_value = {
"thought": "I have enough information to answer the question",
"final-answer": "Machine learning is a field of AI that enables computers to learn from data."
}
mock_flow_context("prompt-request").agent_react.return_value = """Thought: I have enough information to answer the question
Final Answer: Machine learning is a field of AI that enables computers to learn from data."""
question = "What is machine learning?"
history = []
@ -195,10 +193,8 @@ class TestAgentManagerIntegration:
async def test_agent_manager_react_with_final_answer(self, agent_manager, mock_flow_context):
"""Test ReAct cycle ending with final answer"""
# Arrange
mock_flow_context("prompt-request").agent_react.return_value = {
"thought": "I can provide a direct answer",
"final-answer": "Machine learning is a branch of artificial intelligence."
}
mock_flow_context("prompt-request").agent_react.return_value = """Thought: I can provide a direct answer
Final Answer: Machine learning is a branch of artificial intelligence."""
question = "What is machine learning?"
history = []
@ -258,11 +254,11 @@ class TestAgentManagerIntegration:
for tool_name, expected_service in tool_scenarios:
# Arrange
mock_flow_context("prompt-request").agent_react.return_value = {
"thought": f"I need to use {tool_name}",
"action": tool_name,
"arguments": {"question": "test question"}
}
mock_flow_context("prompt-request").agent_react.return_value = f"""Thought: I need to use {tool_name}
Action: {tool_name}
Args: {{
"question": "test question"
}}"""
think_callback = AsyncMock()
observe_callback = AsyncMock()
@ -288,11 +284,11 @@ class TestAgentManagerIntegration:
async def test_agent_manager_unknown_tool_error(self, agent_manager, mock_flow_context):
"""Test agent manager error handling for unknown tool"""
# Arrange
mock_flow_context("prompt-request").agent_react.return_value = {
"thought": "I need to use an unknown tool",
"action": "unknown_tool",
"arguments": {"param": "value"}
}
mock_flow_context("prompt-request").agent_react.return_value = """Thought: I need to use an unknown tool
Action: unknown_tool
Args: {
"param": "value"
}"""
think_callback = AsyncMock()
observe_callback = AsyncMock()
@ -325,11 +321,11 @@ class TestAgentManagerIntegration:
question = "Find information about AI and summarize it"
# Mock multi-step reasoning
mock_flow_context("prompt-request").agent_react.return_value = {
"thought": "I need to search for AI information first",
"action": "knowledge_query",
"arguments": {"question": "What is artificial intelligence?"}
}
mock_flow_context("prompt-request").agent_react.return_value = """Thought: I need to search for AI information first
Action: knowledge_query
Args: {
"question": "What is artificial intelligence?"
}"""
# Act
action = await agent_manager.reason(question, [], mock_flow_context)
@ -373,11 +369,12 @@ class TestAgentManagerIntegration:
for test_case in test_cases:
# Arrange
mock_flow_context("prompt-request").agent_react.return_value = {
"thought": f"Using {test_case['action']}",
"action": test_case['action'],
"arguments": test_case['arguments']
}
# Format arguments as JSON
import json
args_json = json.dumps(test_case['arguments'], indent=4)
mock_flow_context("prompt-request").agent_react.return_value = f"""Thought: Using {test_case['action']}
Action: {test_case['action']}
Args: {args_json}"""
think_callback = AsyncMock()
observe_callback = AsyncMock()
@ -465,6 +462,193 @@ class TestAgentManagerIntegration:
# Reset mocks
mock_flow_context("graph-rag-request").reset_mock()
@pytest.mark.asyncio
async def test_agent_manager_malformed_response_handling(self, agent_manager, mock_flow_context):
"""Test agent manager handling of malformed text responses"""
# Test cases with expected error messages
test_cases = [
# Missing action/final answer
{
"response": "Thought: I need to do something",
"error_contains": "Response has thought but no action or final answer"
},
# Invalid JSON in Args
{
"response": """Thought: I need to search
Action: knowledge_query
Args: {invalid json}""",
"error_contains": "Invalid JSON in Args"
},
# Empty response
{
"response": "",
"error_contains": "Could not parse response"
},
# Only whitespace
{
"response": " \n\t ",
"error_contains": "Could not parse response"
},
# Missing Args for action (should create empty args dict)
{
"response": """Thought: I need to search
Action: knowledge_query""",
"error_contains": None # This should actually succeed with empty args
},
# Incomplete JSON
{
"response": """Thought: I need to search
Action: knowledge_query
Args: {
"question": "test"
""",
"error_contains": "Invalid JSON in Args"
},
]
for test_case in test_cases:
mock_flow_context("prompt-request").agent_react.return_value = test_case["response"]
if test_case["error_contains"]:
# Should raise an error
with pytest.raises(RuntimeError) as exc_info:
await agent_manager.reason("test question", [], mock_flow_context)
assert "Failed to parse agent response" in str(exc_info.value)
assert test_case["error_contains"] in str(exc_info.value)
else:
# Should succeed
action = await agent_manager.reason("test question", [], mock_flow_context)
assert isinstance(action, Action)
assert action.name == "knowledge_query"
assert action.arguments == {}
@pytest.mark.asyncio
async def test_agent_manager_text_parsing_edge_cases(self, agent_manager, mock_flow_context):
"""Test edge cases in text parsing"""
# Test response with markdown code blocks
mock_flow_context("prompt-request").agent_react.return_value = """```
Thought: I need to search for information
Action: knowledge_query
Args: {
"question": "What is AI?"
}
```"""
action = await agent_manager.reason("test", [], mock_flow_context)
assert isinstance(action, Action)
assert action.thought == "I need to search for information"
assert action.name == "knowledge_query"
# Test response with extra whitespace
mock_flow_context("prompt-request").agent_react.return_value = """
Thought: I need to think about this
Action: knowledge_query
Args: {
"question": "test"
}
"""
action = await agent_manager.reason("test", [], mock_flow_context)
assert isinstance(action, Action)
assert action.thought == "I need to think about this"
assert action.name == "knowledge_query"
@pytest.mark.asyncio
async def test_agent_manager_multiline_content(self, agent_manager, mock_flow_context):
"""Test handling of multi-line thoughts and final answers"""
# Multi-line thought
mock_flow_context("prompt-request").agent_react.return_value = """Thought: I need to consider multiple factors:
1. The user's question is complex
2. I should search for comprehensive information
3. This requires using the knowledge query tool
Action: knowledge_query
Args: {
"question": "complex query"
}"""
action = await agent_manager.reason("test", [], mock_flow_context)
assert isinstance(action, Action)
assert "multiple factors" in action.thought
assert "knowledge query tool" in action.thought
# Multi-line final answer
mock_flow_context("prompt-request").agent_react.return_value = """Thought: I have gathered enough information
Final Answer: Here is a comprehensive answer:
1. First point about the topic
2. Second point with details
3. Final conclusion
This covers all aspects of the question."""
action = await agent_manager.reason("test", [], mock_flow_context)
assert isinstance(action, Final)
assert "First point" in action.final
assert "Final conclusion" in action.final
assert "all aspects" in action.final
@pytest.mark.asyncio
async def test_agent_manager_json_args_special_characters(self, agent_manager, mock_flow_context):
"""Test JSON arguments with special characters and edge cases"""
# Test with special characters in JSON (properly escaped)
mock_flow_context("prompt-request").agent_react.return_value = """Thought: Processing special characters
Action: knowledge_query
Args: {
"question": "What about \\"quotes\\" and 'apostrophes'?",
"context": "Line 1\\nLine 2\\tTabbed",
"special": "Symbols: @#$%^&*()_+-=[]{}|;':,.<>?"
}"""
action = await agent_manager.reason("test", [], mock_flow_context)
assert isinstance(action, Action)
assert action.arguments["question"] == 'What about "quotes" and \'apostrophes\'?'
assert action.arguments["context"] == "Line 1\nLine 2\tTabbed"
assert "@#$%^&*" in action.arguments["special"]
# Test with nested JSON
mock_flow_context("prompt-request").agent_react.return_value = """Thought: Complex arguments
Action: web_search
Args: {
"query": "test",
"options": {
"limit": 10,
"filters": ["recent", "relevant"],
"metadata": {
"source": "user",
"timestamp": "2024-01-01"
}
}
}"""
action = await agent_manager.reason("test", [], mock_flow_context)
assert isinstance(action, Action)
assert action.arguments["options"]["limit"] == 10
assert "recent" in action.arguments["options"]["filters"]
assert action.arguments["options"]["metadata"]["source"] == "user"
@pytest.mark.asyncio
async def test_agent_manager_final_answer_json_format(self, agent_manager, mock_flow_context):
"""Test final answers that contain JSON-like content"""
# Final answer with JSON content
mock_flow_context("prompt-request").agent_react.return_value = """Thought: I can provide the data in JSON format
Final Answer: {
"result": "success",
"data": {
"name": "Machine Learning",
"type": "AI Technology",
"applications": ["NLP", "Computer Vision", "Robotics"]
},
"confidence": 0.95
}"""
action = await agent_manager.reason("test", [], mock_flow_context)
assert isinstance(action, Final)
# The final answer should preserve the JSON structure as a string
assert '"result": "success"' in action.final
assert '"applications":' in action.final
@pytest.mark.asyncio
@pytest.mark.slow
async def test_agent_manager_performance_with_large_history(self, agent_manager, mock_flow_context):