mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 16:36:21 +02:00
Release/v1.2 (#457)
* Bump setup.py versions for 1.1
* PoC MCP server (#419)
  * Very initial MCP server PoC for TrustGraph
  * Put service on port 8000
  * Add MCP container and packages to buildout
* Update docs for API/CLI changes in 1.0 (#421)
  * Update some API basics for the 0.23/1.0 API change
* Add MCP container push (#425)
* Add command args to the MCP server (#426)
  * Host and port parameters
  * Added websocket arg
  * More docs
* MCP client support (#427)
  - MCP client service
  - Tool request/response schema
  - API gateway support for mcp-tool
  - Message translation for tool request & response
  - Make mcp-tool use the configuration service for information about where the MCP services are
* Feature/react call mcp (#428)
  Key Features
  - MCP Tool Integration: added core MCP tool support with ToolClientSpec and ToolClient classes
  - API Enhancement: new mcp_tool method for flow-specific tool invocation
  - CLI Tooling: new tg-invoke-mcp-tool command for testing MCP integration
  - React Agent Enhancement: fixed and improved multi-tool invocation capabilities
  - Tool Management: enhanced CLI for tool configuration and management
  Changes
  - Added MCP tool invocation to the API with flow-specific integration
  - Implemented ToolClientSpec and ToolClient for tool call handling
  - Updated agent-manager-react to invoke MCP tools with configurable types
  - Enhanced CLI with new commands and improved help text
  - Added comprehensive documentation for new CLI commands
  - Improved tool configuration management
  Testing
  - Added tg-invoke-mcp-tool CLI command for isolated MCP integration testing
  - Enhanced agent capability to invoke multiple tools simultaneously
* Test suite executed from CI pipeline (#433)
  * Test strategy & test cases
  * Unit tests
  * Integration tests
* Extending test coverage (#434)
  * Contract tests
  * Testing embeddings
  * Agent unit tests
  * Knowledge pipeline tests
  * Turn on contract tests
* Increase storage test coverage (#435)
  * Fixing storage and adding tests
  * PR pipeline only runs quick tests
* Empty configuration is returned as an empty list; previously it was absent from the response (#436)
* Update config util to take files as well as command-line text (#437)
* Updated CLI invocation and config model for tools and mcp (#438)
  * Updated CLI invocation and config model for tools and MCP
  * CLI anomalies
  * Tweaked the MCP tool implementation for the new model
  * Updated the agent implementation to match the new model
  * Fix agent tools, now all tested
  * Fixed integration tests
  * Fix MCP delete tool params
  * Update Python deps to 1.2
* Update to enable knowledge extraction using the agent framework (#439)
  * Implement KG extraction agent (kg-extract-agent)
  * Uses the ReAct framework (agent-manager-react)
  * The ReAct manager had an issue when emitting JSON, which conflicted with its own JSON messages, so it was refactored to use traditional ReAct messages with a non-JSON structure
  * Minor refactor to take the prompt template client out of prompt-template so it can be more readily used by other modules; kg-extract-agent uses this framework
* Migrate from setup.py to pyproject.toml (#440)
  * Converted setup.py to pyproject.toml
  * Modern package infrastructure as recommended by the Python packaging docs
* Install missing build deps (#441)
* Install missing build deps (#442)
* Implement logging strategy (#444)
  * Logging strategy; converted all print() calls to logging invocations
* Fix/startup failure (#445)
  * Fix logging startup problems
* Fix logging startup problems (#446)
* Fix logging startup problems (#447)
* Fixed Mistral OCR to use current API (#448)
  * Fixed Mistral OCR to use the current API
  * Added PDF decoder tests
* Fix Mistral OCR ident to be standard pdf-decoder (#450)
  * Fix Mistral OCR ident to be standard pdf-decoder
  * Correct test
* Schema structure refactor (#451)
  * Wrote schema refactor spec
  * Implemented schema refactor spec
* Structured data MVP (#452)
  * Structured data tech spec
  * Architecture principles
  * New schemas
  * Updated schemas and specs
  * Object extractor
  * Add .coveragerc
  * New tests
  * Cassandra object storage
  * Trying to get object extraction working; issues remain
* Validate librarian collection (#453)
* Fix token chunker, broken API invocation (#454)
* Fix token chunker, broken API invocation (#455)
* Knowledge load utility CLI (#456)
  * Knowledge loader
  * More tests
This commit is contained in:
parent
c85ba197be
commit
89be656990
509 changed files with 49632 additions and 5159 deletions
532  tests/unit/test_agent/test_reasoning_engine.py  Normal file
@@ -0,0 +1,532 @@
"""
|
||||
Unit tests for reasoning engine logic
|
||||
|
||||
Tests the core reasoning algorithms that power agent decision-making,
|
||||
including question analysis, reasoning chain construction, and
|
||||
decision-making processes.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import Mock, AsyncMock
|
||||
|
||||
|
||||
class TestReasoningEngineLogic:
|
||||
"""Test cases for reasoning engine business logic"""
|
||||
|
||||
    def test_question_analysis_and_categorization(self):
        """Test analysis and categorization of user questions"""
        # Arrange
        def analyze_question(question):
            """Analyze question to determine type and complexity"""
            question_lower = question.lower().strip()

            analysis = {
                "type": "unknown",
                "complexity": "simple",
                "entities": [],
                "intent": "information_seeking",
                "requires_tools": [],
                "confidence": 0.5
            }

            # Determine question type
            question_words = question_lower.split()
            if any(word in question_words for word in ["what", "who", "where", "when"]):
                analysis["type"] = "factual"
                analysis["intent"] = "information_seeking"
                analysis["confidence"] = 0.8
            elif any(word in question_words for word in ["how", "why"]):
                analysis["type"] = "explanatory"
                analysis["intent"] = "explanation_seeking"
                analysis["complexity"] = "moderate"
                analysis["confidence"] = 0.7
            elif any(word in question_lower for word in ["calculate", "+", "-", "*", "/", "="]):
                analysis["type"] = "computational"
                analysis["intent"] = "calculation"
                analysis["requires_tools"] = ["calculator"]
                analysis["confidence"] = 0.9
            elif any(phrase in question_lower for phrase in ["tell me about", "about"]):
                analysis["type"] = "factual"
                analysis["intent"] = "information_seeking"
                analysis["confidence"] = 0.7

            # Detect entities (simplified)
            known_entities = ["france", "paris", "openai", "microsoft", "python", "ai"]
            analysis["entities"] = [entity for entity in known_entities if entity in question_lower]

            # Determine complexity
            if len(question.split()) > 15:
                analysis["complexity"] = "complex"
            elif len(question.split()) > 8:
                analysis["complexity"] = "moderate"

            # Determine required tools
            if analysis["type"] == "computational":
                analysis["requires_tools"] = ["calculator"]
            elif analysis["entities"]:
                analysis["requires_tools"] = ["knowledge_search", "graph_rag"]
            elif analysis["type"] in ["factual", "explanatory"]:
                analysis["requires_tools"] = ["knowledge_search"]

            return analysis

        test_cases = [
            ("What is the capital of France?", "factual", ["france"], ["knowledge_search", "graph_rag"]),
            ("How does machine learning work?", "explanatory", [], ["knowledge_search"]),
            ("Calculate 15 * 8", "computational", [], ["calculator"]),
            ("Tell me about OpenAI", "factual", ["openai"], ["knowledge_search", "graph_rag"]),
            ("Why is Python popular for AI development?", "explanatory", ["python", "ai"], ["knowledge_search"])
        ]

        # Act & Assert
        for question, expected_type, expected_entities, expected_tools in test_cases:
            analysis = analyze_question(question)

            assert analysis["type"] == expected_type, f"Question '{question}' got type '{analysis['type']}', expected '{expected_type}'"
            assert all(entity in analysis["entities"] for entity in expected_entities)
            assert any(tool in expected_tools for tool in analysis["requires_tools"])
            assert analysis["confidence"] > 0.5

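    # Worked example (illustrative, not part of the assertions above): for
    # "Calculate 15 * 8", analyze_question() takes the computational branch,
    # returning type="computational", intent="calculation",
    # requires_tools=["calculator"], confidence=0.9, entities=[] and
    # complexity="simple" (4 words, under the 8-word threshold).
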
    def test_reasoning_chain_construction(self):
        """Test construction of logical reasoning chains"""
        # Arrange
        def construct_reasoning_chain(question, available_tools, context=None):
            """Construct a logical chain of reasoning steps"""
            reasoning_chain = []

            # Analyze question
            question_lower = question.lower()

            # Multi-step questions requiring decomposition
            if "capital of" in question_lower and ("population" in question_lower or "size" in question_lower):
                reasoning_chain.extend([
                    {
                        "step": 1,
                        "type": "decomposition",
                        "description": "Break down complex question into sub-questions",
                        "sub_questions": ["What is the capital?", "What is the population/size?"]
                    },
                    {
                        "step": 2,
                        "type": "information_gathering",
                        "description": "Find the capital city",
                        "tool": "knowledge_search",
                        "query": f"capital of {question_lower.split('capital of')[1].split()[0]}"
                    },
                    {
                        "step": 3,
                        "type": "information_gathering",
                        "description": "Find population/size of the capital",
                        "tool": "knowledge_search",
                        "query": "population size [CAPITAL_CITY]"
                    },
                    {
                        "step": 4,
                        "type": "synthesis",
                        "description": "Combine information to answer original question"
                    }
                ])

            elif "relationship" in question_lower or "connection" in question_lower:
                reasoning_chain.extend([
                    {
                        "step": 1,
                        "type": "entity_identification",
                        "description": "Identify entities mentioned in question"
                    },
                    {
                        "step": 2,
                        "type": "relationship_exploration",
                        "description": "Explore relationships between entities",
                        "tool": "graph_rag"
                    },
                    {
                        "step": 3,
                        "type": "analysis",
                        "description": "Analyze relationship patterns and significance"
                    }
                ])

            elif any(op in question_lower for op in ["+", "-", "*", "/", "calculate"]):
                reasoning_chain.extend([
                    {
                        "step": 1,
                        "type": "expression_parsing",
                        "description": "Parse mathematical expression from question"
                    },
                    {
                        "step": 2,
                        "type": "calculation",
                        "description": "Perform calculation",
                        "tool": "calculator"
                    },
                    {
                        "step": 3,
                        "type": "result_formatting",
                        "description": "Format result appropriately"
                    }
                ])

            else:
                # Simple information seeking
                reasoning_chain.extend([
                    {
                        "step": 1,
                        "type": "information_gathering",
                        "description": "Search for relevant information",
                        "tool": "knowledge_search"
                    },
                    {
                        "step": 2,
                        "type": "response_formulation",
                        "description": "Formulate clear response"
                    }
                ])

            return reasoning_chain

        available_tools = ["knowledge_search", "graph_rag", "calculator"]

        # Act & Assert
        # Test complex multi-step question
        complex_chain = construct_reasoning_chain(
            "What is the population of the capital of France?",
            available_tools
        )
        assert len(complex_chain) == 4
        assert complex_chain[0]["type"] == "decomposition"
        assert complex_chain[1]["tool"] == "knowledge_search"

        # Test relationship question
        relationship_chain = construct_reasoning_chain(
            "What is the relationship between Paris and France?",
            available_tools
        )
        assert any(step["type"] == "relationship_exploration" for step in relationship_chain)
        assert any(step.get("tool") == "graph_rag" for step in relationship_chain)

        # Test calculation question
        calc_chain = construct_reasoning_chain("Calculate 15 * 8", available_tools)
        assert any(step["type"] == "calculation" for step in calc_chain)
        assert any(step.get("tool") == "calculator" for step in calc_chain)

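    # Worked example (illustrative): for "What is the population of the
    # capital of France?", the decomposition branch fires and step 2's query
    # becomes "capital of france?" -- the naive token split keeps the trailing
    # question mark, which a real implementation would strip.
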
    def test_decision_making_algorithms(self):
        """Test decision-making algorithms for tool selection and strategy"""
        # Arrange
        def make_reasoning_decisions(question, available_tools, context=None, constraints=None):
            """Make decisions about reasoning approach and tool usage"""
            decisions = {
                "primary_strategy": "direct_search",
                "selected_tools": [],
                "reasoning_depth": "shallow",
                "confidence": 0.5,
                "fallback_strategy": "general_search"
            }

            question_lower = question.lower()
            constraints = constraints or {}

            # Strategy selection based on question type
            if "calculate" in question_lower or any(op in question_lower for op in ["+", "-", "*", "/"]):
                decisions["primary_strategy"] = "calculation"
                decisions["selected_tools"] = ["calculator"]
                decisions["reasoning_depth"] = "shallow"
                decisions["confidence"] = 0.9

            elif "relationship" in question_lower or "connect" in question_lower:
                decisions["primary_strategy"] = "graph_exploration"
                decisions["selected_tools"] = ["graph_rag", "knowledge_search"]
                decisions["reasoning_depth"] = "deep"
                decisions["confidence"] = 0.8

            elif any(word in question_lower for word in ["what", "who", "where", "when"]):
                decisions["primary_strategy"] = "factual_lookup"
                decisions["selected_tools"] = ["knowledge_search"]
                decisions["reasoning_depth"] = "moderate"
                decisions["confidence"] = 0.7

            elif any(word in question_lower for word in ["how", "why", "explain"]):
                decisions["primary_strategy"] = "explanatory_reasoning"
                decisions["selected_tools"] = ["knowledge_search", "graph_rag"]
                decisions["reasoning_depth"] = "deep"
                decisions["confidence"] = 0.6

            # Apply constraints
            if constraints.get("max_tools", 0) > 0:
                decisions["selected_tools"] = decisions["selected_tools"][:constraints["max_tools"]]

            if constraints.get("fast_mode", False):
                decisions["reasoning_depth"] = "shallow"
                decisions["selected_tools"] = decisions["selected_tools"][:1]

            # Filter by available tools
            decisions["selected_tools"] = [tool for tool in decisions["selected_tools"] if tool in available_tools]

            if not decisions["selected_tools"]:
                decisions["primary_strategy"] = "general_search"
                decisions["selected_tools"] = ["knowledge_search"] if "knowledge_search" in available_tools else []
                decisions["confidence"] = 0.3

            return decisions

        available_tools = ["knowledge_search", "graph_rag", "calculator"]

        test_cases = [
            ("What is 2 + 2?", "calculation", ["calculator"], 0.9),
            ("What is the relationship between Paris and France?", "graph_exploration", ["graph_rag"], 0.8),
            ("Who is the president of France?", "factual_lookup", ["knowledge_search"], 0.7),
            ("How does photosynthesis work?", "explanatory_reasoning", ["knowledge_search"], 0.6)
        ]

        # Act & Assert
        for question, expected_strategy, expected_tools, min_confidence in test_cases:
            decisions = make_reasoning_decisions(question, available_tools)

            assert decisions["primary_strategy"] == expected_strategy
            assert any(tool in decisions["selected_tools"] for tool in expected_tools)
            assert decisions["confidence"] >= min_confidence

        # Test with constraints
        constrained_decisions = make_reasoning_decisions(
            "How does machine learning work?",
            available_tools,
            constraints={"fast_mode": True}
        )
        assert constrained_decisions["reasoning_depth"] == "shallow"
        assert len(constrained_decisions["selected_tools"]) <= 1

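    # Worked example (illustrative): branch order matters here. "What is
    # 2 + 2?" contains both "what" and "+", but the arithmetic check runs
    # first, so the decision is "calculation" with the calculator tool at
    # confidence 0.9 rather than factual_lookup at 0.7.
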
    def test_confidence_scoring_logic(self):
        """Test confidence scoring for reasoning steps and decisions"""
        # Arrange
        def calculate_confidence_score(reasoning_step, available_evidence, tool_reliability=None):
            """Calculate confidence score for a reasoning step"""
            base_confidence = 0.5
            tool_reliability = tool_reliability or {}

            step_type = reasoning_step.get("type", "unknown")
            tool_used = reasoning_step.get("tool")
            evidence_quality = available_evidence.get("quality", "medium")
            evidence_sources = available_evidence.get("sources", 1)

            # Adjust confidence based on step type
            confidence_modifiers = {
                "calculation": 0.4,  # High confidence for math
                "factual_lookup": 0.2,  # Moderate confidence for facts
                "relationship_exploration": 0.1,  # Lower confidence for complex relationships
                "synthesis": -0.1,  # Slightly lower for synthesized information
                "speculation": -0.3  # Much lower for speculative reasoning
            }

            base_confidence += confidence_modifiers.get(step_type, 0)

            # Adjust for tool reliability
            if tool_used and tool_used in tool_reliability:
                tool_score = tool_reliability[tool_used]
                base_confidence += (tool_score - 0.5) * 0.2  # Scale tool reliability impact

            # Adjust for evidence quality
            evidence_modifiers = {
                "high": 0.2,
                "medium": 0.0,
                "low": -0.2,
                "none": -0.4
            }
            base_confidence += evidence_modifiers.get(evidence_quality, 0)

            # Adjust for multiple sources
            if evidence_sources > 1:
                base_confidence += min(0.2, evidence_sources * 0.05)

            # Cap between 0 and 1
            return max(0.0, min(1.0, base_confidence))

        tool_reliability = {
            "calculator": 0.95,
            "knowledge_search": 0.8,
            "graph_rag": 0.7
        }

        test_cases = [
            (
                {"type": "calculation", "tool": "calculator"},
                {"quality": "high", "sources": 1},
                0.9  # Should be very high confidence
            ),
            (
                {"type": "factual_lookup", "tool": "knowledge_search"},
                {"quality": "medium", "sources": 2},
                0.8  # Good confidence with multiple sources
            ),
            (
                {"type": "speculation", "tool": None},
                {"quality": "low", "sources": 1},
                0.0  # Very low confidence for speculation with low quality evidence
            ),
            (
                {"type": "relationship_exploration", "tool": "graph_rag"},
                {"quality": "high", "sources": 3},
                0.7  # Moderate-high confidence
            )
        ]

        # Act & Assert
        for reasoning_step, evidence, expected_min_confidence in test_cases:
            confidence = calculate_confidence_score(reasoning_step, evidence, tool_reliability)
            assert confidence >= expected_min_confidence - 0.15  # Allow larger tolerance for confidence calculations
            assert 0 <= confidence <= 1

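    # Worked arithmetic (illustrative): for the calculation/calculator case,
    # the score is 0.5 + 0.4 (step type) + (0.95 - 0.5) * 0.2 = 0.09 (tool
    # reliability) + 0.2 (high-quality evidence) = 1.19, which the final
    # clamp caps at 1.0. For speculation with low-quality evidence:
    # 0.5 - 0.3 - 0.2 = 0.0.
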
    def test_reasoning_validation_logic(self):
        """Test validation of reasoning chains for logical consistency"""
        # Arrange
        def validate_reasoning_chain(reasoning_chain):
            """Validate logical consistency of reasoning chain"""
            validation_results = {
                "is_valid": True,
                "issues": [],
                "completeness_score": 0.0,
                "logical_consistency": 0.0
            }

            if not reasoning_chain:
                validation_results["is_valid"] = False
                validation_results["issues"].append("Empty reasoning chain")
                return validation_results

            # Check for required components
            step_types = [step.get("type") for step in reasoning_chain]

            # Must have some form of information gathering or processing
            has_information_step = any(t in step_types for t in [
                "information_gathering", "calculation", "relationship_exploration"
            ])

            if not has_information_step:
                validation_results["issues"].append("No information gathering step")

            # Check for logical flow
            for i, step in enumerate(reasoning_chain):
                # Each step should have required fields
                if "type" not in step:
                    validation_results["issues"].append(f"Step {i+1} missing type")

                if "description" not in step:
                    validation_results["issues"].append(f"Step {i+1} missing description")

                # Tool steps should specify tool
                if step.get("type") in ["information_gathering", "calculation", "relationship_exploration"]:
                    if "tool" not in step:
                        validation_results["issues"].append(f"Step {i+1} missing tool specification")

            # Check for synthesis or conclusion
            has_synthesis = any(t in step_types for t in [
                "synthesis", "response_formulation", "result_formatting"
            ])

            if not has_synthesis and len(reasoning_chain) > 1:
                validation_results["issues"].append("Multi-step reasoning missing synthesis")

            # Calculate scores
            completeness_items = [
                has_information_step,
                has_synthesis or len(reasoning_chain) == 1,
                all("description" in step for step in reasoning_chain),
                len(reasoning_chain) >= 1
            ]
            validation_results["completeness_score"] = sum(completeness_items) / len(completeness_items)

            consistency_items = [
                len(validation_results["issues"]) == 0,
                len(reasoning_chain) > 0,
                all("type" in step for step in reasoning_chain)
            ]
            validation_results["logical_consistency"] = sum(consistency_items) / len(consistency_items)

            validation_results["is_valid"] = len(validation_results["issues"]) == 0

            return validation_results

        # Test cases
        valid_chain = [
            {"type": "information_gathering", "description": "Search for information", "tool": "knowledge_search"},
            {"type": "response_formulation", "description": "Formulate response"}
        ]

        invalid_chain = [
            {"description": "Do something"},  # Missing type
            {"type": "information_gathering"}  # Missing description and tool
        ]

        empty_chain = []

        # Act & Assert
        valid_result = validate_reasoning_chain(valid_chain)
        assert valid_result["is_valid"] is True
        assert len(valid_result["issues"]) == 0
        assert valid_result["completeness_score"] > 0.8

        invalid_result = validate_reasoning_chain(invalid_chain)
        assert invalid_result["is_valid"] is False
        assert len(invalid_result["issues"]) > 0

        empty_result = validate_reasoning_chain(empty_chain)
        assert empty_result["is_valid"] is False
        assert "Empty reasoning chain" in empty_result["issues"]

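    # Worked example (illustrative): valid_chain above has an information
    # step, a synthesis step, and descriptions throughout, so all four
    # completeness checks pass (score 1.0) and no issues are recorded,
    # giving logical_consistency 1.0. invalid_chain accumulates three issues:
    # step 1 missing type, step 2 missing description, step 2 missing tool.
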
    def test_adaptive_reasoning_strategies(self):
        """Test adaptive reasoning that adjusts based on context and feedback"""
        # Arrange
        def adapt_reasoning_strategy(initial_strategy, feedback, context=None):
            """Adapt reasoning strategy based on feedback and context"""
            adapted_strategy = initial_strategy.copy()
            # Copy the tool list as well: dict.copy() is shallow, and mutating
            # the shared list below would silently alter the caller's strategy
            adapted_strategy["selected_tools"] = list(initial_strategy["selected_tools"])
            context = context or {}

            # Analyze feedback
            if feedback.get("accuracy", 0) < 0.5:
                # Low accuracy - need different approach
                if initial_strategy["primary_strategy"] == "direct_search":
                    adapted_strategy["primary_strategy"] = "multi_source_verification"
                    adapted_strategy["selected_tools"].extend(["graph_rag"])
                    adapted_strategy["reasoning_depth"] = "deep"

                elif initial_strategy["primary_strategy"] == "factual_lookup":
                    adapted_strategy["primary_strategy"] = "explanatory_reasoning"
                    adapted_strategy["reasoning_depth"] = "deep"

            if feedback.get("completeness", 0) < 0.5:
                # Incomplete answer - need more comprehensive approach
                adapted_strategy["reasoning_depth"] = "deep"
                if "graph_rag" not in adapted_strategy["selected_tools"]:
                    adapted_strategy["selected_tools"].append("graph_rag")

            if feedback.get("response_time", 0) > context.get("max_response_time", 30):
                # Too slow - simplify approach
                adapted_strategy["reasoning_depth"] = "shallow"
                adapted_strategy["selected_tools"] = adapted_strategy["selected_tools"][:1]

            # Update confidence based on adaptation
            if adapted_strategy != initial_strategy:
                adapted_strategy["confidence"] = max(0.3, adapted_strategy["confidence"] - 0.2)

            return adapted_strategy

        initial_strategy = {
            "primary_strategy": "direct_search",
            "selected_tools": ["knowledge_search"],
            "reasoning_depth": "shallow",
            "confidence": 0.7
        }

        # Test adaptation to low accuracy feedback
        low_accuracy_feedback = {"accuracy": 0.3, "completeness": 0.8, "response_time": 10}
        adapted = adapt_reasoning_strategy(initial_strategy, low_accuracy_feedback)

        assert adapted["primary_strategy"] != initial_strategy["primary_strategy"]
        assert "graph_rag" in adapted["selected_tools"]
        assert adapted["reasoning_depth"] == "deep"

        # Test adaptation to slow response
        slow_feedback = {"accuracy": 0.8, "completeness": 0.8, "response_time": 40}
        adapted_fast = adapt_reasoning_strategy(initial_strategy, slow_feedback, {"max_response_time": 30})

        assert adapted_fast["reasoning_depth"] == "shallow"
        assert len(adapted_fast["selected_tools"]) <= 1
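    # Worked example (illustrative): low_accuracy_feedback (accuracy 0.3)
    # switches direct_search to multi_source_verification, adds graph_rag and
    # deepens reasoning; because the strategy changed, confidence drops from
    # 0.7 to 0.5. slow_feedback (response_time 40 > max 30) instead trims the
    # tool list to one entry and forces shallow reasoning.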
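To run just this module, a standard pytest invocation works (the -v flag for per-test detail is optional):

pytest tests/unit/test_agent/test_reasoning_engine.py -v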