mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-29 10:26:21 +02:00
Merge 2.0 to master (#651)
This commit is contained in:
parent
3666ece2c5
commit
b9d7bf9a8b
212 changed files with 13940 additions and 6180 deletions
|
|
@ -12,7 +12,7 @@ import json
|
|||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
from trustgraph.extract.kg.agent.extract import Processor as AgentKgExtractor
|
||||
from trustgraph.schema import Chunk, Triple, Triples, Metadata, Value, Error
|
||||
from trustgraph.schema import Chunk, Triple, Triples, Metadata, Term, Error, IRI, LITERAL
|
||||
from trustgraph.schema import EntityContext, EntityContexts, AgentRequest, AgentResponse
|
||||
from trustgraph.rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF
|
||||
from trustgraph.template.prompt_manager import PromptManager
|
||||
|
|
@ -30,38 +30,16 @@ class TestAgentKgExtractionIntegration:
|
|||
# Mock agent client
|
||||
agent_client = AsyncMock()
|
||||
|
||||
# Mock successful agent response
|
||||
# Mock successful agent response in JSONL format
|
||||
def mock_agent_response(recipient, question):
|
||||
# Simulate agent processing and return structured response
|
||||
# Simulate agent processing and return structured JSONL response
|
||||
mock_response = MagicMock()
|
||||
mock_response.error = None
|
||||
mock_response.answer = '''```json
|
||||
{
|
||||
"definitions": [
|
||||
{
|
||||
"entity": "Machine Learning",
|
||||
"definition": "A subset of artificial intelligence that enables computers to learn from data without explicit programming."
|
||||
},
|
||||
{
|
||||
"entity": "Neural Networks",
|
||||
"definition": "Computing systems inspired by biological neural networks that process information."
|
||||
}
|
||||
],
|
||||
"relationships": [
|
||||
{
|
||||
"subject": "Machine Learning",
|
||||
"predicate": "is_subset_of",
|
||||
"object": "Artificial Intelligence",
|
||||
"object-entity": true
|
||||
},
|
||||
{
|
||||
"subject": "Neural Networks",
|
||||
"predicate": "used_in",
|
||||
"object": "Machine Learning",
|
||||
"object-entity": true
|
||||
}
|
||||
]
|
||||
}
|
||||
{"type": "definition", "entity": "Machine Learning", "definition": "A subset of artificial intelligence that enables computers to learn from data without explicit programming."}
|
||||
{"type": "definition", "entity": "Neural Networks", "definition": "Computing systems inspired by biological neural networks that process information."}
|
||||
{"type": "relationship", "subject": "Machine Learning", "predicate": "is_subset_of", "object": "Artificial Intelligence", "object-entity": true}
|
||||
{"type": "relationship", "subject": "Neural Networks", "predicate": "used_in", "object": "Machine Learning", "object-entity": true}
|
||||
```'''
|
||||
return mock_response.answer
|
||||
|
||||
|
|
@ -100,9 +78,9 @@ class TestAgentKgExtractionIntegration:
|
|||
id="doc123",
|
||||
metadata=[
|
||||
Triple(
|
||||
s=Value(value="doc123", is_uri=True),
|
||||
p=Value(value="http://example.org/type", is_uri=True),
|
||||
o=Value(value="document", is_uri=False)
|
||||
s=Term(type=IRI, iri="doc123"),
|
||||
p=Term(type=IRI, iri="http://example.org/type"),
|
||||
o=Term(type=LITERAL, value="document")
|
||||
)
|
||||
]
|
||||
)
|
||||
|
|
@ -120,7 +98,7 @@ class TestAgentKgExtractionIntegration:
|
|||
|
||||
# Copy the methods we want to test
|
||||
extractor.to_uri = real_extractor.to_uri
|
||||
extractor.parse_json = real_extractor.parse_json
|
||||
extractor.parse_jsonl = real_extractor.parse_jsonl
|
||||
extractor.process_extraction_data = real_extractor.process_extraction_data
|
||||
extractor.emit_triples = real_extractor.emit_triples
|
||||
extractor.emit_entity_contexts = real_extractor.emit_entity_contexts
|
||||
|
|
@ -156,7 +134,7 @@ class TestAgentKgExtractionIntegration:
|
|||
agent_response = agent_client.invoke(recipient=lambda x: True, question=prompt)
|
||||
|
||||
# Parse and process
|
||||
extraction_data = extractor.parse_json(agent_response)
|
||||
extraction_data = extractor.parse_jsonl(agent_response)
|
||||
triples, entity_contexts = extractor.process_extraction_data(extraction_data, v.metadata)
|
||||
|
||||
# Add metadata triples
|
||||
|
|
@ -200,15 +178,15 @@ class TestAgentKgExtractionIntegration:
|
|||
assert len(sent_triples.triples) > 0
|
||||
|
||||
# Check that we have definition triples
|
||||
definition_triples = [t for t in sent_triples.triples if t.p.value == DEFINITION]
|
||||
definition_triples = [t for t in sent_triples.triples if t.p.iri == DEFINITION]
|
||||
assert len(definition_triples) >= 2 # Should have definitions for ML and Neural Networks
|
||||
|
||||
|
||||
# Check that we have label triples
|
||||
label_triples = [t for t in sent_triples.triples if t.p.value == RDF_LABEL]
|
||||
label_triples = [t for t in sent_triples.triples if t.p.iri == RDF_LABEL]
|
||||
assert len(label_triples) >= 2 # Should have labels for entities
|
||||
|
||||
|
||||
# Check subject-of relationships
|
||||
subject_of_triples = [t for t in sent_triples.triples if t.p.value == SUBJECT_OF]
|
||||
subject_of_triples = [t for t in sent_triples.triples if t.p.iri == SUBJECT_OF]
|
||||
assert len(subject_of_triples) >= 2 # Entities should be linked to document
|
||||
|
||||
# Verify entity contexts were emitted
|
||||
|
|
@ -220,7 +198,7 @@ class TestAgentKgExtractionIntegration:
|
|||
assert len(sent_contexts.entities) >= 2 # Should have contexts for both entities
|
||||
|
||||
# Verify entity URIs are properly formed
|
||||
entity_uris = [ec.entity.value for ec in sent_contexts.entities]
|
||||
entity_uris = [ec.entity.iri for ec in sent_contexts.entities]
|
||||
assert f"{TRUSTGRAPH_ENTITIES}Machine%20Learning" in entity_uris
|
||||
assert f"{TRUSTGRAPH_ENTITIES}Neural%20Networks" in entity_uris
|
||||
|
||||
|
|
@ -248,22 +226,28 @@ class TestAgentKgExtractionIntegration:
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_invalid_json_response_handling(self, configured_agent_extractor, sample_chunk, mock_flow_context):
|
||||
"""Test handling of invalid JSON responses from agent"""
|
||||
"""Test handling of invalid JSON responses from agent - JSONL is lenient and skips invalid lines"""
|
||||
# Arrange - mock invalid JSON response
|
||||
agent_client = mock_flow_context("agent-request")
|
||||
|
||||
|
||||
def mock_invalid_json_response(recipient, question):
|
||||
return "This is not valid JSON at all"
|
||||
|
||||
|
||||
agent_client.invoke = mock_invalid_json_response
|
||||
|
||||
|
||||
mock_message = MagicMock()
|
||||
mock_message.value.return_value = sample_chunk
|
||||
mock_consumer = MagicMock()
|
||||
|
||||
# Act & Assert
|
||||
with pytest.raises((ValueError, json.JSONDecodeError)):
|
||||
await configured_agent_extractor.on_message(mock_message, mock_consumer, mock_flow_context)
|
||||
# Act - JSONL parsing is lenient, invalid lines are skipped
|
||||
await configured_agent_extractor.on_message(mock_message, mock_consumer, mock_flow_context)
|
||||
|
||||
# Assert - should emit triples (with just metadata) but no entity contexts
|
||||
triples_publisher = mock_flow_context("triples")
|
||||
triples_publisher.send.assert_called_once()
|
||||
|
||||
entity_contexts_publisher = mock_flow_context("entity-contexts")
|
||||
entity_contexts_publisher.send.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_empty_extraction_results(self, configured_agent_extractor, sample_chunk, mock_flow_context):
|
||||
|
|
@ -272,7 +256,8 @@ class TestAgentKgExtractionIntegration:
|
|||
agent_client = mock_flow_context("agent-request")
|
||||
|
||||
def mock_empty_response(recipient, question):
|
||||
return '{"definitions": [], "relationships": []}'
|
||||
# Return empty JSONL (just empty/whitespace)
|
||||
return ''
|
||||
|
||||
agent_client.invoke = mock_empty_response
|
||||
|
||||
|
|
@ -303,7 +288,8 @@ class TestAgentKgExtractionIntegration:
|
|||
agent_client = mock_flow_context("agent-request")
|
||||
|
||||
def mock_malformed_response(recipient, question):
|
||||
return '''{"definitions": [{"entity": "Missing Definition"}], "relationships": [{"subject": "Missing Object"}]}'''
|
||||
# JSONL with definition missing required field
|
||||
return '{"type": "definition", "entity": "Missing Definition"}'
|
||||
|
||||
agent_client.invoke = mock_malformed_response
|
||||
|
||||
|
|
@ -330,7 +316,7 @@ class TestAgentKgExtractionIntegration:
|
|||
def capture_prompt(recipient, question):
|
||||
# Verify the prompt contains the test text
|
||||
assert test_text in question
|
||||
return '{"definitions": [], "relationships": []}'
|
||||
return '' # Empty JSONL response
|
||||
|
||||
agent_client.invoke = capture_prompt
|
||||
|
||||
|
|
@ -361,7 +347,7 @@ class TestAgentKgExtractionIntegration:
|
|||
responses = []
|
||||
|
||||
def mock_response(recipient, question):
|
||||
response = f'{{"definitions": [{{"entity": "Entity {len(responses)}", "definition": "Definition {len(responses)}"}}], "relationships": []}}'
|
||||
response = f'{{"type": "definition", "entity": "Entity {len(responses)}", "definition": "Definition {len(responses)}"}}'
|
||||
responses.append(response)
|
||||
return response
|
||||
|
||||
|
|
@ -398,7 +384,7 @@ class TestAgentKgExtractionIntegration:
|
|||
# Verify unicode text was properly decoded and included
|
||||
assert "学习机器" in question
|
||||
assert "人工知能" in question
|
||||
return '''{"definitions": [{"entity": "機械学習", "definition": "人工知能の一分野"}], "relationships": []}'''
|
||||
return '{"type": "definition", "entity": "機械学習", "definition": "人工知能の一分野"}'
|
||||
|
||||
agent_client.invoke = mock_unicode_response
|
||||
|
||||
|
|
@ -415,7 +401,7 @@ class TestAgentKgExtractionIntegration:
|
|||
|
||||
sent_triples = triples_publisher.send.call_args[0][0]
|
||||
# Check that unicode entity was properly processed
|
||||
entity_labels = [t for t in sent_triples.triples if t.p.value == RDF_LABEL and t.o.value == "機械学習"]
|
||||
entity_labels = [t for t in sent_triples.triples if t.p.iri == RDF_LABEL and t.o.value == "機械学習"]
|
||||
assert len(entity_labels) > 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
|
@ -433,7 +419,7 @@ class TestAgentKgExtractionIntegration:
|
|||
def mock_large_text_response(recipient, question):
|
||||
# Verify large text was included
|
||||
assert len(question) > 10000
|
||||
return '''{"definitions": [{"entity": "Machine Learning", "definition": "Important AI technique"}], "relationships": []}'''
|
||||
return '{"type": "definition", "entity": "Machine Learning", "definition": "Important AI technique"}'
|
||||
|
||||
agent_client.invoke = mock_large_text_response
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue