mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 08:26:21 +02:00
Enhance retrieval pipelines: 4-stage GraphRAG, DocRAG grounding (#697)
Enhance retrieval pipelines: 4-stage GraphRAG, DocRAG grounding, consistent PROV-O

GraphRAG:
- Split retrieval into 4 prompt stages: extract-concepts, kg-edge-scoring, kg-edge-reasoning, kg-synthesis (was single-stage)
- Add concept extraction (grounding) for per-concept embedding
- Filter main query to default graph, ignoring provenance/explainability edges
- Add source document edges to knowledge graph

DocumentRAG:
- Add grounding step with concept extraction, matching GraphRAG's pattern: Question → Grounding → Exploration → Synthesis
- Per-concept embedding and chunk retrieval with deduplication

Cross-pipeline:
- Make PROV-O derivation links consistent: wasGeneratedBy for first entity from Activity, wasDerivedFrom for entity-to-entity chains
- Update CLIs (tg-invoke-agent, tg-invoke-graph-rag, tg-invoke-document-rag) for new explainability structure
- Fix all affected unit and integration tests
This commit is contained in:
parent
29b4300808
commit
a115ec06ab
25 changed files with 1537 additions and 1008 deletions
|
|
@ -202,16 +202,17 @@ def question_explainable(
|
|||
|
||||
elif isinstance(entity, Analysis):
|
||||
print(f"\n [iteration] {prov_id}", file=sys.stderr)
|
||||
if entity.thought:
|
||||
thought_short = entity.thought[:80] + "..." if len(entity.thought) > 80 else entity.thought
|
||||
print(f" Thought: {thought_short}", file=sys.stderr)
|
||||
if entity.action:
|
||||
print(f" Action: {entity.action}", file=sys.stderr)
|
||||
if entity.thought_uri:
|
||||
print(f" Thought: {entity.thought_uri}", file=sys.stderr)
|
||||
if entity.observation_uri:
|
||||
print(f" Observation: {entity.observation_uri}", file=sys.stderr)
|
||||
|
||||
elif isinstance(entity, Conclusion):
|
||||
print(f"\n [conclusion] {prov_id}", file=sys.stderr)
|
||||
if entity.answer:
|
||||
print(f" Answer length: {len(entity.answer)} chars", file=sys.stderr)
|
||||
if entity.document_uri:
|
||||
print(f" Document: {entity.document_uri}", file=sys.stderr)
|
||||
|
||||
else:
|
||||
if debug:
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ from trustgraph.api import (
|
|||
RAGChunk,
|
||||
ProvenanceEvent,
|
||||
Question,
|
||||
Grounding,
|
||||
Exploration,
|
||||
Synthesis,
|
||||
)
|
||||
|
|
@ -68,6 +69,12 @@ def question_explainable(
|
|||
if entity.timestamp:
|
||||
print(f" Time: {entity.timestamp}", file=sys.stderr)
|
||||
|
||||
elif isinstance(entity, Grounding):
|
||||
print(f"\n [grounding] {prov_id}", file=sys.stderr)
|
||||
if entity.concepts:
|
||||
for concept in entity.concepts:
|
||||
print(f" Concept: {concept}", file=sys.stderr)
|
||||
|
||||
elif isinstance(entity, Exploration):
|
||||
print(f"\n [exploration] {prov_id}", file=sys.stderr)
|
||||
if entity.chunk_count:
|
||||
|
|
@ -75,8 +82,8 @@ def question_explainable(
|
|||
|
||||
elif isinstance(entity, Synthesis):
|
||||
print(f"\n [synthesis] {prov_id}", file=sys.stderr)
|
||||
if entity.content:
|
||||
print(f" Synthesis length: {len(entity.content)} chars", file=sys.stderr)
|
||||
if entity.document_uri:
|
||||
print(f" Document: {entity.document_uri}", file=sys.stderr)
|
||||
|
||||
else:
|
||||
if debug:
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ from trustgraph.api import (
|
|||
RAGChunk,
|
||||
ProvenanceEvent,
|
||||
Question,
|
||||
Grounding,
|
||||
Exploration,
|
||||
Focus,
|
||||
Synthesis,
|
||||
|
|
@ -31,11 +32,13 @@ default_max_path_length = 2
|
|||
# Provenance predicates
|
||||
TG = "https://trustgraph.ai/ns/"
|
||||
TG_QUERY = TG + "query"
|
||||
TG_CONCEPT = TG + "concept"
|
||||
TG_ENTITY = TG + "entity"
|
||||
TG_EDGE_COUNT = TG + "edgeCount"
|
||||
TG_SELECTED_EDGE = TG + "selectedEdge"
|
||||
TG_EDGE = TG + "edge"
|
||||
TG_REASONING = TG + "reasoning"
|
||||
TG_CONTENT = TG + "content"
|
||||
TG_DOCUMENT = TG + "document"
|
||||
TG_CONTAINS = TG + "contains"
|
||||
PROV = "http://www.w3.org/ns/prov#"
|
||||
PROV_STARTED_AT_TIME = PROV + "startedAtTime"
|
||||
|
|
@ -47,6 +50,8 @@ def _get_event_type(prov_id):
|
|||
"""Extract event type from provenance_id"""
|
||||
if "question" in prov_id:
|
||||
return "question"
|
||||
elif "grounding" in prov_id:
|
||||
return "grounding"
|
||||
elif "exploration" in prov_id:
|
||||
return "exploration"
|
||||
elif "focus" in prov_id:
|
||||
|
|
@ -68,8 +73,16 @@ def _format_provenance_details(event_type, triples):
|
|||
elif p == PROV_STARTED_AT_TIME:
|
||||
lines.append(f" Time: {o}")
|
||||
|
||||
elif event_type == "grounding":
|
||||
# Show extracted concepts
|
||||
concepts = [o for s, p, o in triples if p == TG_CONCEPT]
|
||||
if concepts:
|
||||
lines.append(f" Concepts: {len(concepts)}")
|
||||
for concept in concepts:
|
||||
lines.append(f" - {concept}")
|
||||
|
||||
elif event_type == "exploration":
|
||||
# Show edge count
|
||||
# Show edge count (seed entities resolved separately with labels)
|
||||
for s, p, o in triples:
|
||||
if p == TG_EDGE_COUNT:
|
||||
lines.append(f" Edges explored: {o}")
|
||||
|
|
@ -85,10 +98,10 @@ def _format_provenance_details(event_type, triples):
|
|||
lines.append(f" Focused on {len(edge_sel_uris)} edge(s)")
|
||||
|
||||
elif event_type == "synthesis":
|
||||
# Show content length (not full content - it's already streamed)
|
||||
# Show document reference (content already streamed)
|
||||
for s, p, o in triples:
|
||||
if p == TG_CONTENT:
|
||||
lines.append(f" Synthesis length: {len(o)} chars")
|
||||
if p == TG_DOCUMENT:
|
||||
lines.append(f" Document: {o}")
|
||||
|
||||
return lines
|
||||
|
||||
|
|
@ -542,6 +555,18 @@ async def _question_explainable(
|
|||
for line in details:
|
||||
print(line, file=sys.stderr)
|
||||
|
||||
# For exploration events, resolve entity labels
|
||||
if event_type == "exploration":
|
||||
entity_iris = [o for s, p, o in triples if p == TG_ENTITY]
|
||||
if entity_iris:
|
||||
print(f" Seed entities: {len(entity_iris)}", file=sys.stderr)
|
||||
for iri in entity_iris:
|
||||
label = await _query_label(
|
||||
ws_url, flow_id, iri, user, collection,
|
||||
label_cache, debug=debug
|
||||
)
|
||||
print(f" - {label}", file=sys.stderr)
|
||||
|
||||
# For focus events, query each edge selection for details
|
||||
if event_type == "focus":
|
||||
for s, p, o in triples:
|
||||
|
|
@ -660,10 +685,22 @@ def _question_explainable_api(
|
|||
if entity.timestamp:
|
||||
print(f" Time: {entity.timestamp}", file=sys.stderr)
|
||||
|
||||
elif isinstance(entity, Grounding):
|
||||
print(f"\n [grounding] {prov_id}", file=sys.stderr)
|
||||
if entity.concepts:
|
||||
print(f" Concepts: {len(entity.concepts)}", file=sys.stderr)
|
||||
for concept in entity.concepts:
|
||||
print(f" - {concept}", file=sys.stderr)
|
||||
|
||||
elif isinstance(entity, Exploration):
|
||||
print(f"\n [exploration] {prov_id}", file=sys.stderr)
|
||||
if entity.edge_count:
|
||||
print(f" Edges explored: {entity.edge_count}", file=sys.stderr)
|
||||
if entity.entities:
|
||||
print(f" Seed entities: {len(entity.entities)}", file=sys.stderr)
|
||||
for ent in entity.entities:
|
||||
label = explain_client.resolve_label(ent, user, collection)
|
||||
print(f" - {label}", file=sys.stderr)
|
||||
|
||||
elif isinstance(entity, Focus):
|
||||
print(f"\n [focus] {prov_id}", file=sys.stderr)
|
||||
|
|
@ -691,8 +728,8 @@ def _question_explainable_api(
|
|||
|
||||
elif isinstance(entity, Synthesis):
|
||||
print(f"\n [synthesis] {prov_id}", file=sys.stderr)
|
||||
if entity.content:
|
||||
print(f" Synthesis length: {len(entity.content)} chars", file=sys.stderr)
|
||||
if entity.document_uri:
|
||||
print(f" Document: {entity.document_uri}", file=sys.stderr)
|
||||
|
||||
else:
|
||||
if debug:
|
||||
|
|
@ -848,7 +885,7 @@ def main():
|
|||
parser.add_argument(
|
||||
'-x', '--explainable',
|
||||
action='store_true',
|
||||
help='Show provenance events: Question, Exploration, Focus, Synthesis (implies streaming)'
|
||||
help='Show provenance events: Question, Grounding, Exploration, Focus, Synthesis (implies streaming)'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue