mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-26 00:46:22 +02:00
Enhance retrieval pipelines: 4-stage GraphRAG, DocRAG grounding (#697)
Enhance retrieval pipelines: 4-stage GraphRAG, DocRAG grounding, consistent PROV-O

GraphRAG:
- Split retrieval into 4 prompt stages: extract-concepts, kg-edge-scoring, kg-edge-reasoning, kg-synthesis (was single-stage)
- Add concept extraction (grounding) for per-concept embedding
- Filter main query to default graph, ignoring provenance/explainability edges
- Add source document edges to knowledge graph

DocumentRAG:
- Add grounding step with concept extraction, matching GraphRAG's pattern: Question → Grounding → Exploration → Synthesis
- Per-concept embedding and chunk retrieval with deduplication

Cross-pipeline:
- Make PROV-O derivation links consistent: wasGeneratedBy for first entity from Activity, wasDerivedFrom for entity-to-entity chains
- Update CLIs (tg-invoke-agent, tg-invoke-graph-rag, tg-invoke-document-rag) for new explainability structure
- Fix all affected unit and integration tests
This commit is contained in:
parent
29b4300808
commit
a115ec06ab
25 changed files with 1537 additions and 1008 deletions
|
|
@ -1,6 +1,6 @@
|
|||
|
||||
from . request_response_spec import RequestResponse, RequestResponseSpec
|
||||
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Term, IRI, LITERAL
|
||||
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Term, IRI, LITERAL, TRIPLE
|
||||
from .. knowledge import Uri, Literal
|
||||
|
||||
|
||||
|
|
@ -22,9 +22,11 @@ def to_value(x):
|
|||
|
||||
|
||||
def from_value(x):
|
||||
"""Convert Uri, Literal, or string to schema Term."""
|
||||
"""Convert Uri, Literal, string, or Term to schema Term."""
|
||||
if x is None:
|
||||
return None
|
||||
if isinstance(x, Term):
|
||||
return x
|
||||
if isinstance(x, Uri):
|
||||
return Term(type=IRI, iri=str(x))
|
||||
elif isinstance(x, Literal):
|
||||
|
|
@ -41,7 +43,7 @@ def from_value(x):
|
|||
class TriplesClient(RequestResponse):
|
||||
async def query(self, s=None, p=None, o=None, limit=20,
|
||||
user="trustgraph", collection="default",
|
||||
timeout=30):
|
||||
timeout=30, g=None):
|
||||
|
||||
resp = await self.request(
|
||||
TriplesQueryRequest(
|
||||
|
|
@ -51,6 +53,7 @@ class TriplesClient(RequestResponse):
|
|||
limit = limit,
|
||||
user = user,
|
||||
collection = collection,
|
||||
g = g,
|
||||
),
|
||||
timeout=timeout
|
||||
)
|
||||
|
|
@ -68,7 +71,7 @@ class TriplesClient(RequestResponse):
|
|||
async def query_stream(self, s=None, p=None, o=None, limit=20,
|
||||
user="trustgraph", collection="default",
|
||||
batch_size=20, timeout=30,
|
||||
batch_callback=None):
|
||||
batch_callback=None, g=None):
|
||||
"""
|
||||
Streaming triple query - calls callback for each batch as it arrives.
|
||||
|
||||
|
|
@ -80,6 +83,8 @@ class TriplesClient(RequestResponse):
|
|||
batch_size: Triples per batch
|
||||
timeout: Request timeout in seconds
|
||||
batch_callback: Async callback(batch, is_final) called for each batch
|
||||
g: Graph filter. ""=default graph only, None=all graphs,
|
||||
or a specific graph IRI.
|
||||
|
||||
Returns:
|
||||
List[Triple]: All triples (flattened) if no callback provided
|
||||
|
|
@ -112,6 +117,7 @@ class TriplesClient(RequestResponse):
|
|||
collection=collection,
|
||||
streaming=True,
|
||||
batch_size=batch_size,
|
||||
g=g,
|
||||
),
|
||||
timeout=timeout,
|
||||
recipient=recipient,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue