mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
consistent PROV-O GraphRAG: - Split retrieval into 4 prompt stages: extract-concepts, kg-edge-scoring, kg-edge-reasoning, kg-synthesis (was single-stage) - Add concept extraction (grounding) for per-concept embedding - Filter main query to default graph, ignoring provenance/explainability edges - Add source document edges to knowledge graph DocumentRAG: - Add grounding step with concept extraction, matching GraphRAG's pattern: Question → Grounding → Exploration → Synthesis - Per-concept embedding and chunk retrieval with deduplication Cross-pipeline: - Make PROV-O derivation links consistent: wasGeneratedBy for first entity from Activity, wasDerivedFrom for entity-to-entity chains - Update CLIs (tg-invoke-agent, tg-invoke-graph-rag, tg-invoke-document-rag) for new explainability structure - Fix all affected unit and integration tests
140 lines
4.1 KiB
Python
140 lines
4.1 KiB
Python
|
|
from . request_response_spec import RequestResponse, RequestResponseSpec
|
|
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Term, IRI, LITERAL, TRIPLE
|
|
from .. knowledge import Uri, Literal
|
|
|
|
|
|
class Triple:
    """Plain holder for one subject / predicate / object statement."""

    def __init__(self, s, p, o):
        # Keep the three components exactly as given, under the short
        # attribute names (s, p, o) that the rest of the module reads.
        self.s, self.p, self.o = s, p, o
|
|
|
|
|
|
def to_value(x):
    """Turn a schema Term into a Uri or a Literal.

    Args:
        x: Object exposing ``type``, ``iri`` and ``value`` attributes.

    Returns:
        ``Uri(x.iri)`` when the term type is IRI, ``Literal(x.value)`` when
        it is LITERAL, and otherwise a Literal wrapping ``x.value`` (or
        ``x.iri`` when the value is falsy).
    """
    kind = x.type

    if kind == IRI:
        return Uri(x.iri)

    if kind == LITERAL:
        return Literal(x.value)

    # Any other term type: fall back to whichever of value / iri is set.
    return Literal(x.value or x.iri)
|
|
|
|
|
|
def from_value(x):
    """Coerce a Uri, Literal, string, Term or None into a schema Term.

    Args:
        x: Value to convert.

    Returns:
        ``None`` when ``x`` is None; ``x`` itself when it is already a Term;
        an IRI Term for Uri instances and for strings starting with
        ``http://``, ``https://`` or ``urn:``; a LITERAL Term for everything
        else (non-string values are converted with ``str``).
    """
    # None and ready-made Terms are handed back untouched.
    if x is None or isinstance(x, Term):
        return x

    # Uri / Literal are tested before the plain-str branch.
    # NOTE(review): presumably these may be str subclasses - keep this order.
    if isinstance(x, Uri):
        return Term(type=IRI, iri=str(x))

    if isinstance(x, Literal):
        return Term(type=LITERAL, value=str(x))

    # Bare strings: detect IRIs by common prefixes.
    if isinstance(x, str) and x.startswith(("http://", "https://", "urn:")):
        return Term(type=IRI, iri=x)

    # Remaining strings are used as-is; any other object is stringified.
    text = x if isinstance(x, str) else str(x)
    return Term(type=LITERAL, value=text)
|
|
|
|
class TriplesClient(RequestResponse):
    """Request/response client for triple-pattern queries."""

    async def query(self, s=None, p=None, o=None, limit=20,
                    user="trustgraph", collection="default",
                    timeout=30, g=None):
        """Run one triples query and return the matching triples.

        Args:
            s, p, o: Triple pattern; each is passed through ``from_value``.
            limit: Forwarded as the request's ``limit``.
            user: Forwarded as the request's ``user``.
            collection: Forwarded as the request's ``collection``.
            timeout: Timeout handed to ``self.request``.
            g: Forwarded unchanged as the request's ``g`` field.

        Returns:
            list of Triple, with each component converted by ``to_value``.

        Raises:
            RuntimeError: If the response has its ``error`` field set.
        """
        request = TriplesQueryRequest(
            s=from_value(s),
            p=from_value(p),
            o=from_value(o),
            limit=limit,
            user=user,
            collection=collection,
            g=g,
        )

        resp = await self.request(request, timeout=timeout)

        if resp.error:
            raise RuntimeError(resp.error.message)

        return [
            Triple(to_value(item.s), to_value(item.p), to_value(item.o))
            for item in resp.triples
        ]

    async def query_stream(self, s=None, p=None, o=None, limit=20,
                           user="trustgraph", collection="default",
                           batch_size=20, timeout=30,
                           batch_callback=None, g=None):
        """
        Streaming triple query - calls callback for each batch as it arrives.

        Args:
            s, p, o: Triple pattern (None for wildcard)
            limit: Maximum total triples to return
            user: User/keyspace
            collection: Collection name
            batch_size: Triples per batch
            timeout: Request timeout in seconds
            batch_callback: Async callback(batch, is_final) called for each
                batch
            g: Graph filter. ""=default graph only, None=all graphs,
                or a specific graph IRI.

        Returns:
            List[Triple]: All triples (flattened) if no callback provided;
            None when a callback is provided.

        Raises:
            RuntimeError: If any response has its ``error`` field set.
        """
        collected = []

        async def recipient(resp):
            # Invoked by self.request for each response message.
            if resp.error:
                raise RuntimeError(resp.error.message)

            batch = [
                Triple(to_value(item.s), to_value(item.p), to_value(item.o))
                for item in resp.triples
            ]

            if not batch_callback:
                # No callback supplied: accumulate for the final return.
                collected.extend(batch)
            else:
                await batch_callback(batch, resp.is_final)

            # NOTE(review): presumably tells self.request whether this was
            # the last message - confirm against RequestResponse.request.
            return resp.is_final

        request = TriplesQueryRequest(
            s=from_value(s),
            p=from_value(p),
            o=from_value(o),
            limit=limit,
            user=user,
            collection=collection,
            streaming=True,
            batch_size=batch_size,
            g=g,
        )

        await self.request(request, timeout=timeout, recipient=recipient)

        if batch_callback:
            # Batches were already delivered through the callback.
            return None

        return collected
|
|
|
|
class TriplesClientSpec(RequestResponseSpec):
    """Request/response spec wiring the triples query schemas to TriplesClient."""

    def __init__(self, request_name, response_name):
        # Only the two names vary; the schemas and the client implementation
        # are fixed for triples queries.
        super().__init__(
            request_name=request_name,
            request_schema=TriplesQueryRequest,
            response_name=response_name,
            response_schema=TriplesQueryResponse,
            impl=TriplesClient,
        )
|
|
|