mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-07-01 17:39:39 +02:00
feat: replace LLM edge scoring with cross-encoder reranker in GraphRAG
Replace the three-prompt LLM scoring pipeline (kg-edge-scoring, kg-edge-reasoning, kg-edge-selection) with a cross-encoder reranker service backed by FlashRank. The new hop_and_filter() method performs iterative graph traversal with semantic scoring at each hop, replacing the previous follow_edges/get_subgraph approach.

- Add reranker service (trustgraph-base client/service, FlashRank processor)
- Add gateway dispatch for reranker via API and WebSocket
- Rewrite GraphRAG pipeline: hop_and_filter() with per-hop cross-encoder scoring
- Remove kg_prompt() and edge_score_limit from prompt client
- Update provenance: add tg:EdgeSelection type, tg:concept, tg:score predicates
- Update CLIs (tg-invoke-graph-rag, tg-show-explain-trace) for new metadata
- Add tg-invoke-reranker CLI tool
- Add tech spec and UX developer guidance
- Update all unit and integration tests
This commit is contained in:
parent
1aa9549912
commit
1346cbebb4
43 changed files with 1613 additions and 792 deletions
|
|
@ -71,6 +71,7 @@ tg-invoke-rows-query = "trustgraph.cli.invoke_rows_query:main"
|
|||
tg-invoke-sparql-query = "trustgraph.cli.invoke_sparql_query:main"
|
||||
tg-invoke-row-embeddings = "trustgraph.cli.invoke_row_embeddings:main"
|
||||
tg-invoke-prompt = "trustgraph.cli.invoke_prompt:main"
|
||||
tg-invoke-reranker = "trustgraph.cli.invoke_reranker:main"
|
||||
tg-invoke-structured-query = "trustgraph.cli.invoke_structured_query:main"
|
||||
tg-load-doc-embeds = "trustgraph.cli.load_doc_embeds:main"
|
||||
tg-load-kg-core = "trustgraph.cli.load_kg_core:main"
|
||||
|
|
|
|||
|
|
@ -112,14 +112,13 @@ def _question_explainable_api(
|
|||
if focus_full and focus_full.edge_selections:
|
||||
for edge_sel in focus_full.edge_selections:
|
||||
if edge_sel.edge:
|
||||
# Resolve labels for edge components
|
||||
s_label, p_label, o_label = explain_client.resolve_edge_labels(
|
||||
edge_sel.edge, collection
|
||||
)
|
||||
print(f" Edge: ({s_label}, {p_label}, {o_label})", file=sys.stderr)
|
||||
if edge_sel.reasoning:
|
||||
r_short = edge_sel.reasoning[:100] + "..." if len(edge_sel.reasoning) > 100 else edge_sel.reasoning
|
||||
print(f" Reason: {r_short}", file=sys.stderr)
|
||||
if edge_sel.concept or edge_sel.score is not None:
|
||||
score_str = f"{edge_sel.score:.4f}" if edge_sel.score is not None else "?"
|
||||
print(f" Concept: {edge_sel.concept} Score: {score_str}", file=sys.stderr)
|
||||
|
||||
elif isinstance(entity, Synthesis):
|
||||
print(f"\n [synthesis] {prov_id}", file=sys.stderr)
|
||||
|
|
|
|||
127
trustgraph-cli/trustgraph/cli/invoke_reranker.py
Normal file
127
trustgraph-cli/trustgraph/cli/invoke_reranker.py
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
"""
|
||||
Invokes the reranker service to score and rank documents by relevance
|
||||
to one or more queries.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
from trustgraph.api import Api
|
||||
|
||||
# Connection defaults, each read from an environment variable with a
# fallback; main() uses them as the argparse defaults for --url, --token
# and --workspace.
default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
|
||||
default_token = os.getenv("TRUSTGRAPH_TOKEN", None)
|
||||
default_workspace = os.getenv("TRUSTGRAPH_WORKSPACE", "default")
|
||||
|
||||
def query(url, flow_id, queries, documents, limit, token=None,
          workspace="default"):
    """Rerank documents against queries via a flow and print the scores.

    Each query and document is sent with its list position (as a string)
    as its identifier, so that the identifiers echoed back in the results
    can be mapped to the original text again.

    Args:
        url: API URL handed to ``Api``.
        flow_id: Identifier of the flow to invoke the reranker on.
        queries: Sequence of query strings.
        documents: Sequence of document strings to rank.
        limit: Passed through to ``flow.rerank`` as ``limit``.
        token: Optional authentication token handed to ``Api``.
        workspace: Workspace name handed to ``Api``.

    Returns:
        None. Output is printed: either a single error line when the
        response carries a truthy ``"error"`` entry, or one line per
        entry of the response's ``"results"`` list.
    """

    connection = Api(url=url, token=token, workspace=workspace).socket()
    flow = connection.flow(flow_id)

    try:

        # Positional index doubles as the identifier for each query.
        query_payload = []
        for position, text in enumerate(queries):
            query_payload.append(
                {"query_id": str(position), "query_text": text}
            )

        # Same scheme for the documents.
        document_payload = []
        for position, text in enumerate(documents):
            document_payload.append(
                {"document_id": str(position), "document_text": text}
            )

        response = flow.rerank(
            queries=query_payload,
            documents=document_payload,
            limit=limit,
        )

        failure = response.get("error")
        if failure:
            print(f"Error: [{failure.get('type', '')}] {failure.get('message', '')}")
            return

        for entry in response.get("results", []):
            # Identifiers come back as strings; convert them to list
            # positions to recover the original text.
            doc_pos = int(entry["document_id"])
            query_pos = int(entry["query_id"])
            line = (
                f" {entry['score']:.4f} | "
                f"query: {queries[query_pos]} | "
                f"doc: {documents[doc_pos]}"
            )
            print(line)

    finally:
        # Always release the socket, including on the early error return.
        connection.close()
|
||||
|
||||
def main():
    """Command-line entry point for ``tg-invoke-reranker``.

    Parses the command line (connection options, flow id, result limit,
    one or more ``--query`` values and one or more positional documents)
    and forwards everything to ``query``. Any exception raised by
    ``query`` is caught and printed rather than propagated.
    """

    parser = argparse.ArgumentParser(
        prog='tg-invoke-reranker',
        description=__doc__,
    )

    # (flags, keyword settings) pairs, registered in this order so the
    # generated help text keeps its layout.
    argument_table = (
        (
            ('-u', '--url'),
            {
                'default': default_url,
                'help': f'API URL (default: {default_url})',
            },
        ),
        (
            ('-t', '--token'),
            {
                'default': default_token,
                'help': 'Authentication token (default: $TRUSTGRAPH_TOKEN)',
            },
        ),
        (
            ('-w', '--workspace'),
            {
                'default': default_workspace,
                'help': f'Workspace (default: {default_workspace})',
            },
        ),
        (
            ('-f', '--flow-id'),
            {
                'default': "default",
                'help': 'Flow ID (default: default)',
            },
        ),
        (
            ('-l', '--limit'),
            {
                'type': int,
                'default': 10,
                'help': 'Maximum number of results (default: 10)',
            },
        ),
        (
            ('-q', '--query'),
            {
                'action': 'append',
                'required': True,
                'help': 'Query text (can be specified multiple times)',
            },
        ),
        (
            ('documents',),
            {
                'nargs': '+',
                'help': 'Documents to rerank',
            },
        ),
    )

    for flags, settings in argument_table:
        parser.add_argument(*flags, **settings)

    args = parser.parse_args()

    call_settings = {
        'url': args.url,
        'flow_id': args.flow_id,
        'queries': args.query,
        'documents': args.documents,
        'limit': args.limit,
        'token': args.token,
        'workspace': args.workspace,
    }

    try:
        query(**call_settings)
    except Exception as e:
        # Top-level boundary: report the failure instead of a traceback.
        print("Exception:", e, flush=True)
|
||||
|
||||
# Run the CLI when this module is executed directly as a script.
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -203,9 +203,9 @@ def print_graphrag_text(trace, explain_client, flow, collection, api=None, show_
|
|||
)
|
||||
print(f" {i}. ({s_label}, {p_label}, {o_label})")
|
||||
|
||||
if edge_sel.reasoning:
|
||||
r_short = edge_sel.reasoning[:100] + "..." if len(edge_sel.reasoning) > 100 else edge_sel.reasoning
|
||||
print(f" Reasoning: {r_short}")
|
||||
if edge_sel.concept or edge_sel.score is not None:
|
||||
score_str = f"{edge_sel.score:.4f}" if edge_sel.score is not None else "?"
|
||||
print(f" Concept: {edge_sel.concept} Score: {score_str}")
|
||||
|
||||
if show_provenance and edge_sel.edge:
|
||||
provenance = trace_edge_provenance(
|
||||
|
|
@ -519,7 +519,8 @@ def trace_to_dict(trace, trace_type):
|
|||
"selected_edges": [
|
||||
{
|
||||
"edge": edge_sel.edge,
|
||||
"reasoning": edge_sel.reasoning,
|
||||
"concept": edge_sel.concept,
|
||||
"score": edge_sel.score,
|
||||
}
|
||||
for edge_sel in focus.edge_selections
|
||||
],
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue