mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-07-03 15:01:00 +02:00
Wire the FlashRank reranker subsystem from #1005 into Document-RAG: after vector retrieval, over-fetch a wider candidate pool, rerank with the cross-encoder, and keep the top doc_limit chunks for synthesis.

Per maintainer review, the fetch and select sizes are two caller-controlled limits rather than one internal heuristic:
- doc_limit: chunks selected into the synthesis prompt (unchanged meaning).
- fetch_limit: candidate pool pulled from the vector store before reranking. 0 = derive (OVERFETCH_FACTOR x doc_limit); values below doc_limit are raised to it. Lets the caller control how hard the reranker has to work.

Details:
- schema: DocumentRagQuery.fetch_limit (additive, backward compatible).
- document_rag.py / rag.py: fetch_limit resolved in the processor (mirrors doc_limit); the core applies the heuristic default and derives synthesis provenance from the chunk-selection focus when reranking ran.
- provenance: tg:ChunkSelection focus stage (mirrors tg:EdgeSelection).
- request translator + client SDKs + CLI: fetch-limit / --fetch-limit, threaded exactly like doc_limit and the GraphRAG limits.
- tests: no-op identity, over-fetch/narrow, explicit fetch_limit, heuristic default, floor-at-doc_limit, provenance lineage, cross-repo topic wiring.

When no reranker role is wired, reranking is skipped and the output is byte-identical.

Requires the companion trustgraph-templates change wiring the reranker topics into the document-rag flow (mirrors #279 for GraphRAG).
This commit is contained in:
parent
f18d48dc39
commit
6c9a545a06
18 changed files with 853 additions and 26 deletions
|
|
@ -21,10 +21,12 @@ default_token = os.getenv("TRUSTGRAPH_TOKEN", None)
|
|||
default_workspace = os.getenv("TRUSTGRAPH_WORKSPACE", "default")
|
||||
default_collection = 'default'
|
||||
default_doc_limit = 10
|
||||
default_fetch_limit = 0
|
||||
|
||||
|
||||
def question_explainable(
|
||||
url, flow_id, question_text, collection, doc_limit, token=None, debug=False,
|
||||
url, flow_id, question_text, collection, doc_limit, fetch_limit=0,
|
||||
token=None, debug=False,
|
||||
workspace="default",
|
||||
):
|
||||
"""Execute document RAG with explainability - shows provenance events inline."""
|
||||
|
|
@ -39,6 +41,7 @@ def question_explainable(
|
|||
query=question_text,
|
||||
collection=collection,
|
||||
doc_limit=doc_limit,
|
||||
fetch_limit=fetch_limit,
|
||||
):
|
||||
if isinstance(item, RAGChunk):
|
||||
# Print response content
|
||||
|
|
@ -97,7 +100,7 @@ def question_explainable(
|
|||
|
||||
|
||||
def question(
|
||||
url, flow_id, question_text, collection, doc_limit,
|
||||
url, flow_id, question_text, collection, doc_limit, fetch_limit=0,
|
||||
streaming=True, token=None, explainable=False, debug=False,
|
||||
show_usage=False, workspace="default",
|
||||
):
|
||||
|
|
@ -109,6 +112,7 @@ def question(
|
|||
question_text=question_text,
|
||||
collection=collection,
|
||||
doc_limit=doc_limit,
|
||||
fetch_limit=fetch_limit,
|
||||
token=token,
|
||||
debug=debug,
|
||||
workspace=workspace,
|
||||
|
|
@ -128,6 +132,7 @@ def question(
|
|||
query=question_text,
|
||||
collection=collection,
|
||||
doc_limit=doc_limit,
|
||||
fetch_limit=fetch_limit,
|
||||
streaming=True
|
||||
)
|
||||
|
||||
|
|
@ -155,6 +160,7 @@ def question(
|
|||
query=question_text,
|
||||
collection=collection,
|
||||
doc_limit=doc_limit,
|
||||
fetch_limit=fetch_limit,
|
||||
)
|
||||
print(result.text)
|
||||
|
||||
|
|
@ -214,7 +220,15 @@ def main():
|
|||
'-d', '--doc-limit',
|
||||
type=int,
|
||||
default=default_doc_limit,
|
||||
help=f'Document limit (default: {default_doc_limit})'
|
||||
help=f'Documents selected into the prompt (default: {default_doc_limit})'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--fetch-limit',
|
||||
type=int,
|
||||
default=default_fetch_limit,
|
||||
help='Candidate documents fetched from the vector store before '
|
||||
'reranking (default: derive from doc-limit)'
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
|
|
@ -251,6 +265,7 @@ def main():
|
|||
question_text=args.question,
|
||||
collection=args.collection,
|
||||
doc_limit=args.doc_limit,
|
||||
fetch_limit=args.fetch_limit,
|
||||
streaming=not args.no_streaming,
|
||||
token=args.token,
|
||||
explainable=args.explainable,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue