mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-07-05 11:22:11 +02:00
feat: filter and cap GraphRAG reranker input across full stack (#1021)
- Filter out RDF/RDFS/OWL schema predicates (rdfs:domain, owl:inverseOf, etc.) from hop traversal, keeping rdf:type for data signal - Skip edges where reranker-visible components are unlabeled IRIs, since the cross-encoder cannot meaningfully score raw URIs - Add max-reranker-input safety cap (default 350) to prevent overloading the reranker, applied after filtering for maximum useful candidates - Expose max-reranker-input as per-request parameter through schema, translator, REST API, socket client, CLI, and OpenAPI spec - Update tests - Update tech spec
This commit is contained in:
parent
76c4763b9b
commit
68e816e65c
10 changed files with 198 additions and 43 deletions
|
|
@ -357,6 +357,7 @@ class FlowInstance:
|
|||
self, query,collection="default",
|
||||
entity_limit=50, triple_limit=30, max_subgraph_size=150,
|
||||
max_path_length=2, edge_score_limit=30, edge_limit=25,
|
||||
max_reranker_input=350,
|
||||
):
|
||||
"""
|
||||
Execute graph-based Retrieval-Augmented Generation (RAG) query.
|
||||
|
|
@ -373,6 +374,7 @@ class FlowInstance:
|
|||
max_path_length: Maximum traversal depth (default: 2)
|
||||
edge_score_limit: Max edges for semantic pre-filter (default: 50)
|
||||
edge_limit: Max edges after LLM scoring (default: 25)
|
||||
max_reranker_input: Max candidate edges sent to reranker per hop (default: 350)
|
||||
|
||||
Returns:
|
||||
str: Generated response incorporating graph context
|
||||
|
|
@ -399,6 +401,7 @@ class FlowInstance:
|
|||
"max-path-length": max_path_length,
|
||||
"edge-score-limit": edge_score_limit,
|
||||
"edge-limit": edge_limit,
|
||||
"max-reranker-input": max_reranker_input,
|
||||
}
|
||||
|
||||
result = self.request(
|
||||
|
|
|
|||
|
|
@ -682,6 +682,7 @@ class SocketFlowInstance:
|
|||
max_path_length: int = 2,
|
||||
edge_score_limit: int = 30,
|
||||
edge_limit: int = 25,
|
||||
max_reranker_input: int = 350,
|
||||
streaming: bool = False,
|
||||
**kwargs: Any
|
||||
) -> Union[TextCompletionResult, Iterator[RAGChunk]]:
|
||||
|
|
@ -699,6 +700,7 @@ class SocketFlowInstance:
|
|||
"max-path-length": max_path_length,
|
||||
"edge-score-limit": edge_score_limit,
|
||||
"edge-limit": edge_limit,
|
||||
"max-reranker-input": max_reranker_input,
|
||||
"streaming": streaming
|
||||
}
|
||||
request.update(kwargs)
|
||||
|
|
@ -725,6 +727,7 @@ class SocketFlowInstance:
|
|||
max_path_length: int = 2,
|
||||
edge_score_limit: int = 30,
|
||||
edge_limit: int = 25,
|
||||
max_reranker_input: int = 350,
|
||||
**kwargs: Any
|
||||
) -> Iterator[Union[RAGChunk, ProvenanceEvent]]:
|
||||
"""Execute graph-based RAG query with explainability support."""
|
||||
|
|
@ -737,6 +740,7 @@ class SocketFlowInstance:
|
|||
"max-path-length": max_path_length,
|
||||
"edge-score-limit": edge_score_limit,
|
||||
"edge-limit": edge_limit,
|
||||
"max-reranker-input": max_reranker_input,
|
||||
"streaming": True,
|
||||
"explainable": True,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -103,6 +103,7 @@ class GraphRagRequestTranslator(MessageTranslator):
|
|||
max_path_length=int(data.get("max-path-length", 2)),
|
||||
edge_score_limit=int(data.get("edge-score-limit", 30)),
|
||||
edge_limit=int(data.get("edge-limit", 25)),
|
||||
max_reranker_input=int(data.get("max-reranker-input", 350)),
|
||||
streaming=data.get("streaming", False)
|
||||
)
|
||||
|
||||
|
|
@ -116,6 +117,7 @@ class GraphRagRequestTranslator(MessageTranslator):
|
|||
"max-path-length": obj.max_path_length,
|
||||
"edge-score-limit": obj.edge_score_limit,
|
||||
"edge-limit": obj.edge_limit,
|
||||
"max-reranker-input": obj.max_reranker_input,
|
||||
"streaming": getattr(obj, "streaming", False)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ class GraphRagQuery:
|
|||
max_path_length: int = 0
|
||||
edge_score_limit: int = 0
|
||||
edge_limit: int = 0
|
||||
max_reranker_input: int = 0
|
||||
streaming: bool = False
|
||||
parent_uri: str = ""
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue