mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
Add semantic pre-filter for GraphRAG edge scoring (#702)
Embed edge descriptions and compute cosine similarity against grounding concepts to reduce the number of edges sent to expensive LLM scoring. Controlled by the edge_score_limit parameter (default 30); the pre-filter is skipped when the edge count is already below the limit.

Also plumbs the edge_score_limit and edge_limit parameters end-to-end:
- CLI args (--edge-score-limit, --edge-limit) in both invoke and service
- Socket client: fix parameter mapping to use hyphenated wire-format keys
- Flow API, message translator, gateway all pass through correctly
- Explainable code path (_question_explainable_api) now forwards all params
- Default edge_score_limit changed from 50 to 30 based on typical subgraph sizes
This commit is contained in:
parent
bc68738c37
commit
1a7b654bd3
7 changed files with 166 additions and 20 deletions
|
|
@ -449,7 +449,7 @@ class FlowInstance:
|
|||
def graph_rag(
|
||||
self, query, user="trustgraph", collection="default",
|
||||
entity_limit=50, triple_limit=30, max_subgraph_size=150,
|
||||
max_path_length=2,
|
||||
max_path_length=2, edge_score_limit=30, edge_limit=25,
|
||||
):
|
||||
"""
|
||||
Execute graph-based Retrieval-Augmented Generation (RAG) query.
|
||||
|
|
@ -465,6 +465,8 @@ class FlowInstance:
|
|||
triple_limit: Maximum triples per entity (default: 30)
|
||||
max_subgraph_size: Maximum total triples in subgraph (default: 150)
|
||||
max_path_length: Maximum traversal depth (default: 2)
|
||||
edge_score_limit: Max edges for semantic pre-filter (default: 30)
|
||||
edge_limit: Max edges after LLM scoring (default: 25)
|
||||
|
||||
Returns:
|
||||
str: Generated response incorporating graph context
|
||||
|
|
@ -492,6 +494,8 @@ class FlowInstance:
|
|||
"triple-limit": triple_limit,
|
||||
"max-subgraph-size": max_subgraph_size,
|
||||
"max-path-length": max_path_length,
|
||||
"edge-score-limit": edge_score_limit,
|
||||
"edge-limit": edge_limit,
|
||||
}
|
||||
|
||||
return self.request(
|
||||
|
|
|
|||
|
|
@ -699,9 +699,12 @@ class SocketFlowInstance:
|
|||
query: str,
|
||||
user: str,
|
||||
collection: str,
|
||||
entity_limit: int = 50,
|
||||
triple_limit: int = 30,
|
||||
max_subgraph_size: int = 1000,
|
||||
max_subgraph_count: int = 5,
|
||||
max_entity_distance: int = 3,
|
||||
max_path_length: int = 2,
|
||||
edge_score_limit: int = 30,
|
||||
edge_limit: int = 25,
|
||||
streaming: bool = False,
|
||||
**kwargs: Any
|
||||
) -> Union[str, Iterator[str]]:
|
||||
|
|
@ -715,9 +718,12 @@ class SocketFlowInstance:
|
|||
query: Natural language query
|
||||
user: User/keyspace identifier
|
||||
collection: Collection identifier
|
||||
entity_limit: Maximum entities to retrieve (default: 50)
|
||||
triple_limit: Maximum triples per entity (default: 30)
|
||||
max_subgraph_size: Maximum total triples in subgraph (default: 1000)
|
||||
max_subgraph_count: Maximum number of subgraphs (default: 5)
|
||||
max_entity_distance: Maximum traversal depth (default: 3)
|
||||
max_path_length: Maximum traversal depth (default: 2)
|
||||
edge_score_limit: Max edges for semantic pre-filter (default: 30)
|
||||
edge_limit: Max edges after LLM scoring (default: 25)
|
||||
streaming: Enable streaming mode (default: False)
|
||||
**kwargs: Additional parameters passed to the service
|
||||
|
||||
|
|
@ -743,9 +749,12 @@ class SocketFlowInstance:
|
|||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"entity-limit": entity_limit,
|
||||
"triple-limit": triple_limit,
|
||||
"max-subgraph-size": max_subgraph_size,
|
||||
"max-subgraph-count": max_subgraph_count,
|
||||
"max-entity-distance": max_entity_distance,
|
||||
"max-path-length": max_path_length,
|
||||
"edge-score-limit": edge_score_limit,
|
||||
"edge-limit": edge_limit,
|
||||
"streaming": streaming
|
||||
}
|
||||
request.update(kwargs)
|
||||
|
|
@ -762,9 +771,12 @@ class SocketFlowInstance:
|
|||
query: str,
|
||||
user: str,
|
||||
collection: str,
|
||||
entity_limit: int = 50,
|
||||
triple_limit: int = 30,
|
||||
max_subgraph_size: int = 1000,
|
||||
max_subgraph_count: int = 5,
|
||||
max_entity_distance: int = 3,
|
||||
max_path_length: int = 2,
|
||||
edge_score_limit: int = 30,
|
||||
edge_limit: int = 25,
|
||||
**kwargs: Any
|
||||
) -> Iterator[Union[RAGChunk, ProvenanceEvent]]:
|
||||
"""
|
||||
|
|
@ -778,9 +790,12 @@ class SocketFlowInstance:
|
|||
query: Natural language query
|
||||
user: User/keyspace identifier
|
||||
collection: Collection identifier
|
||||
entity_limit: Maximum entities to retrieve (default: 50)
|
||||
triple_limit: Maximum triples per entity (default: 30)
|
||||
max_subgraph_size: Maximum total triples in subgraph (default: 1000)
|
||||
max_subgraph_count: Maximum number of subgraphs (default: 5)
|
||||
max_entity_distance: Maximum traversal depth (default: 3)
|
||||
max_path_length: Maximum traversal depth (default: 2)
|
||||
edge_score_limit: Max edges for semantic pre-filter (default: 30)
|
||||
edge_limit: Max edges after LLM scoring (default: 25)
|
||||
**kwargs: Additional parameters passed to the service
|
||||
|
||||
Yields:
|
||||
|
|
@ -823,11 +838,14 @@ class SocketFlowInstance:
|
|||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"entity-limit": entity_limit,
|
||||
"triple-limit": triple_limit,
|
||||
"max-subgraph-size": max_subgraph_size,
|
||||
"max-subgraph-count": max_subgraph_count,
|
||||
"max-entity-distance": max_entity_distance,
|
||||
"max-path-length": max_path_length,
|
||||
"edge-score-limit": edge_score_limit,
|
||||
"edge-limit": edge_limit,
|
||||
"streaming": True,
|
||||
"explainable": True, # Enable explainability mode
|
||||
"explainable": True,
|
||||
}
|
||||
request.update(kwargs)
|
||||
|
||||
|
|
|
|||
|
|
@ -84,6 +84,7 @@ class GraphRagRequestTranslator(MessageTranslator):
|
|||
triple_limit=int(data.get("triple-limit", 30)),
|
||||
max_subgraph_size=int(data.get("max-subgraph-size", 1000)),
|
||||
max_path_length=int(data.get("max-path-length", 2)),
|
||||
edge_score_limit=int(data.get("edge-score-limit", 30)),
|
||||
edge_limit=int(data.get("edge-limit", 25)),
|
||||
streaming=data.get("streaming", False)
|
||||
)
|
||||
|
|
@ -97,6 +98,7 @@ class GraphRagRequestTranslator(MessageTranslator):
|
|||
"triple-limit": obj.triple_limit,
|
||||
"max-subgraph-size": obj.max_subgraph_size,
|
||||
"max-path-length": obj.max_path_length,
|
||||
"edge-score-limit": obj.edge_score_limit,
|
||||
"edge-limit": obj.edge_limit,
|
||||
"streaming": getattr(obj, "streaming", False)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ class GraphRagQuery:
|
|||
triple_limit: int = 0
|
||||
max_subgraph_size: int = 0
|
||||
max_path_length: int = 0
|
||||
edge_score_limit: int = 0
|
||||
edge_limit: int = 0
|
||||
streaming: bool = False
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue