mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-06-22 05:08:06 +02:00
Add semantic pre-filter for GraphRAG edge scoring (#702)
Embed edge descriptions and compute cosine similarity against grounding concepts to reduce the number of edges sent to expensive LLM scoring. The pre-filter is controlled by the edge_score_limit parameter (default 30) and is skipped when the edge count is already below the limit.
Also plumbs the edge_score_limit and edge_limit parameters end-to-end:
- CLI args (--edge-score-limit, --edge-limit) in both invoke and service
- Socket client: fix parameter mapping to use hyphenated wire-format keys
- Flow API, message translator, and gateway all pass the parameters through correctly
- Explainable code path (_question_explainable_api) now forwards all params
- Default edge_score_limit changed from 50 to 30 based on typical subgraph sizes
This commit is contained in:
parent
bc68738c37
commit
1a7b654bd3
7 changed files with 166 additions and 20 deletions
|
|
@ -699,9 +699,12 @@ class SocketFlowInstance:
|
|||
query: str,
|
||||
user: str,
|
||||
collection: str,
|
||||
entity_limit: int = 50,
|
||||
triple_limit: int = 30,
|
||||
max_subgraph_size: int = 1000,
|
||||
max_subgraph_count: int = 5,
|
||||
max_entity_distance: int = 3,
|
||||
max_path_length: int = 2,
|
||||
edge_score_limit: int = 30,
|
||||
edge_limit: int = 25,
|
||||
streaming: bool = False,
|
||||
**kwargs: Any
|
||||
) -> Union[str, Iterator[str]]:
|
||||
|
|
@ -715,9 +718,12 @@ class SocketFlowInstance:
|
|||
query: Natural language query
|
||||
user: User/keyspace identifier
|
||||
collection: Collection identifier
|
||||
entity_limit: Maximum entities to retrieve (default: 50)
|
||||
triple_limit: Maximum triples per entity (default: 30)
|
||||
max_subgraph_size: Maximum total triples in subgraph (default: 1000)
|
||||
max_subgraph_count: Maximum number of subgraphs (default: 5)
|
||||
max_entity_distance: Maximum traversal depth (default: 3)
|
||||
max_path_length: Maximum traversal depth (default: 2)
|
||||
edge_score_limit: Max edges for semantic pre-filter (default: 30)
|
||||
edge_limit: Max edges after LLM scoring (default: 25)
|
||||
streaming: Enable streaming mode (default: False)
|
||||
**kwargs: Additional parameters passed to the service
|
||||
|
||||
|
|
@ -743,9 +749,12 @@ class SocketFlowInstance:
|
|||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"entity-limit": entity_limit,
|
||||
"triple-limit": triple_limit,
|
||||
"max-subgraph-size": max_subgraph_size,
|
||||
"max-subgraph-count": max_subgraph_count,
|
||||
"max-entity-distance": max_entity_distance,
|
||||
"max-path-length": max_path_length,
|
||||
"edge-score-limit": edge_score_limit,
|
||||
"edge-limit": edge_limit,
|
||||
"streaming": streaming
|
||||
}
|
||||
request.update(kwargs)
|
||||
|
|
@ -762,9 +771,12 @@ class SocketFlowInstance:
|
|||
query: str,
|
||||
user: str,
|
||||
collection: str,
|
||||
entity_limit: int = 50,
|
||||
triple_limit: int = 30,
|
||||
max_subgraph_size: int = 1000,
|
||||
max_subgraph_count: int = 5,
|
||||
max_entity_distance: int = 3,
|
||||
max_path_length: int = 2,
|
||||
edge_score_limit: int = 30,
|
||||
edge_limit: int = 25,
|
||||
**kwargs: Any
|
||||
) -> Iterator[Union[RAGChunk, ProvenanceEvent]]:
|
||||
"""
|
||||
|
|
@ -778,9 +790,12 @@ class SocketFlowInstance:
|
|||
query: Natural language query
|
||||
user: User/keyspace identifier
|
||||
collection: Collection identifier
|
||||
entity_limit: Maximum entities to retrieve (default: 50)
|
||||
triple_limit: Maximum triples per entity (default: 30)
|
||||
max_subgraph_size: Maximum total triples in subgraph (default: 1000)
|
||||
max_subgraph_count: Maximum number of subgraphs (default: 5)
|
||||
max_entity_distance: Maximum traversal depth (default: 3)
|
||||
max_path_length: Maximum traversal depth (default: 2)
|
||||
edge_score_limit: Max edges for semantic pre-filter (default: 30)
|
||||
edge_limit: Max edges after LLM scoring (default: 25)
|
||||
**kwargs: Additional parameters passed to the service
|
||||
|
||||
Yields:
|
||||
|
|
@ -823,11 +838,14 @@ class SocketFlowInstance:
|
|||
"query": query,
|
||||
"user": user,
|
||||
"collection": collection,
|
||||
"entity-limit": entity_limit,
|
||||
"triple-limit": triple_limit,
|
||||
"max-subgraph-size": max_subgraph_size,
|
||||
"max-subgraph-count": max_subgraph_count,
|
||||
"max-entity-distance": max_entity_distance,
|
||||
"max-path-length": max_path_length,
|
||||
"edge-score-limit": edge_score_limit,
|
||||
"edge-limit": edge_limit,
|
||||
"streaming": True,
|
||||
"explainable": True, # Enable explainability mode
|
||||
"explainable": True,
|
||||
}
|
||||
request.update(kwargs)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue