mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-21 02:28:07 +02:00
test(engine): search fuzzy/match_text characterization + RRF non-default pairings
- match_text_matches_exact_set_excludes_unrelated: match_text(body,'neural') == [dl-basics] exactly (not just contains). - fuzzy_does_not_match_under_default_tokenizer: characterizes that fuzzy() is inert with the default tokenizer here (search/match_text work, fuzzy returns nothing); turns red — to be promoted to a real golden — if fuzzy starts matching. - rrf_fuses_two_fts_fields / rrf_fuses_two_vector_queries: RRF fuses arms other than the default nearest+bm25 (bm25 title+body; two vector queries), proving primary_var resolves and fusion runs. New fixtures/search.gq queries + two_vector_params helper. Orders resolved by running, confirmed stable.
This commit is contained in:
parent
e2784cad58
commit
e674e0c3c4
3 changed files with 90 additions and 0 deletions
14
crates/omnigraph/tests/fixtures/search.gq
vendored
14
crates/omnigraph/tests/fixtures/search.gq
vendored
|
|
@ -42,3 +42,17 @@ query hybrid_search($vq: Vector(4), $tq: String) {
|
|||
order { rrf(nearest($d.embedding, $vq), bm25($d.title, $tq)) }
|
||||
limit 3
|
||||
}
|
||||
|
||||
query rrf_two_fts($q: String) {
|
||||
match { $d: Doc }
|
||||
return { $d.slug, $d.title }
|
||||
order { rrf(bm25($d.title, $q), bm25($d.body, $q)) }
|
||||
limit 3
|
||||
}
|
||||
|
||||
query rrf_two_vectors($q1: Vector(4), $q2: Vector(4)) {
|
||||
match { $d: Doc }
|
||||
return { $d.slug, $d.title }
|
||||
order { rrf(nearest($d.embedding, $q1), nearest($d.embedding, $q2)) }
|
||||
limit 3
|
||||
}
|
||||
|
|
|
|||
|
|
@ -236,6 +236,15 @@ pub fn vector_param(name: &str, values: &[f32]) -> ParamMap {
|
|||
map
|
||||
}
|
||||
|
||||
/// Build a ParamMap with two vector params.
|
||||
pub fn two_vector_params(name1: &str, vals1: &[f32], name2: &str, vals2: &[f32]) -> ParamMap {
|
||||
let mut map = vector_param(name1, vals1);
|
||||
let key = name2.strip_prefix('$').unwrap_or(name2).to_string();
|
||||
let lit = Literal::List(vals2.iter().map(|v| Literal::Float(*v as f64)).collect());
|
||||
map.insert(key, lit);
|
||||
map
|
||||
}
|
||||
|
||||
/// Build a ParamMap with a vector param and a string param.
|
||||
pub fn vector_and_string_params(
|
||||
vec_name: &str,
|
||||
|
|
|
|||
|
|
@ -594,6 +594,73 @@ async fn bm25_full_rank_order() {
|
|||
assert_eq!(result_slugs(&result), vec!["rl-intro", "ml-intro", "dl-basics"]);
|
||||
}
|
||||
|
||||
// Characterization: fuzzy() does NOT match under the default tokenizer/index in
|
||||
// this setup — a one-edit typo ("Introductio" for "Introduction") returns no
|
||||
// rows. (`search`/`match_text` DO work, so FTS itself is fine; fuzzy term
|
||||
// queries specifically are inert here.) This pins that documented limitation
|
||||
// instead of leaving fuzzy silently unasserted: if a Lance/tokenizer change
|
||||
// makes fuzzy match, this turns red and should be promoted to a real
|
||||
// matched-set + exclusion golden.
|
||||
#[tokio::test]
|
||||
#[serial]
|
||||
async fn fuzzy_does_not_match_under_default_tokenizer() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut db = init_search_db(&dir).await;
|
||||
let r = query_main(&mut db, SEARCH_QUERIES, "fuzzy_search", ¶ms(&[("$q", "Introductio")]))
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(
|
||||
result_slugs(&r).is_empty(),
|
||||
"fuzzy now matches — promote this to a real matched-set/exclusion golden"
|
||||
);
|
||||
}
|
||||
|
||||
// match_text is a FILTER on the body: assert the exact matched set, not contains.
|
||||
#[tokio::test]
|
||||
#[serial]
|
||||
async fn match_text_matches_exact_set_excludes_unrelated() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut db = init_search_db(&dir).await;
|
||||
// "neural" appears only in dl-basics's body ("neural networks").
|
||||
let r = query_main(&mut db, SEARCH_QUERIES, "phrase_search", ¶ms(&[("$q", "neural")]))
|
||||
.await
|
||||
.unwrap();
|
||||
let mut got = result_slugs(&r);
|
||||
got.sort();
|
||||
assert_eq!(got, vec!["dl-basics"]);
|
||||
}
|
||||
|
||||
// RRF fuses arms OTHER than the default nearest+bm25: two FTS arms (title+body).
|
||||
// Proves primary_var resolves when neither arm is `nearest`, and fusion runs.
|
||||
#[tokio::test]
|
||||
#[serial]
|
||||
async fn rrf_fuses_two_fts_fields() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut db = init_search_db(&dir).await;
|
||||
let r = query_main(&mut db, SEARCH_QUERIES, "rrf_two_fts", ¶ms(&[("$q", "learning")]))
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(result_slugs(&r), vec!["dl-basics", "ml-intro", "rl-intro"]);
|
||||
}
|
||||
|
||||
// RRF fuses two vector arms (no embedding creds — explicit vectors). A doc near
|
||||
// BOTH query vectors out-ranks one near only one.
|
||||
#[tokio::test]
|
||||
#[serial]
|
||||
async fn rrf_fuses_two_vector_queries() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let mut db = init_search_db(&dir).await;
|
||||
let r = query_main(
|
||||
&mut db,
|
||||
SEARCH_QUERIES,
|
||||
"rrf_two_vectors",
|
||||
&two_vector_params("$q1", &[0.1, 0.2, 0.3, 0.4], "$q2", &[0.5, 0.6, 0.7, 0.8]),
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(result_slugs(&r), vec!["rl-intro", "ml-intro", "dl-basics"]);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[serial]
|
||||
async fn mutation_commit_refreshes_search_indices_without_manual_ensure() {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue