diff --git a/crates/omnigraph/tests/fixtures/search.gq b/crates/omnigraph/tests/fixtures/search.gq
index c39af82..d53fbc9 100644
--- a/crates/omnigraph/tests/fixtures/search.gq
+++ b/crates/omnigraph/tests/fixtures/search.gq
@@ -42,3 +42,17 @@ query hybrid_search($vq: Vector(4), $tq: String) {
     order { rrf(nearest($d.embedding, $vq), bm25($d.title, $tq)) }
     limit 3
 }
+
+query rrf_two_fts($q: String) {
+    match { $d: Doc }
+    return { $d.slug, $d.title }
+    order { rrf(bm25($d.title, $q), bm25($d.body, $q)) }
+    limit 3
+}
+
+query rrf_two_vectors($q1: Vector(4), $q2: Vector(4)) {
+    match { $d: Doc }
+    return { $d.slug, $d.title }
+    order { rrf(nearest($d.embedding, $q1), nearest($d.embedding, $q2)) }
+    limit 3
+}
diff --git a/crates/omnigraph/tests/helpers/mod.rs b/crates/omnigraph/tests/helpers/mod.rs
index c97ff72..0e04aa2 100644
--- a/crates/omnigraph/tests/helpers/mod.rs
+++ b/crates/omnigraph/tests/helpers/mod.rs
@@ -236,6 +236,15 @@ pub fn vector_param(name: &str, values: &[f32]) -> ParamMap {
     map
 }
 
+/// Build a ParamMap with two vector params.
+pub fn two_vector_params(name1: &str, vals1: &[f32], name2: &str, vals2: &[f32]) -> ParamMap {
+    let mut map = vector_param(name1, vals1);
+    let key = name2.strip_prefix('$').unwrap_or(name2).to_string();
+    let lit = Literal::List(vals2.iter().map(|v| Literal::Float(*v as f64)).collect());
+    map.insert(key, lit);
+    map
+}
+
 /// Build a ParamMap with a vector param and a string param.
 pub fn vector_and_string_params(
     vec_name: &str,
diff --git a/crates/omnigraph/tests/search.rs b/crates/omnigraph/tests/search.rs
index 653172b..480ec3c 100644
--- a/crates/omnigraph/tests/search.rs
+++ b/crates/omnigraph/tests/search.rs
@@ -594,6 +594,73 @@ async fn bm25_full_rank_order() {
     assert_eq!(result_slugs(&result), vec!["rl-intro", "ml-intro", "dl-basics"]);
 }
 
+// Characterization: fuzzy() does NOT match under the default tokenizer/index in
+// this setup — a one-edit typo ("Introductio" for "Introduction") returns no
+// rows. (`search`/`match_text` DO work, so FTS itself is fine; fuzzy term
+// queries specifically are inert here.) This pins that documented limitation
+// instead of leaving fuzzy silently unasserted: if a Lance/tokenizer change
+// makes fuzzy match, this turns red and should be promoted to a real
+// matched-set + exclusion golden.
+#[tokio::test]
+#[serial]
+async fn fuzzy_does_not_match_under_default_tokenizer() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_search_db(&dir).await;
+    let r = query_main(&mut db, SEARCH_QUERIES, "fuzzy_search", &params(&[("$q", "Introductio")]))
+        .await
+        .unwrap();
+    assert!(
+        result_slugs(&r).is_empty(),
+        "fuzzy now matches — promote this to a real matched-set/exclusion golden"
+    );
+}
+
+// match_text is a FILTER on the body: assert the exact matched set, not contains.
+#[tokio::test]
+#[serial]
+async fn match_text_matches_exact_set_excludes_unrelated() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_search_db(&dir).await;
+    // "neural" appears only in dl-basics's body ("neural networks").
+    let r = query_main(&mut db, SEARCH_QUERIES, "phrase_search", &params(&[("$q", "neural")]))
+        .await
+        .unwrap();
+    let mut got = result_slugs(&r);
+    got.sort();
+    assert_eq!(got, vec!["dl-basics"]);
+}
+
+// RRF fuses arms OTHER than the default nearest+bm25: two FTS arms (title+body).
+// Proves primary_var resolves when neither arm is `nearest`, and fusion runs.
+#[tokio::test]
+#[serial]
+async fn rrf_fuses_two_fts_fields() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_search_db(&dir).await;
+    let r = query_main(&mut db, SEARCH_QUERIES, "rrf_two_fts", &params(&[("$q", "learning")]))
+        .await
+        .unwrap();
+    assert_eq!(result_slugs(&r), vec!["dl-basics", "ml-intro", "rl-intro"]);
+}
+
+// RRF fuses two vector arms (no embedding creds — explicit vectors). A doc near
+// BOTH query vectors out-ranks one near only one.
+#[tokio::test]
+#[serial]
+async fn rrf_fuses_two_vector_queries() {
+    let dir = tempfile::tempdir().unwrap();
+    let mut db = init_search_db(&dir).await;
+    let r = query_main(
+        &mut db,
+        SEARCH_QUERIES,
+        "rrf_two_vectors",
+        &two_vector_params("$q1", &[0.1, 0.2, 0.3, 0.4], "$q2", &[0.5, 0.6, 0.7, 0.8]),
+    )
+    .await
+    .unwrap();
+    assert_eq!(result_slugs(&r), vec!["rl-intro", "ml-intro", "dl-basics"]);
+}
+
 #[tokio::test]
 #[serial]
 async fn mutation_commit_refreshes_search_indices_without_manual_ensure() {