feat: implement fuzzy search in mention document

2026-07-02 22:01:05 +02:00 · 2026-01-17 20:46:47 +05:30 · 2026-01-17 20:46:47 +05:30 · 293de6876a
commit 293de6876a
parent b001b65067
1 changed files with 23 additions and 10 deletions
--- a/surfsense_backend/app/routes/documents_routes.py
+++ b/surfsense_backend/app/routes/documents_routes.py
@ -444,7 +444,8 @@ async def search_document_titles(
    Lightweight document title search optimized for mention picker (@mentions).
    Returns only id, title, and document_type - no content or metadata.
-    Results are ordered by relevance: prefix matches first, then contains matches.
+    Uses pg_trgm fuzzy search with similarity scoring for typo tolerance.
    Results are ordered by relevance using trigram similarity scores.
    Args:
        search_space_id: The search space to search in. Required.
@ -457,7 +458,7 @@ async def search_document_titles(
    Returns:
        DocumentTitleSearchResponse: Lightweight list with has_more flag (no total count).
    """
-    from sqlalchemy import case, literal
+    from sqlalchemy import desc, func, or_
    try:
        # Check permission for the search space
@ -480,17 +481,29 @@ async def search_document_titles(
        if len(title.strip()) < 2:
            query = query.order_by(Document.updated_at.desc().nullslast())
        else:
-            # Apply title filter with ILIKE (uses pg_trgm index)
+            # Fuzzy search using pg_trgm similarity + ILIKE fallback
            search_term = title.strip()
            query = query.filter(Document.title.ilike(f"%{search_term}%"))
-            # Order by relevance: prefix matches first, then alphabetical
+            # Similarity threshold for fuzzy matching (0.3 = ~30% trigram overlap)
-            # CASE WHEN title ILIKE 'term%' THEN 0 ELSE 1 END
+            # Lower values = more fuzzy, higher values = stricter matching
-            prefix_priority = case(
+            similarity_threshold = 0.3
-                (Document.title.ilike(f"{search_term}%"), literal(0)),
+
-                else_=literal(1),
+            # Match documents that either:
            # 1. Have high trigram similarity (fuzzy match - handles typos)
            # 2. Contain the exact substring (ILIKE - handles partial matches)
            query = query.filter(
                or_(
                    func.similarity(Document.title, search_term) > similarity_threshold,
                    Document.title.ilike(f"%{search_term}%"),
                )
            )
            # Order by similarity score (descending) for best relevance ranking
            # Higher similarity = better match = appears first
            query = query.order_by(
                desc(func.similarity(Document.title, search_term)),
                Document.title,  # Alphabetical tiebreaker
            )
            query = query.order_by(prefix_priority, Document.title)
        # Fetch page_size + 1 to determine has_more without COUNT query
        offset = page * page_size