mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-27 01:36:30 +02:00
feat: implement fuzzy search in mention document
This commit is contained in:
parent
b001b65067
commit
293de6876a
1 changed files with 23 additions and 10 deletions
|
|
@ -444,7 +444,8 @@ async def search_document_titles(
|
|||
Lightweight document title search optimized for mention picker (@mentions).
|
||||
|
||||
Returns only id, title, and document_type - no content or metadata.
|
||||
Results are ordered by relevance: prefix matches first, then contains matches.
|
||||
Uses pg_trgm fuzzy search with similarity scoring for typo tolerance.
|
||||
Results are ordered by relevance using trigram similarity scores.
|
||||
|
||||
Args:
|
||||
search_space_id: The search space to search in. Required.
|
||||
|
|
@ -457,7 +458,7 @@ async def search_document_titles(
|
|||
Returns:
|
||||
DocumentTitleSearchResponse: Lightweight list with has_more flag (no total count).
|
||||
"""
|
||||
from sqlalchemy import case, literal
|
||||
from sqlalchemy import desc, func, or_
|
||||
|
||||
try:
|
||||
# Check permission for the search space
|
||||
|
|
@ -480,17 +481,29 @@ async def search_document_titles(
|
|||
if len(title.strip()) < 2:
|
||||
query = query.order_by(Document.updated_at.desc().nullslast())
|
||||
else:
|
||||
# Apply title filter with ILIKE (uses pg_trgm index)
|
||||
# Fuzzy search using pg_trgm similarity + ILIKE fallback
|
||||
search_term = title.strip()
|
||||
query = query.filter(Document.title.ilike(f"%{search_term}%"))
|
||||
|
||||
# Order by relevance: prefix matches first, then alphabetical
|
||||
# CASE WHEN title ILIKE 'term%' THEN 0 ELSE 1 END
|
||||
prefix_priority = case(
|
||||
(Document.title.ilike(f"{search_term}%"), literal(0)),
|
||||
else_=literal(1),
|
||||
# Similarity threshold for fuzzy matching (0.3 = ~30% trigram overlap)
|
||||
# Lower values = more fuzzy, higher values = stricter matching
|
||||
similarity_threshold = 0.3
|
||||
|
||||
# Match documents that either:
|
||||
# 1. Have high trigram similarity (fuzzy match - handles typos)
|
||||
# 2. Contain the exact substring (ILIKE - handles partial matches)
|
||||
query = query.filter(
|
||||
or_(
|
||||
func.similarity(Document.title, search_term) > similarity_threshold,
|
||||
Document.title.ilike(f"%{search_term}%"),
|
||||
)
|
||||
)
|
||||
|
||||
# Order by similarity score (descending) for best relevance ranking
|
||||
# Higher similarity = better match = appears first
|
||||
query = query.order_by(
|
||||
desc(func.similarity(Document.title, search_term)),
|
||||
Document.title, # Alphabetical tiebreaker
|
||||
)
|
||||
query = query.order_by(prefix_priority, Document.title)
|
||||
|
||||
# Fetch page_size + 1 to determine has_more without COUNT query
|
||||
offset = page * page_size
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue