mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-07-02 22:01:05 +02:00
feat: implement fuzzy search in mention document
This commit is contained in:
parent
b001b65067
commit
293de6876a
1 changed file with 23 additions and 10 deletions
|
|
@@ -444,7 +444,8 @@ async def search_document_titles(
|
||||||
Lightweight document title search optimized for mention picker (@mentions).
|
Lightweight document title search optimized for mention picker (@mentions).
|
||||||
|
|
||||||
Returns only id, title, and document_type - no content or metadata.
|
Returns only id, title, and document_type - no content or metadata.
|
||||||
Results are ordered by relevance: prefix matches first, then contains matches.
|
Uses pg_trgm fuzzy search with similarity scoring for typo tolerance.
|
||||||
|
Results are ordered by relevance using trigram similarity scores.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
search_space_id: The search space to search in. Required.
|
search_space_id: The search space to search in. Required.
|
||||||
|
|
@@ -457,7 +458,7 @@ async def search_document_titles(
|
||||||
Returns:
|
Returns:
|
||||||
DocumentTitleSearchResponse: Lightweight list with has_more flag (no total count).
|
DocumentTitleSearchResponse: Lightweight list with has_more flag (no total count).
|
||||||
"""
|
"""
|
||||||
from sqlalchemy import case, literal
|
from sqlalchemy import desc, func, or_
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Check permission for the search space
|
# Check permission for the search space
|
||||||
|
|
@@ -480,17 +481,29 @@ async def search_document_titles(
|
||||||
if len(title.strip()) < 2:
|
if len(title.strip()) < 2:
|
||||||
query = query.order_by(Document.updated_at.desc().nullslast())
|
query = query.order_by(Document.updated_at.desc().nullslast())
|
||||||
else:
|
else:
|
||||||
# Apply title filter with ILIKE (uses pg_trgm index)
|
# Fuzzy search using pg_trgm similarity + ILIKE fallback
|
||||||
search_term = title.strip()
|
search_term = title.strip()
|
||||||
query = query.filter(Document.title.ilike(f"%{search_term}%"))
|
|
||||||
|
|
||||||
# Order by relevance: prefix matches first, then alphabetical
|
# Similarity threshold for fuzzy matching (0.3 = ~30% trigram overlap)
|
||||||
# CASE WHEN title ILIKE 'term%' THEN 0 ELSE 1 END
|
# Lower values = more fuzzy, higher values = stricter matching
|
||||||
prefix_priority = case(
|
similarity_threshold = 0.3
|
||||||
(Document.title.ilike(f"{search_term}%"), literal(0)),
|
|
||||||
else_=literal(1),
|
# Match documents that either:
|
||||||
|
# 1. Have high trigram similarity (fuzzy match - handles typos)
|
||||||
|
# 2. Contain the exact substring (ILIKE - handles partial matches)
|
||||||
|
query = query.filter(
|
||||||
|
or_(
|
||||||
|
func.similarity(Document.title, search_term) > similarity_threshold,
|
||||||
|
Document.title.ilike(f"%{search_term}%"),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Order by similarity score (descending) for best relevance ranking
|
||||||
|
# Higher similarity = better match = appears first
|
||||||
|
query = query.order_by(
|
||||||
|
desc(func.similarity(Document.title, search_term)),
|
||||||
|
Document.title, # Alphabetical tiebreaker
|
||||||
)
|
)
|
||||||
query = query.order_by(prefix_priority, Document.title)
|
|
||||||
|
|
||||||
# Fetch page_size + 1 to determine has_more without COUNT query
|
# Fetch page_size + 1 to determine has_more without COUNT query
|
||||||
offset = page * page_size
|
offset = page * page_size
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue