mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-08 20:25:19 +02:00
feat: implement time-based filtering for document retrieval using 'updated_at' timestamp
This commit is contained in:
parent
d97136792c
commit
919c323ef3
6 changed files with 324 additions and 16 deletions
|
|
@ -0,0 +1,40 @@
|
|||
"""47_copy_created_at_to_updated_at
|
||||
|
||||
Revision ID: 47
|
||||
Revises: 46
|
||||
Create Date: 2025-12-12
|
||||
|
||||
Copies created_at values to updated_at for all documents where updated_at is NULL.
|
||||
This ensures time-based filtering in retrievers works correctly for all documents.
|
||||
"""
|
||||
|
||||
from collections.abc import Sequence
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
revision: str = "47"  # identifier of this migration
down_revision: str | None = "46"  # the revision this migration revises
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Upgrade schema - backfill ``updated_at`` from ``created_at``.

    Only rows of ``documents`` whose ``updated_at`` is NULL and whose
    ``created_at`` is not NULL are touched; every other row is left as is.
    """
    # Rows that already carry an updated_at value are excluded by the WHERE clause.
    backfill_sql = """
        UPDATE documents
        SET updated_at = created_at
        WHERE updated_at IS NULL
        AND created_at IS NOT NULL
        """
    op.execute(backfill_sql)
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Downgrade schema - intentionally a no-op.

    The upgrade copies ``created_at`` into ``updated_at`` for documents whose
    ``updated_at`` was NULL. Afterwards those rows cannot be distinguished
    from documents whose ``updated_at`` was set by other sources, so the
    downgrade is lossy: for safety, nothing is reverted and the copied values
    are left in place.
    """
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
import json
|
||||
import logging
|
||||
import traceback
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.messages import HumanMessage, SystemMessage
|
||||
|
|
@ -21,6 +22,9 @@ from .qna_agent.graph import graph as qna_agent_graph
|
|||
from .state import State
|
||||
from .utils import get_connector_emoji, get_connector_friendly_name
|
||||
|
||||
# Time filter constants - hardcoded 2 year time range for now.
# handle_qna_workflow turns this into a look-back window by counting each
# year as 365 days (leap days are not accounted for).
DEFAULT_TIME_FILTER_YEARS = 2
|
||||
|
||||
|
||||
def extract_sources_from_documents(
|
||||
all_documents: list[dict[str, Any]],
|
||||
|
|
@ -524,6 +528,8 @@ async def fetch_relevant_documents(
|
|||
connector_service: ConnectorService = None,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
user_selected_sources: list[dict[str, Any]] | None = None,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""
|
||||
Fetch relevant documents for research questions using the provided connectors.
|
||||
|
|
@ -542,6 +548,10 @@ async def fetch_relevant_documents(
|
|||
state: The current state containing the streaming service
|
||||
top_k: Number of top results to retrieve per connector per question
|
||||
connector_service: An initialized connector service to use for searching
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
user_selected_sources: Optional list of user-selected source objects
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
List of relevant documents
|
||||
|
|
@ -620,6 +630,8 @@ async def fetch_relevant_documents(
|
|||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -646,6 +658,8 @@ async def fetch_relevant_documents(
|
|||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -672,6 +686,8 @@ async def fetch_relevant_documents(
|
|||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -695,6 +711,8 @@ async def fetch_relevant_documents(
|
|||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -718,6 +736,8 @@ async def fetch_relevant_documents(
|
|||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -744,6 +764,8 @@ async def fetch_relevant_documents(
|
|||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -770,6 +792,8 @@ async def fetch_relevant_documents(
|
|||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -796,6 +820,8 @@ async def fetch_relevant_documents(
|
|||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -922,6 +948,8 @@ async def fetch_relevant_documents(
|
|||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
# Add to sources and raw documents
|
||||
if source_object:
|
||||
|
|
@ -943,6 +971,8 @@ async def fetch_relevant_documents(
|
|||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -968,6 +998,8 @@ async def fetch_relevant_documents(
|
|||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -993,6 +1025,8 @@ async def fetch_relevant_documents(
|
|||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -1018,6 +1052,8 @@ async def fetch_relevant_documents(
|
|||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -1043,6 +1079,8 @@ async def fetch_relevant_documents(
|
|||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -1068,6 +1106,8 @@ async def fetch_relevant_documents(
|
|||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -1094,6 +1134,8 @@ async def fetch_relevant_documents(
|
|||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -1120,6 +1162,8 @@ async def fetch_relevant_documents(
|
|||
search_space_id=search_space_id,
|
||||
top_k=top_k,
|
||||
search_mode=search_mode,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
|
||||
# Add to sources and raw documents
|
||||
|
|
@ -1387,6 +1431,10 @@ async def handle_qna_workflow(
|
|||
# Use both the reformulated query and the original user query as research questions
|
||||
research_questions = [reformulated_query, user_query]
|
||||
|
||||
# Calculate time filter: the last DEFAULT_TIME_FILTER_YEARS years (365 days each) up to now (hardcoded for now)
|
||||
end_date = datetime.now(UTC)
|
||||
start_date = end_date - timedelta(days=DEFAULT_TIME_FILTER_YEARS * 365)
|
||||
|
||||
relevant_documents = await fetch_relevant_documents(
|
||||
research_questions=research_questions,
|
||||
search_space_id=configuration.search_space_id,
|
||||
|
|
@ -1398,6 +1446,8 @@ async def handle_qna_workflow(
|
|||
connector_service=connector_service,
|
||||
search_mode=configuration.search_mode,
|
||||
user_selected_sources=user_selected_sources,
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
except Exception as e:
|
||||
error_message = f"Error fetching relevant documents for QNA: {e!s}"
|
||||
|
|
|
|||
|
|
@ -25,8 +25,6 @@ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_asyn
|
|||
from sqlalchemy.orm import DeclarativeBase, Mapped, declared_attr, relationship
|
||||
|
||||
from app.config import config
|
||||
from app.retriever.chunks_hybrid_search import ChucksHybridSearchRetriever
|
||||
from app.retriever.documents_hybrid_search import DocumentHybridSearchRetriever
|
||||
|
||||
if config.AUTH_TYPE == "GOOGLE":
|
||||
from fastapi_users.db import SQLAlchemyBaseOAuthAccountTableUUID
|
||||
|
|
@ -799,18 +797,6 @@ else:
|
|||
yield SQLAlchemyUserDatabase(session, User)
|
||||
|
||||
|
||||
async def get_chucks_hybrid_search_retriever(
    session: AsyncSession = Depends(get_async_session),
):
    """Dependency provider for the chunk-level hybrid search retriever.

    Args:
        session: Async database session, injected via ``get_async_session``.

    Returns:
        A ``ChucksHybridSearchRetriever`` constructed with ``session``.
    """
    chunk_retriever = ChucksHybridSearchRetriever(session)
    return chunk_retriever
|
||||
|
||||
|
||||
async def get_documents_hybrid_search_retriever(
    session: AsyncSession = Depends(get_async_session),
):
    """Dependency provider for the document-level hybrid search retriever.

    Args:
        session: Async database session, injected via ``get_async_session``.

    Returns:
        A ``DocumentHybridSearchRetriever`` constructed with ``session``.
    """
    document_retriever = DocumentHybridSearchRetriever(session)
    return document_retriever
|
||||
|
||||
|
||||
def has_permission(user_permissions: list[str], required_permission: str) -> bool:
|
||||
"""
|
||||
Check if the user has the required permission.
|
||||
|
|
|
|||
|
|
@ -1,3 +1,6 @@
|
|||
from datetime import datetime
|
||||
|
||||
|
||||
class ChucksHybridSearchRetriever:
|
||||
def __init__(self, db_session):
|
||||
"""
|
||||
|
|
@ -13,6 +16,8 @@ class ChucksHybridSearchRetriever:
|
|||
query_text: str,
|
||||
top_k: int,
|
||||
search_space_id: int,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> list:
|
||||
"""
|
||||
Perform vector similarity search on chunks.
|
||||
|
|
@ -21,6 +26,8 @@ class ChucksHybridSearchRetriever:
|
|||
query_text: The search query text
|
||||
top_k: Number of results to return
|
||||
search_space_id: The search space ID to search within
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
List of chunks sorted by vector similarity
|
||||
|
|
@ -43,6 +50,12 @@ class ChucksHybridSearchRetriever:
|
|||
.where(Document.search_space_id == search_space_id)
|
||||
)
|
||||
|
||||
# Add time-based filtering if provided
|
||||
if start_date is not None:
|
||||
query = query.where(Document.updated_at >= start_date)
|
||||
if end_date is not None:
|
||||
query = query.where(Document.updated_at <= end_date)
|
||||
|
||||
# Add vector similarity ordering
|
||||
query = query.order_by(Chunk.embedding.op("<=>")(query_embedding)).limit(top_k)
|
||||
|
||||
|
|
@ -57,6 +70,8 @@ class ChucksHybridSearchRetriever:
|
|||
query_text: str,
|
||||
top_k: int,
|
||||
search_space_id: int,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> list:
|
||||
"""
|
||||
Perform full-text keyword search on chunks.
|
||||
|
|
@ -65,6 +80,8 @@ class ChucksHybridSearchRetriever:
|
|||
query_text: The search query text
|
||||
top_k: Number of results to return
|
||||
search_space_id: The search space ID to search within
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
List of chunks sorted by text relevance
|
||||
|
|
@ -89,6 +106,12 @@ class ChucksHybridSearchRetriever:
|
|||
) # Only include results that match the query
|
||||
)
|
||||
|
||||
# Add time-based filtering if provided
|
||||
if start_date is not None:
|
||||
query = query.where(Document.updated_at >= start_date)
|
||||
if end_date is not None:
|
||||
query = query.where(Document.updated_at <= end_date)
|
||||
|
||||
# Add text search ranking
|
||||
query = query.order_by(func.ts_rank_cd(tsvector, tsquery).desc()).limit(top_k)
|
||||
|
||||
|
|
@ -104,6 +127,8 @@ class ChucksHybridSearchRetriever:
|
|||
top_k: int,
|
||||
search_space_id: int,
|
||||
document_type: str | None = None,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> list:
|
||||
"""
|
||||
Combine vector similarity and full-text search results using Reciprocal Rank Fusion.
|
||||
|
|
@ -113,6 +138,8 @@ class ChucksHybridSearchRetriever:
|
|||
top_k: Number of results to return
|
||||
search_space_id: The search space ID to search within
|
||||
document_type: Optional document type to filter results (e.g., "FILE", "CRAWLED_URL")
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
List of dictionaries containing chunk data and relevance scores
|
||||
|
|
@ -151,6 +178,12 @@ class ChucksHybridSearchRetriever:
|
|||
else:
|
||||
base_conditions.append(Document.document_type == document_type)
|
||||
|
||||
# Add time-based filtering if provided
|
||||
if start_date is not None:
|
||||
base_conditions.append(Document.updated_at >= start_date)
|
||||
if end_date is not None:
|
||||
base_conditions.append(Document.updated_at <= end_date)
|
||||
|
||||
# CTE for semantic search filtered by search space
|
||||
semantic_search_cte = (
|
||||
select(
|
||||
|
|
|
|||
|
|
@ -1,3 +1,6 @@
|
|||
from datetime import datetime
|
||||
|
||||
|
||||
class DocumentHybridSearchRetriever:
|
||||
def __init__(self, db_session):
|
||||
"""
|
||||
|
|
@ -13,6 +16,8 @@ class DocumentHybridSearchRetriever:
|
|||
query_text: str,
|
||||
top_k: int,
|
||||
search_space_id: int,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> list:
|
||||
"""
|
||||
Perform vector similarity search on documents.
|
||||
|
|
@ -21,6 +26,8 @@ class DocumentHybridSearchRetriever:
|
|||
query_text: The search query text
|
||||
top_k: Number of results to return
|
||||
search_space_id: The search space ID to search within
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
List of documents sorted by vector similarity
|
||||
|
|
@ -42,6 +49,12 @@ class DocumentHybridSearchRetriever:
|
|||
.where(Document.search_space_id == search_space_id)
|
||||
)
|
||||
|
||||
# Add time-based filtering if provided
|
||||
if start_date is not None:
|
||||
query = query.where(Document.updated_at >= start_date)
|
||||
if end_date is not None:
|
||||
query = query.where(Document.updated_at <= end_date)
|
||||
|
||||
# Add vector similarity ordering
|
||||
query = query.order_by(Document.embedding.op("<=>")(query_embedding)).limit(
|
||||
top_k
|
||||
|
|
@ -58,6 +71,8 @@ class DocumentHybridSearchRetriever:
|
|||
query_text: str,
|
||||
top_k: int,
|
||||
search_space_id: int,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> list:
|
||||
"""
|
||||
Perform full-text keyword search on documents.
|
||||
|
|
@ -66,6 +81,8 @@ class DocumentHybridSearchRetriever:
|
|||
query_text: The search query text
|
||||
top_k: Number of results to return
|
||||
search_space_id: The search space ID to search within
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
List of documents sorted by text relevance
|
||||
|
|
@ -89,6 +106,12 @@ class DocumentHybridSearchRetriever:
|
|||
) # Only include results that match the query
|
||||
)
|
||||
|
||||
# Add time-based filtering if provided
|
||||
if start_date is not None:
|
||||
query = query.where(Document.updated_at >= start_date)
|
||||
if end_date is not None:
|
||||
query = query.where(Document.updated_at <= end_date)
|
||||
|
||||
# Add text search ranking
|
||||
query = query.order_by(func.ts_rank_cd(tsvector, tsquery).desc()).limit(top_k)
|
||||
|
||||
|
|
@ -104,6 +127,8 @@ class DocumentHybridSearchRetriever:
|
|||
top_k: int,
|
||||
search_space_id: int,
|
||||
document_type: str | None = None,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> list:
|
||||
"""
|
||||
Combine vector similarity and full-text search results using Reciprocal Rank Fusion.
|
||||
|
|
@ -113,6 +138,8 @@ class DocumentHybridSearchRetriever:
|
|||
top_k: Number of results to return
|
||||
search_space_id: The search space ID to search within
|
||||
document_type: Optional document type to filter results (e.g., "FILE", "CRAWLED_URL")
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
"""
|
||||
from sqlalchemy import func, select, text
|
||||
|
|
@ -149,6 +176,12 @@ class DocumentHybridSearchRetriever:
|
|||
else:
|
||||
base_conditions.append(Document.document_type == document_type)
|
||||
|
||||
# Add time-based filtering if provided
|
||||
if start_date is not None:
|
||||
base_conditions.append(Document.updated_at >= start_date)
|
||||
if end_date is not None:
|
||||
base_conditions.append(Document.updated_at <= end_date)
|
||||
|
||||
# CTE for semantic search filtered by search space
|
||||
semantic_search_cte = select(
|
||||
Document.id,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import asyncio
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
from urllib.parse import urljoin
|
||||
|
||||
|
|
@ -63,6 +64,8 @@ class ConnectorService:
|
|||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for crawled URLs and return both the source information and langchain documents
|
||||
|
|
@ -72,6 +75,8 @@ class ConnectorService:
|
|||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
|
|
@ -82,6 +87,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="CRAWLED_URL",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
elif search_mode == SearchMode.DOCUMENTS:
|
||||
crawled_urls_chunks = await self.document_retriever.hybrid_search(
|
||||
|
|
@ -89,6 +96,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="CRAWLED_URL",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
# Transform document retriever results to match expected format
|
||||
crawled_urls_chunks = self._transform_document_results(crawled_urls_chunks)
|
||||
|
|
@ -168,10 +177,20 @@ class ConnectorService:
|
|||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for files and return both the source information and langchain documents
|
||||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
"""
|
||||
|
|
@ -181,6 +200,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="FILE",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
elif search_mode == SearchMode.DOCUMENTS:
|
||||
files_chunks = await self.document_retriever.hybrid_search(
|
||||
|
|
@ -188,6 +209,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="FILE",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
# Transform document retriever results to match expected format
|
||||
files_chunks = self._transform_document_results(files_chunks)
|
||||
|
|
@ -807,10 +830,20 @@ class ConnectorService:
|
|||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for slack and return both the source information and langchain documents
|
||||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
"""
|
||||
|
|
@ -820,6 +853,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="SLACK_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
elif search_mode == SearchMode.DOCUMENTS:
|
||||
slack_chunks = await self.document_retriever.hybrid_search(
|
||||
|
|
@ -827,6 +862,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="SLACK_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
# Transform document retriever results to match expected format
|
||||
slack_chunks = self._transform_document_results(slack_chunks)
|
||||
|
|
@ -892,6 +929,8 @@ class ConnectorService:
|
|||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for Notion pages and return both the source information and langchain documents
|
||||
|
|
@ -900,6 +939,9 @@ class ConnectorService:
|
|||
user_query: The user's query
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
|
|
@ -910,6 +952,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="NOTION_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
elif search_mode == SearchMode.DOCUMENTS:
|
||||
notion_chunks = await self.document_retriever.hybrid_search(
|
||||
|
|
@ -917,6 +961,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="NOTION_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
# Transform document retriever results to match expected format
|
||||
notion_chunks = self._transform_document_results(notion_chunks)
|
||||
|
|
@ -985,6 +1031,8 @@ class ConnectorService:
|
|||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for extension data and return both the source information and langchain documents
|
||||
|
|
@ -993,6 +1041,9 @@ class ConnectorService:
|
|||
user_query: The user's query
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
|
|
@ -1003,6 +1054,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="EXTENSION",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
elif search_mode == SearchMode.DOCUMENTS:
|
||||
extension_chunks = await self.document_retriever.hybrid_search(
|
||||
|
|
@ -1010,6 +1063,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="EXTENSION",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
# Transform document retriever results to match expected format
|
||||
extension_chunks = self._transform_document_results(extension_chunks)
|
||||
|
|
@ -1102,6 +1157,8 @@ class ConnectorService:
|
|||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for YouTube videos and return both the source information and langchain documents
|
||||
|
|
@ -1110,6 +1167,9 @@ class ConnectorService:
|
|||
user_query: The user's query
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
|
|
@ -1120,6 +1180,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="YOUTUBE_VIDEO",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
elif search_mode == SearchMode.DOCUMENTS:
|
||||
youtube_chunks = await self.document_retriever.hybrid_search(
|
||||
|
|
@ -1127,6 +1189,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="YOUTUBE_VIDEO",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
# Transform document retriever results to match expected format
|
||||
youtube_chunks = self._transform_document_results(youtube_chunks)
|
||||
|
|
@ -1195,10 +1259,20 @@ class ConnectorService:
|
|||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for GitHub documents and return both the source information and langchain documents
|
||||
|
||||
Args:
|
||||
user_query: The user's query
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
"""
|
||||
|
|
@ -1208,6 +1282,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="GITHUB_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
elif search_mode == SearchMode.DOCUMENTS:
|
||||
github_chunks = await self.document_retriever.hybrid_search(
|
||||
|
|
@ -1215,6 +1291,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="GITHUB_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
# Transform document retriever results to match expected format
|
||||
github_chunks = self._transform_document_results(github_chunks)
|
||||
|
|
@ -1267,6 +1345,8 @@ class ConnectorService:
|
|||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for Linear issues and comments and return both the source information and langchain documents
|
||||
|
|
@ -1275,6 +1355,9 @@ class ConnectorService:
|
|||
user_query: The user's query
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
|
|
@ -1285,6 +1368,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="LINEAR_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
elif search_mode == SearchMode.DOCUMENTS:
|
||||
linear_chunks = await self.document_retriever.hybrid_search(
|
||||
|
|
@ -1292,6 +1377,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="LINEAR_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
# Transform document retriever results to match expected format
|
||||
linear_chunks = self._transform_document_results(linear_chunks)
|
||||
|
|
@ -1372,6 +1459,8 @@ class ConnectorService:
|
|||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for Jira issues and comments and return both the source information and langchain documents
|
||||
|
|
@ -1381,6 +1470,8 @@ class ConnectorService:
|
|||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
|
|
@ -1391,6 +1482,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="JIRA_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
elif search_mode == SearchMode.DOCUMENTS:
|
||||
jira_chunks = await self.document_retriever.hybrid_search(
|
||||
|
|
@ -1398,6 +1491,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="JIRA_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
# Transform document retriever results to match expected format
|
||||
jira_chunks = self._transform_document_results(jira_chunks)
|
||||
|
|
@ -1489,6 +1584,8 @@ class ConnectorService:
|
|||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for Google Calendar events and return both the source information and langchain documents
|
||||
|
|
@ -1498,6 +1595,8 @@ class ConnectorService:
|
|||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
|
|
@ -1508,6 +1607,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="GOOGLE_CALENDAR_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
elif search_mode == SearchMode.DOCUMENTS:
|
||||
calendar_chunks = await self.document_retriever.hybrid_search(
|
||||
|
|
@ -1515,6 +1616,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="GOOGLE_CALENDAR_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
# Transform document retriever results to match expected format
|
||||
calendar_chunks = self._transform_document_results(calendar_chunks)
|
||||
|
|
@ -1618,6 +1721,8 @@ class ConnectorService:
|
|||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for Airtable records and return both the source information and langchain documents
|
||||
|
|
@ -1627,6 +1732,8 @@ class ConnectorService:
|
|||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
|
|
@ -1637,6 +1744,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="AIRTABLE_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
elif search_mode == SearchMode.DOCUMENTS:
|
||||
airtable_chunks = await self.document_retriever.hybrid_search(
|
||||
|
|
@ -1644,6 +1753,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="AIRTABLE_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
# Transform document retriever results to match expected format
|
||||
airtable_chunks = self._transform_document_results(airtable_chunks)
|
||||
|
|
@ -1702,6 +1813,8 @@ class ConnectorService:
|
|||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for Gmail messages and return both the source information and langchain documents
|
||||
|
|
@ -1711,6 +1824,8 @@ class ConnectorService:
|
|||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
|
|
@ -1721,6 +1836,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="GOOGLE_GMAIL_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
elif search_mode == SearchMode.DOCUMENTS:
|
||||
gmail_chunks = await self.document_retriever.hybrid_search(
|
||||
|
|
@ -1728,6 +1845,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="GOOGLE_GMAIL_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
# Transform document retriever results to match expected format
|
||||
gmail_chunks = self._transform_document_results(gmail_chunks)
|
||||
|
|
@ -1822,6 +1941,8 @@ class ConnectorService:
|
|||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for Confluence pages and return both the source information and langchain documents
|
||||
|
|
@ -1831,6 +1952,8 @@ class ConnectorService:
|
|||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
|
|
@ -1841,6 +1964,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="CONFLUENCE_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
elif search_mode == SearchMode.DOCUMENTS:
|
||||
confluence_chunks = await self.document_retriever.hybrid_search(
|
||||
|
|
@ -1848,6 +1973,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="CONFLUENCE_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
# Transform document retriever results to match expected format
|
||||
confluence_chunks = self._transform_document_results(confluence_chunks)
|
||||
|
|
@ -1913,6 +2040,8 @@ class ConnectorService:
|
|||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for ClickUp tasks and return both the source information and langchain documents
|
||||
|
|
@ -1922,6 +2051,8 @@ class ConnectorService:
|
|||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
|
|
@ -1932,6 +2063,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="CLICKUP_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
elif search_mode == SearchMode.DOCUMENTS:
|
||||
clickup_chunks = await self.document_retriever.hybrid_search(
|
||||
|
|
@ -1939,6 +2072,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="CLICKUP_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
# Transform document retriever results to match expected format
|
||||
clickup_chunks = self._transform_document_results(clickup_chunks)
|
||||
|
|
@ -2146,6 +2281,8 @@ class ConnectorService:
|
|||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for Discord messages and return both the source information and langchain documents
|
||||
|
|
@ -2154,6 +2291,9 @@ class ConnectorService:
|
|||
user_query: The user's query
|
||||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
|
|
@ -2164,6 +2304,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="DISCORD_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
elif search_mode == SearchMode.DOCUMENTS:
|
||||
discord_chunks = await self.document_retriever.hybrid_search(
|
||||
|
|
@ -2171,6 +2313,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="DISCORD_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
# Transform document retriever results to match expected format
|
||||
discord_chunks = self._transform_document_results(discord_chunks)
|
||||
|
|
@ -2239,6 +2383,8 @@ class ConnectorService:
|
|||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for Luma events and return both the source information and langchain documents
|
||||
|
|
@ -2248,6 +2394,8 @@ class ConnectorService:
|
|||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
|
|
@ -2258,6 +2406,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="LUMA_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
elif search_mode == SearchMode.DOCUMENTS:
|
||||
luma_chunks = await self.document_retriever.hybrid_search(
|
||||
|
|
@ -2265,6 +2415,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="LUMA_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
# Transform document retriever results to match expected format
|
||||
luma_chunks = self._transform_document_results(luma_chunks)
|
||||
|
|
@ -2393,6 +2545,8 @@ class ConnectorService:
|
|||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for Elasticsearch documents and return both the source information and langchain documents
|
||||
|
|
@ -2402,6 +2556,8 @@ class ConnectorService:
|
|||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
|
|
@ -2412,6 +2568,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="ELASTICSEARCH_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
elif search_mode == SearchMode.DOCUMENTS:
|
||||
elasticsearch_chunks = await self.document_retriever.hybrid_search(
|
||||
|
|
@ -2419,6 +2577,8 @@ class ConnectorService:
|
|||
top_k=top_k,
|
||||
search_space_id=search_space_id,
|
||||
document_type="ELASTICSEARCH_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
# Transform document retriever results to match expected format
|
||||
elasticsearch_chunks = self._transform_document_results(
|
||||
|
|
@ -2504,6 +2664,8 @@ class ConnectorService:
|
|||
search_space_id: int,
|
||||
top_k: int = 20,
|
||||
search_mode: SearchMode = SearchMode.CHUNKS,
|
||||
start_date: datetime | None = None,
|
||||
end_date: datetime | None = None,
|
||||
) -> tuple:
|
||||
"""
|
||||
Search for BookStack pages and return both the source information and langchain documents
|
||||
|
|
@ -2514,6 +2676,8 @@ class ConnectorService:
|
|||
search_space_id: The search space ID to search in
|
||||
top_k: Maximum number of results to return
|
||||
search_mode: Search mode (CHUNKS or DOCUMENTS)
|
||||
start_date: Optional start date for filtering documents by updated_at
|
||||
end_date: Optional end date for filtering documents by updated_at
|
||||
|
||||
Returns:
|
||||
tuple: (sources_info, langchain_documents)
|
||||
|
|
@ -2522,17 +2686,19 @@ class ConnectorService:
|
|||
bookstack_chunks = await self.chunk_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="BOOKSTACK_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
elif search_mode == SearchMode.DOCUMENTS:
|
||||
bookstack_chunks = await self.document_retriever.hybrid_search(
|
||||
query_text=user_query,
|
||||
top_k=top_k,
|
||||
user_id=user_id,
|
||||
search_space_id=search_space_id,
|
||||
document_type="BOOKSTACK_CONNECTOR",
|
||||
start_date=start_date,
|
||||
end_date=end_date,
|
||||
)
|
||||
# Transform document retriever results to match expected format
|
||||
bookstack_chunks = self._transform_document_results(bookstack_chunks)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue