mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-25 00:36:31 +02:00
feat: enhance date handling and indexing logic across connectors
- Added normalization for "undefined" strings to None in date parameters to prevent parsing errors.
- Improved date range validation to ensure start_date is strictly before end_date, adjusting end_date if necessary.
- Updated Google Calendar and Composio connector indexing logic to handle duplicate content more effectively, logging warnings for skipped events.
- Enhanced error handling during final commits to manage integrity errors gracefully.
- Refactored date handling in various connector indexers for consistency and reliability.
This commit is contained in:
parent
08f16b43d7
commit
d20bb385b5
9 changed files with 83 additions and 13 deletions
|
|
@ -285,6 +285,13 @@ class GoogleGmailConnector:
|
|||
try:
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
# Normalize date values - handle "undefined" strings from frontend
|
||||
# This prevents "time data 'undefined' does not match format" errors
|
||||
if start_date == "undefined" or start_date == "":
|
||||
start_date = None
|
||||
if end_date == "undefined" or end_date == "":
|
||||
end_date = None
|
||||
|
||||
# Build date query
|
||||
query_parts = []
|
||||
|
||||
|
|
|
|||
|
|
@ -644,20 +644,30 @@ async def index_connector_content(
|
|||
|
||||
# Handle different connector types
|
||||
response_message = ""
|
||||
today_str = datetime.now().strftime("%Y-%m-%d")
|
||||
# Use UTC for consistency with last_indexed_at storage
|
||||
today_str = datetime.now(UTC).strftime("%Y-%m-%d")
|
||||
|
||||
# Determine the actual date range to use
|
||||
if start_date is None:
|
||||
# Use last_indexed_at or default to 365 days ago
|
||||
if connector.last_indexed_at:
|
||||
today = datetime.now().date()
|
||||
if connector.last_indexed_at.date() == today:
|
||||
# Convert last_indexed_at to timezone-naive for comparison (like calculate_date_range does)
|
||||
last_indexed_naive = (
|
||||
connector.last_indexed_at.replace(tzinfo=None)
|
||||
if connector.last_indexed_at.tzinfo
|
||||
else connector.last_indexed_at
|
||||
)
|
||||
# Use UTC for "today" to match how last_indexed_at is stored
|
||||
today_utc = datetime.now(UTC).replace(tzinfo=None).date()
|
||||
last_indexed_date = last_indexed_naive.date()
|
||||
|
||||
if last_indexed_date == today_utc:
|
||||
# If last indexed today, go back 1 day to ensure we don't miss anything
|
||||
indexing_from = (today - timedelta(days=1)).strftime("%Y-%m-%d")
|
||||
indexing_from = (today_utc - timedelta(days=1)).strftime("%Y-%m-%d")
|
||||
else:
|
||||
indexing_from = connector.last_indexed_at.strftime("%Y-%m-%d")
|
||||
indexing_from = last_indexed_naive.strftime("%Y-%m-%d")
|
||||
else:
|
||||
indexing_from = (datetime.now() - timedelta(days=365)).strftime(
|
||||
indexing_from = (datetime.now(UTC).replace(tzinfo=None) - timedelta(days=365)).strftime(
|
||||
"%Y-%m-%d"
|
||||
)
|
||||
else:
|
||||
|
|
@ -666,6 +676,7 @@ async def index_connector_content(
|
|||
# For calendar connectors, default to today but allow future dates if explicitly provided
|
||||
if connector.connector_type in [
|
||||
SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR,
|
||||
SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
|
||||
SearchSourceConnectorType.LUMA_CONNECTOR,
|
||||
]:
|
||||
# Default to today if no end_date provided (users can manually select future dates)
|
||||
|
|
@ -977,6 +988,9 @@ async def index_connector_content(
|
|||
index_composio_connector_task,
|
||||
)
|
||||
|
||||
# For Composio Gmail and Calendar, use the same date calculation logic as normal connectors
|
||||
# This ensures consistent behavior and uses last_indexed_at to reduce API calls
|
||||
# (includes special case: if indexed today, go back 1 day to avoid missing data)
|
||||
logger.info(
|
||||
f"Triggering Composio connector indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -112,6 +112,13 @@ def calculate_date_range(
|
|||
Returns:
|
||||
Tuple of (start_date_str, end_date_str)
|
||||
"""
|
||||
# Normalize "undefined" strings to None (from frontend)
|
||||
# This prevents parsing errors and ensures consistent behavior across all indexers
|
||||
if start_date == "undefined" or start_date == "":
|
||||
start_date = None
|
||||
if end_date == "undefined" or end_date == "":
|
||||
end_date = None
|
||||
|
||||
if start_date is not None and end_date is not None:
|
||||
return start_date, end_date
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@ Google Calendar connector indexer.
|
|||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import pytz
|
||||
from dateutil.parser import isoparse
|
||||
from google.oauth2.credentials import Credentials
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
|
@ -205,6 +207,23 @@ async def index_google_calendar_events(
|
|||
# Use provided dates (including future dates)
|
||||
start_date_str = start_date
|
||||
end_date_str = end_date
|
||||
|
||||
# If start_date and end_date are the same, adjust end_date to be one day later
|
||||
# to ensure valid date range (start_date must be strictly before end_date)
|
||||
if start_date_str == end_date_str:
|
||||
# Parse the date and add one day to ensure valid range
|
||||
dt = isoparse(end_date_str)
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=pytz.UTC)
|
||||
else:
|
||||
dt = dt.astimezone(pytz.UTC)
|
||||
# Add one day to end_date to make it strictly after start_date
|
||||
dt_end = dt + timedelta(days=1)
|
||||
end_date_str = dt_end.strftime("%Y-%m-%d")
|
||||
logger.info(
|
||||
f"Adjusted end_date from {end_date} to {end_date_str} "
|
||||
f"to ensure valid date range (start_date must be strictly before end_date)"
|
||||
)
|
||||
|
||||
await task_logger.log_task_progress(
|
||||
log_entry,
|
||||
|
|
|
|||
|
|
@ -116,6 +116,13 @@ async def index_luma_events(
|
|||
|
||||
luma_client = LumaConnector(api_key=api_key)
|
||||
|
||||
# Handle 'undefined' string from frontend (treat as None)
|
||||
# This prevents "time data 'undefined' does not match format" errors
|
||||
if start_date == "undefined" or start_date == "":
|
||||
start_date = None
|
||||
if end_date == "undefined" or end_date == "":
|
||||
end_date = None
|
||||
|
||||
# Calculate date range
|
||||
# For calendar connectors, allow future dates to index upcoming events
|
||||
if start_date is None or end_date is None:
|
||||
|
|
|
|||
|
|
@ -259,7 +259,13 @@ export const ConnectorIndicator: FC = () => {
|
|||
editingConnector.connector_type !== "GOOGLE_DRIVE_CONNECTOR"
|
||||
? () => {
|
||||
startIndexing(editingConnector.id);
|
||||
handleQuickIndexConnector(editingConnector.id, editingConnector.connector_type, stopIndexing);
|
||||
handleQuickIndexConnector(
|
||||
editingConnector.id,
|
||||
editingConnector.connector_type,
|
||||
stopIndexing,
|
||||
startDate,
|
||||
endDate
|
||||
);
|
||||
}
|
||||
: undefined
|
||||
}
|
||||
|
|
|
|||
|
|
@ -272,8 +272,7 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
|
|||
Re-indexing runs in the background
|
||||
</p>
|
||||
<p className="text-muted-foreground mt-1 text-[10px] sm:text-sm">
|
||||
You can continue using SurfSense while we sync your data. Check the Active tab
|
||||
to see progress.
|
||||
You can continue using SurfSense while we sync your data. Check inbox for updates.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -189,8 +189,7 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
|
|||
<div className="text-xs sm:text-sm">
|
||||
<p className="font-medium text-xs sm:text-sm">Indexing runs in the background</p>
|
||||
<p className="text-muted-foreground mt-1 text-[10px] sm:text-sm">
|
||||
You can continue using SurfSense while we sync your data. Check the Active tab
|
||||
to see progress.
|
||||
You can continue using SurfSense while we sync your data. Check inbox for updates.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
|||
|
|
@ -1400,9 +1400,15 @@ export const useConnectorDialog = () => {
|
|||
[editingConnector, searchSpaceId, deleteConnector, router, cameFromMCPList]
|
||||
);
|
||||
|
||||
// Handle quick index (index without date picker, uses backend defaults)
|
||||
// Handle quick index (index with selected date range, or backend defaults if none selected)
|
||||
const handleQuickIndexConnector = useCallback(
|
||||
async (connectorId: number, connectorType?: string, stopIndexing?: (id: number) => void) => {
|
||||
async (
|
||||
connectorId: number,
|
||||
connectorType?: string,
|
||||
stopIndexing?: (id: number) => void,
|
||||
startDate?: Date,
|
||||
endDate?: Date
|
||||
) => {
|
||||
if (!searchSpaceId) return;
|
||||
|
||||
// Track quick index clicked event
|
||||
|
|
@ -1411,10 +1417,16 @@ export const useConnectorDialog = () => {
|
|||
}
|
||||
|
||||
try {
|
||||
// Format dates if provided, otherwise pass undefined (backend will use defaults)
|
||||
const startDateStr = startDate ? format(startDate, "yyyy-MM-dd") : undefined;
|
||||
const endDateStr = endDate ? format(endDate, "yyyy-MM-dd") : undefined;
|
||||
|
||||
await indexConnector({
|
||||
connector_id: connectorId,
|
||||
queryParams: {
|
||||
search_space_id: searchSpaceId,
|
||||
start_date: startDateStr,
|
||||
end_date: endDateStr,
|
||||
},
|
||||
});
|
||||
toast.success("Indexing started", {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue