mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-01 03:46:25 +02:00
feat: enhance date handling and indexing logic across connectors
- Added normalization for "undefined" strings to None in date parameters to prevent parsing errors.
- Improved date range validation to ensure start_date is strictly before end_date, adjusting end_date if necessary.
- Updated Google Calendar and Composio connector indexing logic to handle duplicate content more effectively, logging warnings for skipped events.
- Enhanced error handling during final commits to manage integrity errors gracefully.
- Refactored date handling in various connector indexers for consistency and reliability.
This commit is contained in:
parent
08f16b43d7
commit
d20bb385b5
9 changed files with 83 additions and 13 deletions
|
|
@ -285,6 +285,13 @@ class GoogleGmailConnector:
|
||||||
try:
|
try:
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
# Normalize date values - handle "undefined" strings from frontend
|
||||||
|
# This prevents "time data 'undefined' does not match format" errors
|
||||||
|
if start_date == "undefined" or start_date == "":
|
||||||
|
start_date = None
|
||||||
|
if end_date == "undefined" or end_date == "":
|
||||||
|
end_date = None
|
||||||
|
|
||||||
# Build date query
|
# Build date query
|
||||||
query_parts = []
|
query_parts = []
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -644,20 +644,30 @@ async def index_connector_content(
|
||||||
|
|
||||||
# Handle different connector types
|
# Handle different connector types
|
||||||
response_message = ""
|
response_message = ""
|
||||||
today_str = datetime.now().strftime("%Y-%m-%d")
|
# Use UTC for consistency with last_indexed_at storage
|
||||||
|
today_str = datetime.now(UTC).strftime("%Y-%m-%d")
|
||||||
|
|
||||||
# Determine the actual date range to use
|
# Determine the actual date range to use
|
||||||
if start_date is None:
|
if start_date is None:
|
||||||
# Use last_indexed_at or default to 365 days ago
|
# Use last_indexed_at or default to 365 days ago
|
||||||
if connector.last_indexed_at:
|
if connector.last_indexed_at:
|
||||||
today = datetime.now().date()
|
# Convert last_indexed_at to timezone-naive for comparison (like calculate_date_range does)
|
||||||
if connector.last_indexed_at.date() == today:
|
last_indexed_naive = (
|
||||||
|
connector.last_indexed_at.replace(tzinfo=None)
|
||||||
|
if connector.last_indexed_at.tzinfo
|
||||||
|
else connector.last_indexed_at
|
||||||
|
)
|
||||||
|
# Use UTC for "today" to match how last_indexed_at is stored
|
||||||
|
today_utc = datetime.now(UTC).replace(tzinfo=None).date()
|
||||||
|
last_indexed_date = last_indexed_naive.date()
|
||||||
|
|
||||||
|
if last_indexed_date == today_utc:
|
||||||
# If last indexed today, go back 1 day to ensure we don't miss anything
|
# If last indexed today, go back 1 day to ensure we don't miss anything
|
||||||
indexing_from = (today - timedelta(days=1)).strftime("%Y-%m-%d")
|
indexing_from = (today_utc - timedelta(days=1)).strftime("%Y-%m-%d")
|
||||||
else:
|
else:
|
||||||
indexing_from = connector.last_indexed_at.strftime("%Y-%m-%d")
|
indexing_from = last_indexed_naive.strftime("%Y-%m-%d")
|
||||||
else:
|
else:
|
||||||
indexing_from = (datetime.now() - timedelta(days=365)).strftime(
|
indexing_from = (datetime.now(UTC).replace(tzinfo=None) - timedelta(days=365)).strftime(
|
||||||
"%Y-%m-%d"
|
"%Y-%m-%d"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
|
@ -666,6 +676,7 @@ async def index_connector_content(
|
||||||
# For calendar connectors, default to today but allow future dates if explicitly provided
|
# For calendar connectors, default to today but allow future dates if explicitly provided
|
||||||
if connector.connector_type in [
|
if connector.connector_type in [
|
||||||
SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR,
|
SearchSourceConnectorType.GOOGLE_CALENDAR_CONNECTOR,
|
||||||
|
SearchSourceConnectorType.COMPOSIO_GOOGLE_CALENDAR_CONNECTOR,
|
||||||
SearchSourceConnectorType.LUMA_CONNECTOR,
|
SearchSourceConnectorType.LUMA_CONNECTOR,
|
||||||
]:
|
]:
|
||||||
# Default to today if no end_date provided (users can manually select future dates)
|
# Default to today if no end_date provided (users can manually select future dates)
|
||||||
|
|
@ -977,6 +988,9 @@ async def index_connector_content(
|
||||||
index_composio_connector_task,
|
index_composio_connector_task,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# For Composio Gmail and Calendar, use the same date calculation logic as normal connectors
|
||||||
|
# This ensures consistent behavior and uses last_indexed_at to reduce API calls
|
||||||
|
# (includes special case: if indexed today, go back 1 day to avoid missing data)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Triggering Composio connector indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
|
f"Triggering Composio connector indexing for connector {connector_id} into search space {search_space_id} from {indexing_from} to {indexing_to}"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -112,6 +112,13 @@ def calculate_date_range(
|
||||||
Returns:
|
Returns:
|
||||||
Tuple of (start_date_str, end_date_str)
|
Tuple of (start_date_str, end_date_str)
|
||||||
"""
|
"""
|
||||||
|
# Normalize "undefined" strings to None (from frontend)
|
||||||
|
# This prevents parsing errors and ensures consistent behavior across all indexers
|
||||||
|
if start_date == "undefined" or start_date == "":
|
||||||
|
start_date = None
|
||||||
|
if end_date == "undefined" or end_date == "":
|
||||||
|
end_date = None
|
||||||
|
|
||||||
if start_date is not None and end_date is not None:
|
if start_date is not None and end_date is not None:
|
||||||
return start_date, end_date
|
return start_date, end_date
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,8 @@ Google Calendar connector indexer.
|
||||||
|
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
import pytz
|
||||||
|
from dateutil.parser import isoparse
|
||||||
from google.oauth2.credentials import Credentials
|
from google.oauth2.credentials import Credentials
|
||||||
from sqlalchemy.exc import SQLAlchemyError
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
|
|
@ -206,6 +208,23 @@ async def index_google_calendar_events(
|
||||||
start_date_str = start_date
|
start_date_str = start_date
|
||||||
end_date_str = end_date
|
end_date_str = end_date
|
||||||
|
|
||||||
|
# If start_date and end_date are the same, adjust end_date to be one day later
|
||||||
|
# to ensure valid date range (start_date must be strictly before end_date)
|
||||||
|
if start_date_str == end_date_str:
|
||||||
|
# Parse the date and add one day to ensure valid range
|
||||||
|
dt = isoparse(end_date_str)
|
||||||
|
if dt.tzinfo is None:
|
||||||
|
dt = dt.replace(tzinfo=pytz.UTC)
|
||||||
|
else:
|
||||||
|
dt = dt.astimezone(pytz.UTC)
|
||||||
|
# Add one day to end_date to make it strictly after start_date
|
||||||
|
dt_end = dt + timedelta(days=1)
|
||||||
|
end_date_str = dt_end.strftime("%Y-%m-%d")
|
||||||
|
logger.info(
|
||||||
|
f"Adjusted end_date from {end_date} to {end_date_str} "
|
||||||
|
f"to ensure valid date range (start_date must be strictly before end_date)"
|
||||||
|
)
|
||||||
|
|
||||||
await task_logger.log_task_progress(
|
await task_logger.log_task_progress(
|
||||||
log_entry,
|
log_entry,
|
||||||
f"Fetching Google Calendar events from {start_date_str} to {end_date_str}",
|
f"Fetching Google Calendar events from {start_date_str} to {end_date_str}",
|
||||||
|
|
|
||||||
|
|
@ -116,6 +116,13 @@ async def index_luma_events(
|
||||||
|
|
||||||
luma_client = LumaConnector(api_key=api_key)
|
luma_client = LumaConnector(api_key=api_key)
|
||||||
|
|
||||||
|
# Handle 'undefined' string from frontend (treat as None)
|
||||||
|
# This prevents "time data 'undefined' does not match format" errors
|
||||||
|
if start_date == "undefined" or start_date == "":
|
||||||
|
start_date = None
|
||||||
|
if end_date == "undefined" or end_date == "":
|
||||||
|
end_date = None
|
||||||
|
|
||||||
# Calculate date range
|
# Calculate date range
|
||||||
# For calendar connectors, allow future dates to index upcoming events
|
# For calendar connectors, allow future dates to index upcoming events
|
||||||
if start_date is None or end_date is None:
|
if start_date is None or end_date is None:
|
||||||
|
|
|
||||||
|
|
@ -259,7 +259,13 @@ export const ConnectorIndicator: FC = () => {
|
||||||
editingConnector.connector_type !== "GOOGLE_DRIVE_CONNECTOR"
|
editingConnector.connector_type !== "GOOGLE_DRIVE_CONNECTOR"
|
||||||
? () => {
|
? () => {
|
||||||
startIndexing(editingConnector.id);
|
startIndexing(editingConnector.id);
|
||||||
handleQuickIndexConnector(editingConnector.id, editingConnector.connector_type, stopIndexing);
|
handleQuickIndexConnector(
|
||||||
|
editingConnector.id,
|
||||||
|
editingConnector.connector_type,
|
||||||
|
stopIndexing,
|
||||||
|
startDate,
|
||||||
|
endDate
|
||||||
|
);
|
||||||
}
|
}
|
||||||
: undefined
|
: undefined
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -272,8 +272,7 @@ export const ConnectorEditView: FC<ConnectorEditViewProps> = ({
|
||||||
Re-indexing runs in the background
|
Re-indexing runs in the background
|
||||||
</p>
|
</p>
|
||||||
<p className="text-muted-foreground mt-1 text-[10px] sm:text-sm">
|
<p className="text-muted-foreground mt-1 text-[10px] sm:text-sm">
|
||||||
You can continue using SurfSense while we sync your data. Check the Active tab
|
You can continue using SurfSense while we sync your data. Check inbox for updates.
|
||||||
to see progress.
|
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
||||||
|
|
@ -189,8 +189,7 @@ export const IndexingConfigurationView: FC<IndexingConfigurationViewProps> = ({
|
||||||
<div className="text-xs sm:text-sm">
|
<div className="text-xs sm:text-sm">
|
||||||
<p className="font-medium text-xs sm:text-sm">Indexing runs in the background</p>
|
<p className="font-medium text-xs sm:text-sm">Indexing runs in the background</p>
|
||||||
<p className="text-muted-foreground mt-1 text-[10px] sm:text-sm">
|
<p className="text-muted-foreground mt-1 text-[10px] sm:text-sm">
|
||||||
You can continue using SurfSense while we sync your data. Check the Active tab
|
You can continue using SurfSense while we sync your data. Check inbox for updates.
|
||||||
to see progress.
|
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
||||||
|
|
@ -1400,9 +1400,15 @@ export const useConnectorDialog = () => {
|
||||||
[editingConnector, searchSpaceId, deleteConnector, router, cameFromMCPList]
|
[editingConnector, searchSpaceId, deleteConnector, router, cameFromMCPList]
|
||||||
);
|
);
|
||||||
|
|
||||||
// Handle quick index (index without date picker, uses backend defaults)
|
// Handle quick index (index with selected date range, or backend defaults if none selected)
|
||||||
const handleQuickIndexConnector = useCallback(
|
const handleQuickIndexConnector = useCallback(
|
||||||
async (connectorId: number, connectorType?: string, stopIndexing?: (id: number) => void) => {
|
async (
|
||||||
|
connectorId: number,
|
||||||
|
connectorType?: string,
|
||||||
|
stopIndexing?: (id: number) => void,
|
||||||
|
startDate?: Date,
|
||||||
|
endDate?: Date
|
||||||
|
) => {
|
||||||
if (!searchSpaceId) return;
|
if (!searchSpaceId) return;
|
||||||
|
|
||||||
// Track quick index clicked event
|
// Track quick index clicked event
|
||||||
|
|
@ -1411,10 +1417,16 @@ export const useConnectorDialog = () => {
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
// Format dates if provided, otherwise pass undefined (backend will use defaults)
|
||||||
|
const startDateStr = startDate ? format(startDate, "yyyy-MM-dd") : undefined;
|
||||||
|
const endDateStr = endDate ? format(endDate, "yyyy-MM-dd") : undefined;
|
||||||
|
|
||||||
await indexConnector({
|
await indexConnector({
|
||||||
connector_id: connectorId,
|
connector_id: connectorId,
|
||||||
queryParams: {
|
queryParams: {
|
||||||
search_space_id: searchSpaceId,
|
search_space_id: searchSpaceId,
|
||||||
|
start_date: startDateStr,
|
||||||
|
end_date: endDateStr,
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
toast.success("Indexing started", {
|
toast.success("Indexing started", {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue