Merge remote-tracking branch 'upstream/dev' into electon-desktop

This commit is contained in:
CREDO23 2026-03-29 10:41:05 +02:00
commit ab3c636bcd
85 changed files with 4642 additions and 414 deletions

View file

@ -44,6 +44,7 @@ from .search_spaces_routes import router as search_spaces_router
from .slack_add_connector_route import router as slack_add_connector_router
from .surfsense_docs_routes import router as surfsense_docs_router
from .teams_add_connector_route import router as teams_add_connector_router
from .onedrive_add_connector_route import router as onedrive_add_connector_router
from .video_presentations_routes import router as video_presentations_router
from .youtube_routes import router as youtube_router
@ -74,6 +75,7 @@ router.include_router(luma_add_connector_router)
router.include_router(notion_add_connector_router)
router.include_router(slack_add_connector_router)
router.include_router(teams_add_connector_router)
router.include_router(onedrive_add_connector_router)
router.include_router(discord_add_connector_router)
router.include_router(jira_add_connector_router)
router.include_router(confluence_add_connector_router)

View file

@ -0,0 +1,474 @@
"""
Microsoft OneDrive Connector OAuth Routes.
Endpoints:
- GET /auth/onedrive/connector/add - Initiate OAuth
- GET /auth/onedrive/connector/callback - Handle OAuth callback
- GET /auth/onedrive/connector/reauth - Re-authenticate existing connector
- GET /connectors/{connector_id}/onedrive/folders - List folder contents
"""
import logging
from datetime import UTC, datetime, timedelta
from urllib.parse import urlencode
from uuid import UUID
import httpx
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import RedirectResponse
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy.orm.attributes import flag_modified
from app.config import config
from app.connectors.onedrive import OneDriveClient, list_folder_contents
from app.db import (
SearchSourceConnector,
SearchSourceConnectorType,
User,
get_async_session,
)
from app.users import current_active_user
from app.utils.connector_naming import (
check_duplicate_connector,
extract_identifier_from_credentials,
generate_unique_connector_name,
)
from app.utils.oauth_security import OAuthStateManager, TokenEncryption
logger = logging.getLogger(__name__)
router = APIRouter()
AUTHORIZATION_URL = "https://login.microsoftonline.com/common/oauth2/v2.0/authorize"
TOKEN_URL = "https://login.microsoftonline.com/common/oauth2/v2.0/token"
SCOPES = [
"offline_access",
"User.Read",
"Files.Read.All",
"Files.ReadWrite.All",
]
_state_manager = None
_token_encryption = None
def get_state_manager() -> OAuthStateManager:
global _state_manager
if _state_manager is None:
if not config.SECRET_KEY:
raise ValueError("SECRET_KEY must be set for OAuth security")
_state_manager = OAuthStateManager(config.SECRET_KEY)
return _state_manager
def get_token_encryption() -> TokenEncryption:
global _token_encryption
if _token_encryption is None:
if not config.SECRET_KEY:
raise ValueError("SECRET_KEY must be set for token encryption")
_token_encryption = TokenEncryption(config.SECRET_KEY)
return _token_encryption
@router.get("/auth/onedrive/connector/add")
async def connect_onedrive(space_id: int, user: User = Depends(current_active_user)):
"""Initiate OneDrive OAuth flow."""
try:
if not space_id:
raise HTTPException(status_code=400, detail="space_id is required")
if not config.MICROSOFT_CLIENT_ID:
raise HTTPException(status_code=500, detail="Microsoft OneDrive OAuth not configured.")
if not config.SECRET_KEY:
raise HTTPException(status_code=500, detail="SECRET_KEY not configured for OAuth security.")
state_manager = get_state_manager()
state_encoded = state_manager.generate_secure_state(space_id, user.id)
auth_params = {
"client_id": config.MICROSOFT_CLIENT_ID,
"response_type": "code",
"redirect_uri": config.ONEDRIVE_REDIRECT_URI,
"response_mode": "query",
"scope": " ".join(SCOPES),
"state": state_encoded,
}
auth_url = f"{AUTHORIZATION_URL}?{urlencode(auth_params)}"
logger.info("Generated OneDrive OAuth URL for user %s, space %s", user.id, space_id)
return {"auth_url": auth_url}
except HTTPException:
raise
except Exception as e:
logger.error("Failed to initiate OneDrive OAuth: %s", str(e), exc_info=True)
raise HTTPException(status_code=500, detail=f"Failed to initiate OneDrive OAuth: {e!s}") from e
@router.get("/auth/onedrive/connector/reauth")
async def reauth_onedrive(
space_id: int,
connector_id: int,
return_url: str | None = None,
user: User = Depends(current_active_user),
session: AsyncSession = Depends(get_async_session),
):
"""Re-authenticate an existing OneDrive connector."""
try:
result = await session.execute(
select(SearchSourceConnector).filter(
SearchSourceConnector.id == connector_id,
SearchSourceConnector.user_id == user.id,
SearchSourceConnector.search_space_id == space_id,
SearchSourceConnector.connector_type == SearchSourceConnectorType.ONEDRIVE_CONNECTOR,
)
)
connector = result.scalars().first()
if not connector:
raise HTTPException(status_code=404, detail="OneDrive connector not found or access denied")
if not config.SECRET_KEY:
raise HTTPException(status_code=500, detail="SECRET_KEY not configured for OAuth security.")
state_manager = get_state_manager()
extra: dict = {"connector_id": connector_id}
if return_url and return_url.startswith("/"):
extra["return_url"] = return_url
state_encoded = state_manager.generate_secure_state(space_id, user.id, **extra)
auth_params = {
"client_id": config.MICROSOFT_CLIENT_ID,
"response_type": "code",
"redirect_uri": config.ONEDRIVE_REDIRECT_URI,
"response_mode": "query",
"scope": " ".join(SCOPES),
"state": state_encoded,
"prompt": "consent",
}
auth_url = f"{AUTHORIZATION_URL}?{urlencode(auth_params)}"
logger.info("Initiating OneDrive re-auth for user %s, connector %s", user.id, connector_id)
return {"auth_url": auth_url}
except HTTPException:
raise
except Exception as e:
logger.error("Failed to initiate OneDrive re-auth: %s", str(e), exc_info=True)
raise HTTPException(status_code=500, detail=f"Failed to initiate OneDrive re-auth: {e!s}") from e
@router.get("/auth/onedrive/connector/callback")
async def onedrive_callback(
code: str | None = None,
error: str | None = None,
error_description: str | None = None,
state: str | None = None,
session: AsyncSession = Depends(get_async_session),
):
"""Handle OneDrive OAuth callback."""
try:
if error:
error_msg = error_description or error
logger.warning("OneDrive OAuth error: %s", error_msg)
space_id = None
if state:
try:
data = get_state_manager().validate_state(state)
space_id = data.get("space_id")
except Exception:
pass
if space_id:
return RedirectResponse(
url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/connectors/callback?error=onedrive_oauth_denied"
)
return RedirectResponse(url=f"{config.NEXT_FRONTEND_URL}/dashboard?error=onedrive_oauth_denied")
if not code or not state:
raise HTTPException(status_code=400, detail="Missing required OAuth parameters")
state_manager = get_state_manager()
try:
data = state_manager.validate_state(state)
space_id = data["space_id"]
user_id = UUID(data["user_id"])
except (HTTPException, ValueError, KeyError) as e:
logger.error("Invalid OAuth state: %s", str(e))
return RedirectResponse(url=f"{config.NEXT_FRONTEND_URL}/dashboard?error=invalid_state")
reauth_connector_id = data.get("connector_id")
reauth_return_url = data.get("return_url")
token_data = {
"client_id": config.MICROSOFT_CLIENT_ID,
"client_secret": config.MICROSOFT_CLIENT_SECRET,
"code": code,
"redirect_uri": config.ONEDRIVE_REDIRECT_URI,
"grant_type": "authorization_code",
}
async with httpx.AsyncClient() as client:
token_response = await client.post(
TOKEN_URL,
data=token_data,
headers={"Content-Type": "application/x-www-form-urlencoded"},
timeout=30.0,
)
if token_response.status_code != 200:
error_detail = token_response.text
try:
error_json = token_response.json()
error_detail = error_json.get("error_description", error_detail)
except Exception:
pass
raise HTTPException(status_code=400, detail=f"Token exchange failed: {error_detail}")
token_json = token_response.json()
access_token = token_json.get("access_token")
refresh_token = token_json.get("refresh_token")
if not access_token:
raise HTTPException(status_code=400, detail="No access token received from Microsoft")
token_encryption = get_token_encryption()
expires_at = None
if token_json.get("expires_in"):
expires_at = datetime.now(UTC) + timedelta(seconds=int(token_json["expires_in"]))
user_info: dict = {}
try:
async with httpx.AsyncClient() as client:
user_response = await client.get(
"https://graph.microsoft.com/v1.0/me",
headers={"Authorization": f"Bearer {access_token}"},
timeout=30.0,
)
if user_response.status_code == 200:
user_data = user_response.json()
user_info = {
"user_email": user_data.get("mail") or user_data.get("userPrincipalName"),
"user_name": user_data.get("displayName"),
}
except Exception as e:
logger.warning("Failed to fetch user info from Graph: %s", str(e))
connector_config = {
"access_token": token_encryption.encrypt_token(access_token),
"refresh_token": token_encryption.encrypt_token(refresh_token) if refresh_token else None,
"token_type": token_json.get("token_type", "Bearer"),
"expires_in": token_json.get("expires_in"),
"expires_at": expires_at.isoformat() if expires_at else None,
"scope": token_json.get("scope"),
"user_email": user_info.get("user_email"),
"user_name": user_info.get("user_name"),
"_token_encrypted": True,
}
# Handle re-authentication
if reauth_connector_id:
result = await session.execute(
select(SearchSourceConnector).filter(
SearchSourceConnector.id == reauth_connector_id,
SearchSourceConnector.user_id == user_id,
SearchSourceConnector.search_space_id == space_id,
SearchSourceConnector.connector_type == SearchSourceConnectorType.ONEDRIVE_CONNECTOR,
)
)
db_connector = result.scalars().first()
if not db_connector:
raise HTTPException(status_code=404, detail="Connector not found or access denied during re-auth")
existing_delta_link = db_connector.config.get("delta_link")
db_connector.config = {**connector_config, "delta_link": existing_delta_link, "auth_expired": False}
flag_modified(db_connector, "config")
await session.commit()
await session.refresh(db_connector)
logger.info("Re-authenticated OneDrive connector %s for user %s", db_connector.id, user_id)
if reauth_return_url and reauth_return_url.startswith("/"):
return RedirectResponse(url=f"{config.NEXT_FRONTEND_URL}{reauth_return_url}")
return RedirectResponse(
url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/connectors/callback?success=true&connector=ONEDRIVE_CONNECTOR&connectorId={db_connector.id}"
)
# New connector -- check for duplicates
connector_identifier = extract_identifier_from_credentials(
SearchSourceConnectorType.ONEDRIVE_CONNECTOR, connector_config
)
is_duplicate = await check_duplicate_connector(
session, SearchSourceConnectorType.ONEDRIVE_CONNECTOR, space_id, user_id, connector_identifier,
)
if is_duplicate:
logger.warning("Duplicate OneDrive connector for user %s, space %s", user_id, space_id)
return RedirectResponse(
url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/connectors/callback?error=duplicate_account&connector=ONEDRIVE_CONNECTOR"
)
connector_name = await generate_unique_connector_name(
session, SearchSourceConnectorType.ONEDRIVE_CONNECTOR, space_id, user_id, connector_identifier,
)
new_connector = SearchSourceConnector(
name=connector_name,
connector_type=SearchSourceConnectorType.ONEDRIVE_CONNECTOR,
is_indexable=True,
config=connector_config,
search_space_id=space_id,
user_id=user_id,
)
try:
session.add(new_connector)
await session.commit()
await session.refresh(new_connector)
logger.info("Successfully created OneDrive connector %s for user %s", new_connector.id, user_id)
return RedirectResponse(
url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/connectors/callback?success=true&connector=ONEDRIVE_CONNECTOR&connectorId={new_connector.id}"
)
except IntegrityError as e:
await session.rollback()
logger.error("Database integrity error creating OneDrive connector: %s", str(e))
return RedirectResponse(url=f"{config.NEXT_FRONTEND_URL}/dashboard?error=connector_creation_failed")
except HTTPException:
raise
except (IntegrityError, ValueError) as e:
logger.error("OneDrive OAuth callback error: %s", str(e), exc_info=True)
return RedirectResponse(url=f"{config.NEXT_FRONTEND_URL}/dashboard?error=onedrive_auth_error")
@router.get("/connectors/{connector_id}/onedrive/folders")
async def list_onedrive_folders(
connector_id: int,
parent_id: str | None = None,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user),
):
"""List folders and files in user's OneDrive."""
connector = None
try:
result = await session.execute(
select(SearchSourceConnector).filter(
SearchSourceConnector.id == connector_id,
SearchSourceConnector.user_id == user.id,
SearchSourceConnector.connector_type == SearchSourceConnectorType.ONEDRIVE_CONNECTOR,
)
)
connector = result.scalars().first()
if not connector:
raise HTTPException(status_code=404, detail="OneDrive connector not found or access denied")
onedrive_client = OneDriveClient(session, connector_id)
items, error = await list_folder_contents(onedrive_client, parent_id=parent_id)
if error:
error_lower = error.lower()
if "401" in error or "authentication expired" in error_lower or "invalid_grant" in error_lower:
try:
if connector and not connector.config.get("auth_expired"):
connector.config = {**connector.config, "auth_expired": True}
flag_modified(connector, "config")
await session.commit()
except Exception:
logger.warning("Failed to persist auth_expired for connector %s", connector_id, exc_info=True)
raise HTTPException(status_code=400, detail="OneDrive authentication expired. Please re-authenticate.")
raise HTTPException(status_code=500, detail=f"Failed to list folder contents: {error}")
return {"items": items}
except HTTPException:
raise
except Exception as e:
logger.error("Error listing OneDrive contents: %s", str(e), exc_info=True)
error_lower = str(e).lower()
if "401" in str(e) or "authentication expired" in error_lower:
try:
if connector and not connector.config.get("auth_expired"):
connector.config = {**connector.config, "auth_expired": True}
flag_modified(connector, "config")
await session.commit()
except Exception:
pass
raise HTTPException(status_code=400, detail="OneDrive authentication expired. Please re-authenticate.") from e
raise HTTPException(status_code=500, detail=f"Failed to list OneDrive contents: {e!s}") from e
async def refresh_onedrive_token(
session: AsyncSession, connector: SearchSourceConnector
) -> SearchSourceConnector:
"""Refresh OneDrive OAuth tokens."""
logger.info("Refreshing OneDrive OAuth tokens for connector %s", connector.id)
token_encryption = get_token_encryption()
is_encrypted = connector.config.get("_token_encrypted", False)
refresh_token = connector.config.get("refresh_token")
if is_encrypted and refresh_token:
try:
refresh_token = token_encryption.decrypt_token(refresh_token)
except Exception as e:
logger.error("Failed to decrypt refresh token: %s", str(e))
raise HTTPException(status_code=500, detail="Failed to decrypt stored refresh token") from e
if not refresh_token:
raise HTTPException(status_code=400, detail=f"No refresh token available for connector {connector.id}")
refresh_data = {
"client_id": config.MICROSOFT_CLIENT_ID,
"client_secret": config.MICROSOFT_CLIENT_SECRET,
"grant_type": "refresh_token",
"refresh_token": refresh_token,
"scope": " ".join(SCOPES),
}
async with httpx.AsyncClient() as client:
token_response = await client.post(
TOKEN_URL, data=refresh_data,
headers={"Content-Type": "application/x-www-form-urlencoded"}, timeout=30.0,
)
if token_response.status_code != 200:
error_detail = token_response.text
error_code = ""
try:
error_json = token_response.json()
error_detail = error_json.get("error_description", error_detail)
error_code = error_json.get("error", "")
except Exception:
pass
error_lower = (error_detail + error_code).lower()
if "invalid_grant" in error_lower or "expired" in error_lower or "revoked" in error_lower:
raise HTTPException(status_code=401, detail="OneDrive authentication failed. Please re-authenticate.")
raise HTTPException(status_code=400, detail=f"Token refresh failed: {error_detail}")
token_json = token_response.json()
access_token = token_json.get("access_token")
new_refresh_token = token_json.get("refresh_token")
if not access_token:
raise HTTPException(status_code=400, detail="No access token received from Microsoft refresh")
expires_at = None
expires_in = token_json.get("expires_in")
if expires_in:
expires_at = datetime.now(UTC) + timedelta(seconds=int(expires_in))
cfg = dict(connector.config)
cfg["access_token"] = token_encryption.encrypt_token(access_token)
if new_refresh_token:
cfg["refresh_token"] = token_encryption.encrypt_token(new_refresh_token)
cfg["expires_in"] = expires_in
cfg["expires_at"] = expires_at.isoformat() if expires_at else None
cfg["scope"] = token_json.get("scope")
cfg["_token_encrypted"] = True
cfg.pop("auth_expired", None)
connector.config = cfg
flag_modified(connector, "config")
await session.commit()
await session.refresh(connector)
logger.info("Successfully refreshed OneDrive tokens for connector %s", connector.id)
return connector

View file

@ -999,6 +999,53 @@ async def index_connector_content(
)
response_message = "Google Drive indexing started in the background."
elif connector.connector_type == SearchSourceConnectorType.ONEDRIVE_CONNECTOR:
from app.tasks.celery_tasks.connector_tasks import (
index_onedrive_files_task,
)
if drive_items and drive_items.has_items():
logger.info(
f"Triggering OneDrive indexing for connector {connector_id} into search space {search_space_id}, "
f"folders: {len(drive_items.folders)}, files: {len(drive_items.files)}"
)
items_dict = drive_items.model_dump()
else:
config = connector.config or {}
selected_folders = config.get("selected_folders", [])
selected_files = config.get("selected_files", [])
if not selected_folders and not selected_files:
raise HTTPException(
status_code=400,
detail="OneDrive indexing requires folders or files to be configured. "
"Please select folders/files to index.",
)
indexing_options = config.get(
"indexing_options",
{
"max_files_per_folder": 100,
"incremental_sync": True,
"include_subfolders": True,
},
)
items_dict = {
"folders": selected_folders,
"files": selected_files,
"indexing_options": indexing_options,
}
logger.info(
f"Triggering OneDrive indexing for connector {connector_id} into search space {search_space_id} "
f"using existing config"
)
index_onedrive_files_task.delay(
connector_id,
search_space_id,
str(user.id),
items_dict,
)
response_message = "OneDrive indexing started in the background."
elif connector.connector_type == SearchSourceConnectorType.DISCORD_CONNECTOR:
from app.tasks.celery_tasks.connector_tasks import (
index_discord_messages_task,
@ -2489,6 +2536,108 @@ async def run_google_drive_indexing(
logger.error(f"Failed to update notification: {notif_error!s}")
async def run_onedrive_indexing(
session: AsyncSession,
connector_id: int,
search_space_id: int,
user_id: str,
items_dict: dict,
):
"""Runs the OneDrive indexing task for folders and files with notifications."""
from uuid import UUID
notification = None
try:
from app.tasks.connector_indexers.onedrive_indexer import index_onedrive_files
connector_result = await session.execute(
select(SearchSourceConnector).where(
SearchSourceConnector.id == connector_id
)
)
connector = connector_result.scalar_one_or_none()
if connector:
notification = await NotificationService.connector_indexing.notify_google_drive_indexing_started(
session=session,
user_id=UUID(user_id),
connector_id=connector_id,
connector_name=connector.name,
connector_type=connector.connector_type.value,
search_space_id=search_space_id,
folder_count=len(items_dict.get("folders", [])),
file_count=len(items_dict.get("files", [])),
folder_names=[f.get("name", "Unknown") for f in items_dict.get("folders", [])],
file_names=[f.get("name", "Unknown") for f in items_dict.get("files", [])],
)
if notification:
await NotificationService.connector_indexing.notify_indexing_progress(
session=session,
notification=notification,
indexed_count=0,
stage="fetching",
)
total_indexed, total_skipped, error_message = await index_onedrive_files(
session,
connector_id,
search_space_id,
user_id,
items_dict,
)
if error_message:
logger.error(
f"OneDrive indexing completed with errors for connector {connector_id}: {error_message}"
)
if _is_auth_error(error_message):
await _persist_auth_expired(session, connector_id)
error_message = "OneDrive authentication expired. Please re-authenticate."
else:
if notification:
await session.refresh(notification)
await NotificationService.connector_indexing.notify_indexing_progress(
session=session,
notification=notification,
indexed_count=total_indexed,
stage="storing",
)
logger.info(
f"OneDrive indexing successful for connector {connector_id}. Indexed {total_indexed} documents."
)
await _update_connector_timestamp_by_id(session, connector_id)
await session.commit()
if notification:
await session.refresh(notification)
await NotificationService.connector_indexing.notify_indexing_completed(
session=session,
notification=notification,
indexed_count=total_indexed,
error_message=error_message,
skipped_count=total_skipped,
)
except Exception as e:
logger.error(
f"Critical error in run_onedrive_indexing for connector {connector_id}: {e}",
exc_info=True,
)
if notification:
try:
await session.refresh(notification)
await NotificationService.connector_indexing.notify_indexing_completed(
session=session,
notification=notification,
indexed_count=0,
error_message=str(e),
)
except Exception as notif_error:
logger.error(f"Failed to update notification: {notif_error!s}")
# Add new helper functions for luma indexing
async def run_luma_indexing_with_new_session(
connector_id: int,

View file

@ -88,7 +88,7 @@ async def connect_teams(space_id: int, user: User = Depends(current_active_user)
if not space_id:
raise HTTPException(status_code=400, detail="space_id is required")
if not config.TEAMS_CLIENT_ID:
if not config.MICROSOFT_CLIENT_ID:
raise HTTPException(
status_code=500, detail="Microsoft Teams OAuth not configured."
)
@ -106,7 +106,7 @@ async def connect_teams(space_id: int, user: User = Depends(current_active_user)
from urllib.parse import urlencode
auth_params = {
"client_id": config.TEAMS_CLIENT_ID,
"client_id": config.MICROSOFT_CLIENT_ID,
"response_type": "code",
"redirect_uri": config.TEAMS_REDIRECT_URI,
"response_mode": "query",
@ -181,8 +181,8 @@ async def teams_callback(
# Exchange authorization code for access token
token_data = {
"client_id": config.TEAMS_CLIENT_ID,
"client_secret": config.TEAMS_CLIENT_SECRET,
"client_id": config.MICROSOFT_CLIENT_ID,
"client_secret": config.MICROSOFT_CLIENT_SECRET,
"code": code,
"redirect_uri": config.TEAMS_REDIRECT_URI,
"grant_type": "authorization_code",
@ -403,8 +403,8 @@ async def refresh_teams_token(
# Microsoft uses oauth2/v2.0/token for token refresh
refresh_data = {
"client_id": config.TEAMS_CLIENT_ID,
"client_secret": config.TEAMS_CLIENT_SECRET,
"client_id": config.MICROSOFT_CLIENT_ID,
"client_secret": config.MICROSOFT_CLIENT_SECRET,
"grant_type": "refresh_token",
"refresh_token": refresh_token,
"scope": " ".join(SCOPES),