diff --git a/docs/chinese-llm-setup.md b/docs/chinese-llm-setup.md index 37042aa2f..1fb0ce2a1 100644 --- a/docs/chinese-llm-setup.md +++ b/docs/chinese-llm-setup.md @@ -24,7 +24,7 @@ SurfSense 现已支持以下国产 LLM: 1. 登录 SurfSense Dashboard 2. 进入 **Settings** → **API Keys** (或 **LLM Configurations**) -3. 点击 **Add New Configuration** +3. 点击 **Add LLM Model** 4. 从 **Provider** 下拉菜单中选择你的国产 LLM 提供商 5. 填写必填字段(见下方各提供商详细配置) 6. 点击 **Save** diff --git a/surfsense_backend/alembic/versions/111_add_prompts_table.py b/surfsense_backend/alembic/versions/111_add_prompts_table.py index 7d4d69fd2..f61c4e298 100644 --- a/surfsense_backend/alembic/versions/111_add_prompts_table.py +++ b/surfsense_backend/alembic/versions/111_add_prompts_table.py @@ -42,7 +42,9 @@ def upgrade() -> None: ) """) op.execute("CREATE INDEX ix_prompts_user_id ON prompts (user_id)") - op.execute("CREATE INDEX ix_prompts_search_space_id ON prompts (search_space_id)") + op.execute( + "CREATE INDEX ix_prompts_search_space_id ON prompts (search_space_id)" + ) def downgrade() -> None: diff --git a/surfsense_backend/app/agents/new_chat/tools/onedrive/create_file.py b/surfsense_backend/app/agents/new_chat/tools/onedrive/create_file.py index a712c9a45..8dffb18dd 100644 --- a/surfsense_backend/app/agents/new_chat/tools/onedrive/create_file.py +++ b/surfsense_backend/app/agents/new_chat/tools/onedrive/create_file.py @@ -81,7 +81,8 @@ def create_create_onedrive_file_tool( select(SearchSourceConnector).filter( SearchSourceConnector.search_space_id == search_space_id, SearchSourceConnector.user_id == user_id, - SearchSourceConnector.connector_type == SearchSourceConnectorType.ONEDRIVE_CONNECTOR, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.ONEDRIVE_CONNECTOR, ) ) connectors = result.scalars().all() @@ -95,12 +96,14 @@ def create_create_onedrive_file_tool( accounts = [] for c in connectors: cfg = c.config or {} - accounts.append({ - "id": c.id, - "name": c.name, - "user_email": cfg.get("user_email"), - "auth_expired": cfg.get("auth_expired", False), - }) + accounts.append( + { + "id": c.id, + "name": c.name, + "user_email": cfg.get("user_email"), + "auth_expired": cfg.get("auth_expired", False), + } + ) if all(a.get("auth_expired") for a in accounts): return { @@ -119,16 +122,22 @@ def create_create_onedrive_file_tool( client = OneDriveClient(session=db_session, connector_id=cid) items, err = await client.list_children("root") if err: - logger.warning("Failed to list folders for connector %s: %s", cid, err) + logger.warning( + "Failed to list folders for connector %s: %s", cid, err + ) parent_folders[cid] = [] else: parent_folders[cid] = [ {"folder_id": item["id"], "name": item["name"]} for item in items - if item.get("folder") is not None and item.get("id") and item.get("name") + if item.get("folder") is not None + and item.get("id") + and item.get("name") ] except Exception: - logger.warning("Error fetching folders for connector %s", cid, exc_info=True) + logger.warning( + "Error fetching folders for connector %s", cid, exc_info=True + ) parent_folders[cid] = [] context: dict[str, Any] = { @@ -152,8 +161,12 @@ def create_create_onedrive_file_tool( } ) - decisions_raw = approval.get("decisions", []) if isinstance(approval, dict) else [] - decisions = decisions_raw if isinstance(decisions_raw, list) else [decisions_raw] + decisions_raw = ( + approval.get("decisions", []) if isinstance(approval, dict) else [] + ) + decisions = ( + decisions_raw if isinstance(decisions_raw, list) else [decisions_raw] + ) decisions = [d for d in decisions if isinstance(d, dict)] if not decisions: return {"status": "error", "message": "No approval decision received"} @@ -192,7 +205,8 @@ def create_create_onedrive_file_tool( SearchSourceConnector.id == final_connector_id, SearchSourceConnector.search_space_id == search_space_id, SearchSourceConnector.user_id == user_id, - SearchSourceConnector.connector_type == SearchSourceConnectorType.ONEDRIVE_CONNECTOR, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.ONEDRIVE_CONNECTOR, ) ) connector = result.scalars().first() @@ -200,7 +214,10 @@ def create_create_onedrive_file_tool( connector = connectors[0] if not connector: - return {"status": "error", "message": "Selected OneDrive connector is invalid."} + return { + "status": "error", + "message": "Selected OneDrive connector is invalid.", + } docx_bytes = _markdown_to_docx(final_content or "") @@ -212,7 +229,9 @@ def create_create_onedrive_file_tool( mime_type=DOCX_MIME, ) - logger.info(f"OneDrive file created: id={created.get('id')}, name={created.get('name')}") + logger.info( + f"OneDrive file created: id={created.get('id')}, name={created.get('name')}" + ) kb_message_suffix = "" try: diff --git a/surfsense_backend/app/agents/new_chat/tools/onedrive/trash_file.py b/surfsense_backend/app/agents/new_chat/tools/onedrive/trash_file.py index ae7c5e306..79d8222fd 100644 --- a/surfsense_backend/app/agents/new_chat/tools/onedrive/trash_file.py +++ b/surfsense_backend/app/agents/new_chat/tools/onedrive/trash_file.py @@ -52,10 +52,15 @@ def create_delete_onedrive_file_tool( - If status is "not_found", relay the exact message to the user and ask them to verify the file name or check if it has been indexed. """ - logger.info(f"delete_onedrive_file called: file_name='{file_name}', delete_from_kb={delete_from_kb}") + logger.info( + f"delete_onedrive_file called: file_name='{file_name}', delete_from_kb={delete_from_kb}" + ) if db_session is None or search_space_id is None or user_id is None: - return {"status": "error", "message": "OneDrive tool not properly configured."} + return { + "status": "error", + "message": "OneDrive tool not properly configured.", + } try: doc_result = await db_session.execute( @@ -89,8 +94,12 @@ def create_delete_onedrive_file_tool( Document.search_space_id == search_space_id, Document.document_type == DocumentType.ONEDRIVE_FILE, func.lower( - cast(Document.document_metadata["onedrive_file_name"], String) - ) == func.lower(file_name), + cast( + Document.document_metadata["onedrive_file_name"], + String, + ) + ) + == func.lower(file_name), SearchSourceConnector.user_id == user_id, ) ) @@ -110,14 +119,20 @@ def create_delete_onedrive_file_tool( } if not document.connector_id: - return {"status": "error", "message": "Document has no associated connector."} + return { + "status": "error", + "message": "Document has no associated connector.", + } meta = document.document_metadata or {} file_id = meta.get("onedrive_file_id") document_id = document.id if not file_id: - return {"status": "error", "message": "File ID is missing. Please re-index the file."} + return { + "status": "error", + "message": "File ID is missing. Please re-index the file.", + } conn_result = await db_session.execute( select(SearchSourceConnector).filter( @@ -125,13 +140,17 @@ def create_delete_onedrive_file_tool( SearchSourceConnector.id == document.connector_id, SearchSourceConnector.search_space_id == search_space_id, SearchSourceConnector.user_id == user_id, - SearchSourceConnector.connector_type == SearchSourceConnectorType.ONEDRIVE_CONNECTOR, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.ONEDRIVE_CONNECTOR, ) ) ) connector = conn_result.scalars().first() if not connector: - return {"status": "error", "message": "OneDrive connector not found or access denied."} + return { + "status": "error", + "message": "OneDrive connector not found or access denied.", + } cfg = connector.config or {} if cfg.get("auth_expired"): @@ -170,8 +189,12 @@ def create_delete_onedrive_file_tool( } ) - decisions_raw = approval.get("decisions", []) if isinstance(approval, dict) else [] - decisions = decisions_raw if isinstance(decisions_raw, list) else [decisions_raw] + decisions_raw = ( + approval.get("decisions", []) if isinstance(approval, dict) else [] + ) + decisions = ( + decisions_raw if isinstance(decisions_raw, list) else [decisions_raw] + ) decisions = [d for d in decisions if isinstance(d, dict)] if not decisions: return {"status": "error", "message": "No approval decision received"} @@ -206,7 +229,8 @@ def create_delete_onedrive_file_tool( SearchSourceConnector.id == final_connector_id, SearchSourceConnector.search_space_id == search_space_id, SearchSourceConnector.user_id == user_id, - SearchSourceConnector.connector_type == SearchSourceConnectorType.ONEDRIVE_CONNECTOR, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.ONEDRIVE_CONNECTOR, ) ) ) @@ -224,10 +248,14 @@ def create_delete_onedrive_file_tool( f"Deleting OneDrive file: file_id='{final_file_id}', connector={actual_connector_id}" ) - client = OneDriveClient(session=db_session, connector_id=actual_connector_id) + client = OneDriveClient( + session=db_session, connector_id=actual_connector_id + ) await client.trash_file(final_file_id) - logger.info(f"OneDrive file deleted (moved to recycle bin): file_id={final_file_id}") + logger.info( + f"OneDrive file deleted (moved to recycle bin): file_id={final_file_id}" + ) trash_result: dict[str, Any] = { "status": "success", @@ -272,6 +300,9 @@ def create_delete_onedrive_file_tool( if isinstance(e, GraphInterrupt): raise logger.error(f"Error deleting OneDrive file: {e}", exc_info=True) - return {"status": "error", "message": "Something went wrong while trashing the file. Please try again."} + return { + "status": "error", + "message": "Something went wrong while trashing the file. Please try again.", + } return delete_onedrive_file diff --git a/surfsense_backend/app/connectors/onedrive/client.py b/surfsense_backend/app/connectors/onedrive/client.py index cc118c0c9..37c5823a3 100644 --- a/surfsense_backend/app/connectors/onedrive/client.py +++ b/surfsense_backend/app/connectors/onedrive/client.py @@ -39,7 +39,9 @@ class OneDriveClient: cfg = connector.config or {} is_encrypted = cfg.get("_token_encrypted", False) - token_encryption = TokenEncryption(config.SECRET_KEY) if config.SECRET_KEY else None + token_encryption = ( + TokenEncryption(config.SECRET_KEY) if config.SECRET_KEY else None + ) access_token = cfg.get("access_token", "") refresh_token = cfg.get("refresh_token") @@ -206,18 +208,20 @@ class OneDriveClient: async def download_file_to_disk(self, item_id: str, dest_path: str) -> str | None: """Stream file content to disk. Returns error message on failure.""" token = await self._get_valid_token() - async with httpx.AsyncClient(follow_redirects=True) as client: - async with client.stream( + async with ( + httpx.AsyncClient(follow_redirects=True) as client, + client.stream( "GET", f"{GRAPH_API_BASE}/me/drive/items/{item_id}/content", headers={"Authorization": f"Bearer {token}"}, timeout=120.0, - ) as resp: - if resp.status_code != 200: - return f"Download failed: {resp.status_code}" - with open(dest_path, "wb") as f: - async for chunk in resp.aiter_bytes(chunk_size=5 * 1024 * 1024): - f.write(chunk) + ) as resp, + ): + if resp.status_code != 200: + return f"Download failed: {resp.status_code}" + with open(dest_path, "wb") as f: + async for chunk in resp.aiter_bytes(chunk_size=5 * 1024 * 1024): + f.write(chunk) return None async def create_file( diff --git a/surfsense_backend/app/connectors/onedrive/content_extractor.py b/surfsense_backend/app/connectors/onedrive/content_extractor.py index 109a8cb15..8917ba1fd 100644 --- a/surfsense_backend/app/connectors/onedrive/content_extractor.py +++ b/surfsense_backend/app/connectors/onedrive/content_extractor.py @@ -5,6 +5,7 @@ extension-based, not provider-specific. """ import asyncio +import contextlib import logging import os import tempfile @@ -60,7 +61,9 @@ async def download_and_extract_content( temp_file_path = None try: - extension = Path(file_name).suffix or get_extension_from_mime(mime_type) or ".bin" + extension = ( + Path(file_name).suffix or get_extension_from_mime(mime_type) or ".bin" + ) with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as tmp: temp_file_path = tmp.name @@ -76,10 +79,8 @@ async def download_and_extract_content( return None, metadata, str(e) finally: if temp_file_path and os.path.exists(temp_file_path): - try: + with contextlib.suppress(Exception): os.unlink(temp_file_path) - except Exception: - pass async def _parse_file_to_markdown(file_path: str, filename: str) -> str: @@ -94,9 +95,10 @@ async def _parse_file_to_markdown(file_path: str, filename: str) -> str: return f.read() if lower.endswith((".mp3", ".mp4", ".mpeg", ".mpga", ".m4a", ".wav", ".webm")): - from app.config import config as app_config from litellm import atranscription + from app.config import config as app_config + stt_service_type = ( "local" if app_config.STT_SERVICE and app_config.STT_SERVICE.startswith("local/") @@ -106,9 +108,13 @@ async def _parse_file_to_markdown(file_path: str, filename: str) -> str: from app.services.stt_service import stt_service t0 = time.monotonic() - logger.info(f"[local-stt] START file={filename} thread={threading.current_thread().name}") + logger.info( + f"[local-stt] START file={filename} thread={threading.current_thread().name}" + ) result = await asyncio.to_thread(stt_service.transcribe_file, file_path) - logger.info(f"[local-stt] END file={filename} elapsed={time.monotonic() - t0:.2f}s") + logger.info( + f"[local-stt] END file={filename} elapsed={time.monotonic() - t0:.2f}s" + ) text = result.get("text", "") else: with open(file_path, "rb") as audio_file: @@ -150,7 +156,9 @@ async def _parse_file_to_markdown(file_path: str, filename: str) -> str: parse_with_llamacloud_retry, ) - result = await parse_with_llamacloud_retry(file_path=file_path, estimated_pages=50) + result = await parse_with_llamacloud_retry( + file_path=file_path, estimated_pages=50 + ) markdown_documents = await result.aget_markdown_documents(split_by_page=False) if not markdown_documents: raise RuntimeError(f"LlamaCloud returned no documents for {filename}") @@ -161,9 +169,13 @@ async def _parse_file_to_markdown(file_path: str, filename: str) -> str: converter = DocumentConverter() t0 = time.monotonic() - logger.info(f"[docling] START file={filename} thread={threading.current_thread().name}") + logger.info( + f"[docling] START file={filename} thread={threading.current_thread().name}" + ) result = await asyncio.to_thread(converter.convert, file_path) - logger.info(f"[docling] END file={filename} elapsed={time.monotonic() - t0:.2f}s") + logger.info( + f"[docling] END file={filename} elapsed={time.monotonic() - t0:.2f}s" + ) return result.document.export_to_markdown() raise RuntimeError(f"Unknown ETL_SERVICE: {app_config.ETL_SERVICE}") diff --git a/surfsense_backend/app/connectors/onedrive/folder_manager.py b/surfsense_backend/app/connectors/onedrive/folder_manager.py index 7f286453c..6fa725ca1 100644 --- a/surfsense_backend/app/connectors/onedrive/folder_manager.py +++ b/surfsense_backend/app/connectors/onedrive/folder_manager.py @@ -27,7 +27,10 @@ async def list_folder_contents( if item["isFolder"]: item.setdefault("mimeType", "application/vnd.ms-folder") else: - item.setdefault("mimeType", item.get("file", {}).get("mimeType", "application/octet-stream")) + item.setdefault( + "mimeType", + item.get("file", {}).get("mimeType", "application/octet-stream"), + ) items.sort(key=lambda x: (not x["isFolder"], x.get("name", "").lower())) @@ -63,7 +66,9 @@ async def get_files_in_folder( client, item["id"], include_subfolders=True ) if sub_error: - logger.warning(f"Error recursing into folder {item.get('name')}: {sub_error}") + logger.warning( + f"Error recursing into folder {item.get('name')}: {sub_error}" + ) continue files.extend(sub_files) elif not should_skip_file(item): diff --git a/surfsense_backend/app/routes/__init__.py b/surfsense_backend/app/routes/__init__.py index a2b7a154a..644ab07dc 100644 --- a/surfsense_backend/app/routes/__init__.py +++ b/surfsense_backend/app/routes/__init__.py @@ -33,9 +33,10 @@ from .new_llm_config_routes import router as new_llm_config_router from .notes_routes import router as notes_router from .notifications_routes import router as notifications_router from .notion_add_connector_route import router as notion_add_connector_router +from .onedrive_add_connector_route import router as onedrive_add_connector_router from .podcasts_routes import router as podcasts_router -from .public_chat_routes import router as public_chat_router from .prompts_routes import router as prompts_router +from .public_chat_routes import router as public_chat_router from .rbac_routes import router as rbac_router from .reports_routes import router as reports_router from .sandbox_routes import router as sandbox_router @@ -44,7 +45,6 @@ from .search_spaces_routes import router as search_spaces_router from .slack_add_connector_route import router as slack_add_connector_router from .surfsense_docs_routes import router as surfsense_docs_router from .teams_add_connector_route import router as teams_add_connector_router -from .onedrive_add_connector_route import router as onedrive_add_connector_router from .video_presentations_routes import router as video_presentations_router from .youtube_routes import router as youtube_router diff --git a/surfsense_backend/app/routes/onedrive_add_connector_route.py b/surfsense_backend/app/routes/onedrive_add_connector_route.py index 19bcbe6ff..2f41efca7 100644 --- a/surfsense_backend/app/routes/onedrive_add_connector_route.py +++ b/surfsense_backend/app/routes/onedrive_add_connector_route.py @@ -79,9 +79,13 @@ async def connect_onedrive(space_id: int, user: User = Depends(current_active_us if not space_id: raise HTTPException(status_code=400, detail="space_id is required") if not config.MICROSOFT_CLIENT_ID: - raise HTTPException(status_code=500, detail="Microsoft OneDrive OAuth not configured.") + raise HTTPException( + status_code=500, detail="Microsoft OneDrive OAuth not configured." + ) if not config.SECRET_KEY: - raise HTTPException(status_code=500, detail="SECRET_KEY not configured for OAuth security.") + raise HTTPException( + status_code=500, detail="SECRET_KEY not configured for OAuth security." + ) state_manager = get_state_manager() state_encoded = state_manager.generate_secure_state(space_id, user.id) @@ -96,14 +100,18 @@ async def connect_onedrive(space_id: int, user: User = Depends(current_active_us } auth_url = f"{AUTHORIZATION_URL}?{urlencode(auth_params)}" - logger.info("Generated OneDrive OAuth URL for user %s, space %s", user.id, space_id) + logger.info( + "Generated OneDrive OAuth URL for user %s, space %s", user.id, space_id + ) return {"auth_url": auth_url} except HTTPException: raise except Exception as e: logger.error("Failed to initiate OneDrive OAuth: %s", str(e), exc_info=True) - raise HTTPException(status_code=500, detail=f"Failed to initiate OneDrive OAuth: {e!s}") from e + raise HTTPException( + status_code=500, detail=f"Failed to initiate OneDrive OAuth: {e!s}" + ) from e @router.get("/auth/onedrive/connector/reauth") @@ -121,15 +129,20 @@ async def reauth_onedrive( SearchSourceConnector.id == connector_id, SearchSourceConnector.user_id == user.id, SearchSourceConnector.search_space_id == space_id, - SearchSourceConnector.connector_type == SearchSourceConnectorType.ONEDRIVE_CONNECTOR, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.ONEDRIVE_CONNECTOR, ) ) connector = result.scalars().first() if not connector: - raise HTTPException(status_code=404, detail="OneDrive connector not found or access denied") + raise HTTPException( + status_code=404, detail="OneDrive connector not found or access denied" + ) if not config.SECRET_KEY: - raise HTTPException(status_code=500, detail="SECRET_KEY not configured for OAuth security.") + raise HTTPException( + status_code=500, detail="SECRET_KEY not configured for OAuth security." + ) state_manager = get_state_manager() extra: dict = {"connector_id": connector_id} @@ -148,14 +161,20 @@ async def reauth_onedrive( } auth_url = f"{AUTHORIZATION_URL}?{urlencode(auth_params)}" - logger.info("Initiating OneDrive re-auth for user %s, connector %s", user.id, connector_id) + logger.info( + "Initiating OneDrive re-auth for user %s, connector %s", + user.id, + connector_id, + ) return {"auth_url": auth_url} except HTTPException: raise except Exception as e: logger.error("Failed to initiate OneDrive re-auth: %s", str(e), exc_info=True) - raise HTTPException(status_code=500, detail=f"Failed to initiate OneDrive re-auth: {e!s}") from e + raise HTTPException( + status_code=500, detail=f"Failed to initiate OneDrive re-auth: {e!s}" + ) from e @router.get("/auth/onedrive/connector/callback") @@ -182,10 +201,14 @@ async def onedrive_callback( return RedirectResponse( url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/connectors/callback?error=onedrive_oauth_denied" ) - return RedirectResponse(url=f"{config.NEXT_FRONTEND_URL}/dashboard?error=onedrive_oauth_denied") + return RedirectResponse( + url=f"{config.NEXT_FRONTEND_URL}/dashboard?error=onedrive_oauth_denied" + ) if not code or not state: - raise HTTPException(status_code=400, detail="Missing required OAuth parameters") + raise HTTPException( + status_code=400, detail="Missing required OAuth parameters" + ) state_manager = get_state_manager() try: @@ -194,7 +217,9 @@ async def onedrive_callback( user_id = UUID(data["user_id"]) except (HTTPException, ValueError, KeyError) as e: logger.error("Invalid OAuth state: %s", str(e)) - return RedirectResponse(url=f"{config.NEXT_FRONTEND_URL}/dashboard?error=invalid_state") + return RedirectResponse( + url=f"{config.NEXT_FRONTEND_URL}/dashboard?error=invalid_state" + ) reauth_connector_id = data.get("connector_id") reauth_return_url = data.get("return_url") @@ -222,20 +247,26 @@ async def onedrive_callback( error_detail = error_json.get("error_description", error_detail) except Exception: pass - raise HTTPException(status_code=400, detail=f"Token exchange failed: {error_detail}") + raise HTTPException( + status_code=400, detail=f"Token exchange failed: {error_detail}" + ) token_json = token_response.json() access_token = token_json.get("access_token") refresh_token = token_json.get("refresh_token") if not access_token: - raise HTTPException(status_code=400, detail="No access token received from Microsoft") + raise HTTPException( + status_code=400, detail="No access token received from Microsoft" + ) token_encryption = get_token_encryption() expires_at = None if token_json.get("expires_in"): - expires_at = datetime.now(UTC) + timedelta(seconds=int(token_json["expires_in"])) + expires_at = datetime.now(UTC) + timedelta( + seconds=int(token_json["expires_in"]) + ) user_info: dict = {} try: @@ -248,7 +279,8 @@ async def onedrive_callback( if user_response.status_code == 200: user_data = user_response.json() user_info = { - "user_email": user_data.get("mail") or user_data.get("userPrincipalName"), + "user_email": user_data.get("mail") + or user_data.get("userPrincipalName"), "user_name": user_data.get("displayName"), } except Exception as e: @@ -256,7 +288,9 @@ async def onedrive_callback( connector_config = { "access_token": token_encryption.encrypt_token(access_token), - "refresh_token": token_encryption.encrypt_token(refresh_token) if refresh_token else None, + "refresh_token": token_encryption.encrypt_token(refresh_token) + if refresh_token + else None, "token_type": token_json.get("token_type", "Bearer"), "expires_in": token_json.get("expires_in"), "expires_at": expires_at.isoformat() if expires_at else None, @@ -273,22 +307,36 @@ async def onedrive_callback( SearchSourceConnector.id == reauth_connector_id, SearchSourceConnector.user_id == user_id, SearchSourceConnector.search_space_id == space_id, - SearchSourceConnector.connector_type == SearchSourceConnectorType.ONEDRIVE_CONNECTOR, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.ONEDRIVE_CONNECTOR, ) ) db_connector = result.scalars().first() if not db_connector: - raise HTTPException(status_code=404, detail="Connector not found or access denied during re-auth") + raise HTTPException( + status_code=404, + detail="Connector not found or access denied during re-auth", + ) existing_delta_link = db_connector.config.get("delta_link") - db_connector.config = {**connector_config, "delta_link": existing_delta_link, "auth_expired": False} + db_connector.config = { + **connector_config, + "delta_link": existing_delta_link, + "auth_expired": False, + } flag_modified(db_connector, "config") await session.commit() await session.refresh(db_connector) - logger.info("Re-authenticated OneDrive connector %s for user %s", db_connector.id, user_id) + logger.info( + "Re-authenticated OneDrive connector %s for user %s", + db_connector.id, + user_id, + ) if reauth_return_url and reauth_return_url.startswith("/"): - return RedirectResponse(url=f"{config.NEXT_FRONTEND_URL}{reauth_return_url}") + return RedirectResponse( + url=f"{config.NEXT_FRONTEND_URL}{reauth_return_url}" + ) return RedirectResponse( url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/connectors/callback?success=true&connector=ONEDRIVE_CONNECTOR&connectorId={db_connector.id}" ) @@ -298,16 +346,26 @@ async def onedrive_callback( SearchSourceConnectorType.ONEDRIVE_CONNECTOR, connector_config ) is_duplicate = await check_duplicate_connector( - session, SearchSourceConnectorType.ONEDRIVE_CONNECTOR, space_id, user_id, connector_identifier, + session, + SearchSourceConnectorType.ONEDRIVE_CONNECTOR, + space_id, + user_id, + connector_identifier, ) if is_duplicate: - logger.warning("Duplicate OneDrive connector for user %s, space %s", user_id, space_id) + logger.warning( + "Duplicate OneDrive connector for user %s, space %s", user_id, space_id + ) return RedirectResponse( url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/connectors/callback?error=duplicate_account&connector=ONEDRIVE_CONNECTOR" ) connector_name = await generate_unique_connector_name( - session, SearchSourceConnectorType.ONEDRIVE_CONNECTOR, space_id, user_id, connector_identifier, + session, + SearchSourceConnectorType.ONEDRIVE_CONNECTOR, + space_id, + user_id, + connector_identifier, ) new_connector = SearchSourceConnector( @@ -323,20 +381,30 @@ async def onedrive_callback( session.add(new_connector) await session.commit() await session.refresh(new_connector) - logger.info("Successfully created OneDrive connector %s for user %s", new_connector.id, user_id) + logger.info( + "Successfully created OneDrive connector %s for user %s", + new_connector.id, + user_id, + ) return RedirectResponse( url=f"{config.NEXT_FRONTEND_URL}/dashboard/{space_id}/connectors/callback?success=true&connector=ONEDRIVE_CONNECTOR&connectorId={new_connector.id}" ) except IntegrityError as e: await session.rollback() - logger.error("Database integrity error creating OneDrive connector: %s", str(e)) - return RedirectResponse(url=f"{config.NEXT_FRONTEND_URL}/dashboard?error=connector_creation_failed") + logger.error( + "Database integrity error creating OneDrive connector: %s", str(e) + ) + return RedirectResponse( + url=f"{config.NEXT_FRONTEND_URL}/dashboard?error=connector_creation_failed" + ) except HTTPException: raise except (IntegrityError, ValueError) as e: logger.error("OneDrive OAuth callback error: %s", str(e), exc_info=True) - return RedirectResponse(url=f"{config.NEXT_FRONTEND_URL}/dashboard?error=onedrive_auth_error") + return RedirectResponse( + url=f"{config.NEXT_FRONTEND_URL}/dashboard?error=onedrive_auth_error" + ) @router.get("/connectors/{connector_id}/onedrive/folders") @@ -353,28 +421,44 @@ async def list_onedrive_folders( select(SearchSourceConnector).filter( SearchSourceConnector.id == connector_id, SearchSourceConnector.user_id == user.id, - SearchSourceConnector.connector_type == SearchSourceConnectorType.ONEDRIVE_CONNECTOR, + SearchSourceConnector.connector_type + == SearchSourceConnectorType.ONEDRIVE_CONNECTOR, ) ) connector = result.scalars().first() if not connector: - raise HTTPException(status_code=404, detail="OneDrive connector not found or access denied") + raise HTTPException( + status_code=404, detail="OneDrive connector not found or access denied" + ) onedrive_client = OneDriveClient(session, connector_id) items, error = await list_folder_contents(onedrive_client, parent_id=parent_id) if error: error_lower = error.lower() - if "401" in error or "authentication expired" in error_lower or "invalid_grant" in error_lower: + if ( + "401" in error + or "authentication expired" in error_lower + or "invalid_grant" in error_lower + ): try: if connector and not connector.config.get("auth_expired"): connector.config = {**connector.config, "auth_expired": True} flag_modified(connector, "config") await session.commit() except Exception: - logger.warning("Failed to persist auth_expired for connector %s", connector_id, exc_info=True) - raise HTTPException(status_code=400, detail="OneDrive authentication expired. Please re-authenticate.") - raise HTTPException(status_code=500, detail=f"Failed to list folder contents: {error}") + logger.warning( + "Failed to persist auth_expired for connector %s", + connector_id, + exc_info=True, + ) + raise HTTPException( + status_code=400, + detail="OneDrive authentication expired. Please re-authenticate.", + ) + raise HTTPException( + status_code=500, detail=f"Failed to list folder contents: {error}" + ) return {"items": items} @@ -391,8 +475,13 @@ async def list_onedrive_folders( await session.commit() except Exception: pass - raise HTTPException(status_code=400, detail="OneDrive authentication expired. Please re-authenticate.") from e - raise HTTPException(status_code=500, detail=f"Failed to list OneDrive contents: {e!s}") from e + raise HTTPException( + status_code=400, + detail="OneDrive authentication expired. Please re-authenticate.", + ) from e + raise HTTPException( + status_code=500, detail=f"Failed to list OneDrive contents: {e!s}" + ) from e async def refresh_onedrive_token( @@ -410,10 +499,15 @@ async def refresh_onedrive_token( refresh_token = token_encryption.decrypt_token(refresh_token) except Exception as e: logger.error("Failed to decrypt refresh token: %s", str(e)) - raise HTTPException(status_code=500, detail="Failed to decrypt stored refresh token") from e + raise HTTPException( + status_code=500, detail="Failed to decrypt stored refresh token" + ) from e if not refresh_token: - raise HTTPException(status_code=400, detail=f"No refresh token available for connector {connector.id}") + raise HTTPException( + status_code=400, + detail=f"No refresh token available for connector {connector.id}", + ) refresh_data = { "client_id": config.MICROSOFT_CLIENT_ID, @@ -425,8 +519,10 @@ async def refresh_onedrive_token( async with httpx.AsyncClient() as client: token_response = await client.post( - TOKEN_URL, data=refresh_data, - headers={"Content-Type": "application/x-www-form-urlencoded"}, timeout=30.0, + TOKEN_URL, + data=refresh_data, + headers={"Content-Type": "application/x-www-form-urlencoded"}, + timeout=30.0, ) if token_response.status_code != 200: @@ -439,16 +535,27 @@ async def refresh_onedrive_token( except Exception: pass error_lower = (error_detail + error_code).lower() - if "invalid_grant" in error_lower or "expired" in error_lower or "revoked" in error_lower: - raise HTTPException(status_code=401, detail="OneDrive authentication failed. Please re-authenticate.") - raise HTTPException(status_code=400, detail=f"Token refresh failed: {error_detail}") + if ( + "invalid_grant" in error_lower + or "expired" in error_lower + or "revoked" in error_lower + ): + raise HTTPException( + status_code=401, + detail="OneDrive authentication failed. Please re-authenticate.", + ) + raise HTTPException( + status_code=400, detail=f"Token refresh failed: {error_detail}" + ) token_json = token_response.json() access_token = token_json.get("access_token") new_refresh_token = token_json.get("refresh_token") if not access_token: - raise HTTPException(status_code=400, detail="No access token received from Microsoft refresh") + raise HTTPException( + status_code=400, detail="No access token received from Microsoft refresh" + ) expires_at = None expires_in = token_json.get("expires_in") diff --git a/surfsense_backend/app/routes/search_source_connectors_routes.py b/surfsense_backend/app/routes/search_source_connectors_routes.py index 7e9ac1e59..d12fa3745 100644 --- a/surfsense_backend/app/routes/search_source_connectors_routes.py +++ b/surfsense_backend/app/routes/search_source_connectors_routes.py @@ -2567,8 +2567,12 @@ async def run_onedrive_indexing( search_space_id=search_space_id, folder_count=len(items_dict.get("folders", [])), file_count=len(items_dict.get("files", [])), - folder_names=[f.get("name", "Unknown") for f in items_dict.get("folders", [])], - file_names=[f.get("name", "Unknown") for f in items_dict.get("files", [])], + folder_names=[ + f.get("name", "Unknown") for f in items_dict.get("folders", []) + ], + file_names=[ + f.get("name", "Unknown") for f in items_dict.get("files", []) + ], ) if notification: @@ -2593,7 +2597,9 @@ async def run_onedrive_indexing( ) if _is_auth_error(error_message): await _persist_auth_expired(session, connector_id) - error_message = "OneDrive authentication expired. Please re-authenticate." + error_message = ( + "OneDrive authentication expired. Please re-authenticate." + ) else: if notification: await session.refresh(notification) diff --git a/surfsense_backend/app/services/onedrive/kb_sync_service.py b/surfsense_backend/app/services/onedrive/kb_sync_service.py index 5e82950a5..962c19fc9 100644 --- a/surfsense_backend/app/services/onedrive/kb_sync_service.py +++ b/surfsense_backend/app/services/onedrive/kb_sync_service.py @@ -56,9 +56,7 @@ class OneDriveKBSyncService: indexable_content = (content or "").strip() if not indexable_content: - indexable_content = ( - f"OneDrive file: {file_name} (type: {mime_type})" - ) + indexable_content = f"OneDrive file: {file_name} (type: {mime_type})" content_hash = generate_content_hash(indexable_content, search_space_id) @@ -95,9 +93,7 @@ class OneDriveKBSyncService: ) else: logger.warning("No LLM configured — using fallback summary") - summary_content = ( - f"OneDrive File: {file_name}\n\n{indexable_content}" - ) + summary_content = f"OneDrive File: {file_name}\n\n{indexable_content}" summary_embedding = embed_text(summary_content) chunks = await create_document_chunks(indexable_content) diff --git a/surfsense_backend/app/tasks/chat/stream_new_chat.py b/surfsense_backend/app/tasks/chat/stream_new_chat.py index c1ca089d0..4b37cb69e 100644 --- a/surfsense_backend/app/tasks/chat/stream_new_chat.py +++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py @@ -1075,6 +1075,37 @@ async def _stream_agent_events( "thread_id": thread_id_str, }, ) + elif tool_name == "web_search": + xml = ( + tool_output.get("result", str(tool_output)) + if isinstance(tool_output, dict) + else str(tool_output) + ) + citations: dict[str, dict[str, str]] = {} + for m in re.finditer( + r"
{error.title}
-{error.message}
+{error.message}