diff --git a/surfsense_backend/app/agents/new_chat/tools/google_drive/create_file.py b/surfsense_backend/app/agents/new_chat/tools/google_drive/create_file.py index 90ee5ac5e..7a990c98d 100644 --- a/surfsense_backend/app/agents/new_chat/tools/google_drive/create_file.py +++ b/surfsense_backend/app/agents/new_chat/tools/google_drive/create_file.py @@ -242,12 +242,36 @@ def create_create_google_drive_file_tool( logger.info( f"Google Drive file created: id={created.get('id')}, name={created.get('name')}" ) + + kb_message_suffix = "" + try: + from app.services.google_drive import GoogleDriveKBSyncService + + kb_service = GoogleDriveKBSyncService(db_session) + kb_result = await kb_service.sync_after_create( + file_id=created.get("id"), + file_name=created.get("name", final_name), + mime_type=mime_type, + web_view_link=created.get("webViewLink"), + content=final_content, + connector_id=actual_connector_id, + search_space_id=search_space_id, + user_id=user_id, + ) + if kb_result["status"] == "success": + kb_message_suffix = " Your knowledge base has also been updated." + else: + kb_message_suffix = " This file will be added to your knowledge base in the next scheduled sync." + except Exception as kb_err: + logger.warning(f"KB sync after create failed: {kb_err}") + kb_message_suffix = " This file will be added to your knowledge base in the next scheduled sync." 
+ return { "status": "success", "file_id": created.get("id"), "name": created.get("name"), "web_view_link": created.get("webViewLink"), - "message": f"Successfully created '{created.get('name')}' in Google Drive.", + "message": f"Successfully created '{created.get('name')}' in Google Drive.{kb_message_suffix}", } except Exception as e: diff --git a/surfsense_backend/app/services/google_drive/__init__.py b/surfsense_backend/app/services/google_drive/__init__.py index 5958a1761..0b5812083 100644 --- a/surfsense_backend/app/services/google_drive/__init__.py +++ b/surfsense_backend/app/services/google_drive/__init__.py @@ -1,3 +1,4 @@ +from app.services.google_drive.kb_sync_service import GoogleDriveKBSyncService from app.services.google_drive.tool_metadata_service import ( GoogleDriveAccount, GoogleDriveFile, @@ -7,5 +8,6 @@ from app.services.google_drive.tool_metadata_service import ( __all__ = [ "GoogleDriveAccount", "GoogleDriveFile", + "GoogleDriveKBSyncService", "GoogleDriveToolMetadataService", ]
diff --git a/surfsense_backend/app/services/google_drive/kb_sync_service.py b/surfsense_backend/app/services/google_drive/kb_sync_service.py
new file mode 100644
index 000000000..c47f2001d
--- /dev/null
+++ b/surfsense_backend/app/services/google_drive/kb_sync_service.py
@@ -0,0 +1,198 @@
+import logging
+from datetime import datetime
+
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.db import Document, DocumentType
+from app.services.llm_service import get_user_long_context_llm
+from app.utils.document_converters import (
+    create_document_chunks,
+    embed_text,
+    generate_content_hash,
+    generate_document_summary,
+    generate_unique_identifier_hash,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class GoogleDriveKBSyncService:
+    """Index a newly created Google Drive file into the knowledge base.
+
+    Lets a file created through the chat tool become searchable right away
+    instead of waiting for the periodic connector indexer.
+    """
+
+    def __init__(self, db_session: AsyncSession):
+        self.db_session = db_session
+
+    async def sync_after_create(
+        self,
+        file_id: str,
+        file_name: str,
+        mime_type: str,
+        web_view_link: str | None,
+        content: str | None,
+        connector_id: int,
+        search_space_id: int,
+        user_id: str,
+    ) -> dict:
+        """Store a ``Document`` (plus chunks) for a just-created Drive file.
+
+        Args:
+            file_id: Google Drive ID of the created file.
+            file_name: Display name; used as the document title.
+            mime_type: Drive MIME type, recorded in the document metadata.
+            web_view_link: Link to the file, if Drive returned one.
+            content: Text written to the file. When blank, a short
+                placeholder built from the name and MIME type is indexed.
+            connector_id: ID of the connector the file was created through.
+            search_space_id: Search space the document is stored in.
+            user_id: User whose long-context LLM writes the summary.
+
+        Returns:
+            ``{"status": "success"}`` when the document was stored or already
+            existed, otherwise ``{"status": "error", "message": ...}``.
+            Indexing failures are logged and rolled back rather than raised.
+        """
+        from app.tasks.connector_indexers.base import (
+            check_document_by_unique_identifier,
+            check_duplicate_document_by_hash,
+            get_current_timestamp,
+            safe_set_chunks,
+        )
+
+        # The caller passes ``created.get("id")``, which may be None. Without
+        # an ID there is nothing stable to key the document on, so leave the
+        # file to the periodic indexer instead of hashing a missing value.
+        if not file_id:
+            logger.warning(
+                "KB sync after create skipped for %r: missing Drive file id",
+                file_name,
+            )
+            return {"status": "error", "message": "Missing Google Drive file id"}
+
+        try:
+            unique_hash = generate_unique_identifier_hash(
+                DocumentType.GOOGLE_DRIVE_FILE, file_id, search_space_id
+            )
+
+            existing = await check_document_by_unique_identifier(
+                self.db_session, unique_hash
+            )
+            if existing:
+                logger.info(
+                    "Document for Drive file %s already exists (doc_id=%s), skipping",
+                    file_id,
+                    existing.id,
+                )
+                return {"status": "success"}
+
+            indexable_content = (content or "").strip()
+            if not indexable_content:
+                indexable_content = f"Google Drive file: {file_name} (type: {mime_type})"
+
+            content_hash = generate_content_hash(indexable_content, search_space_id)
+
+            with self.db_session.no_autoflush:
+                dup = await check_duplicate_document_by_hash(
+                    self.db_session, content_hash
+                )
+            if dup:
+                # Reuse the per-file hash so identical text stored in another
+                # document does not collide on content_hash.
+                logger.info(
+                    "Content-hash collision for Drive file %s — identical content "
+                    "exists in doc %s. Using unique_identifier_hash as content_hash.",
+                    file_id,
+                    dup.id,
+                )
+                content_hash = unique_hash
+
+            user_llm = await get_user_long_context_llm(
+                self.db_session,
+                user_id,
+                search_space_id,
+                disable_streaming=True,
+            )
+
+            doc_metadata_for_summary = {
+                "file_name": file_name,
+                "mime_type": mime_type,
+                "document_type": "Google Drive File",
+                "connector_type": "Google Drive",
+            }
+
+            if user_llm:
+                summary_content, summary_embedding = await generate_document_summary(
+                    indexable_content, user_llm, doc_metadata_for_summary
+                )
+            else:
+                logger.warning("No LLM configured — using fallback summary")
+                summary_content = f"Google Drive File: {file_name}\n\n{indexable_content}"
+                summary_embedding = embed_text(summary_content)
+
+            chunks = await create_document_chunks(indexable_content)
+            now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+            document = Document(
+                title=file_name,
+                document_type=DocumentType.GOOGLE_DRIVE_FILE,
+                document_metadata={
+                    "google_drive_file_id": file_id,
+                    "google_drive_file_name": file_name,
+                    "google_drive_mime_type": mime_type,
+                    "web_view_link": web_view_link,
+                    "source_connector": "google_drive",
+                    "indexed_at": now_str,
+                    "connector_id": connector_id,
+                },
+                content=summary_content,
+                content_hash=content_hash,
+                unique_identifier_hash=unique_hash,
+                embedding=summary_embedding,
+                search_space_id=search_space_id,
+                connector_id=connector_id,
+                source_markdown=content,
+                updated_at=get_current_timestamp(),
+            )
+
+            self.db_session.add(document)
+            await self.db_session.flush()
+            await safe_set_chunks(self.db_session, document, chunks)
+            await self.db_session.commit()
+
+            logger.info(
+                "KB sync after create succeeded: doc_id=%s, file=%s, chunks=%d",
+                document.id,
+                file_name,
+                len(chunks),
+            )
+            return {"status": "success"}
+
+        except Exception as e:
+            error_str = str(e).lower()
+            is_duplicate = (
+                "duplicate key value violates unique constraint" in error_str
+                or "uniqueviolationerror" in error_str
+            )
+            if is_duplicate:
+                logger.warning(
+                    "Duplicate constraint hit during KB sync for file %s. "
+                    "Rolling back — periodic indexer will handle it. Error: %s",
+                    file_id,
+                    e,
+                )
+                message = "Duplicate document detected"
+            else:
+                logger.error(
+                    "KB sync after create failed for file %s: %s",
+                    file_id,
+                    e,
+                    exc_info=True,
+                )
+                message = str(e)
+
+            # Either way the session must be rolled back before it is reused.
+            await self.db_session.rollback()
+            return {"status": "error", "message": message}
diff --git a/surfsense_backend/app/services/google_drive/tool_metadata_service.py b/surfsense_backend/app/services/google_drive/tool_metadata_service.py index 8438344e0..e48d5f9e1 100644 --- a/surfsense_backend/app/services/google_drive/tool_metadata_service.py +++ b/surfsense_backend/app/services/google_drive/tool_metadata_service.py @@ -74,6 +74,7 @@ class GoogleDriveToolMetadataService: return { "accounts": [], "supported_types": [], + "parent_folders": {}, "error": "No Google Drive account connected", } @@ -86,9 +87,12 @@ class GoogleDriveToolMetadataService: await self._persist_auth_expired(acc.id) accounts_with_status.append(acc_dict) + parent_folders = await self._get_parent_folders_by_account(accounts_with_status) + return { "accounts": accounts_with_status, "supported_types": ["google_doc", "google_sheet"], + "parent_folders": parent_folders, } async def get_trash_context( @@ -236,3 +240,82 @@ class GoogleDriveToolMetadataService: connector_id, exc_info=True, )
+
+    async def _get_parent_folders_by_account(
+        self, accounts_with_status: list[dict]
+    ) -> dict[int, list[dict]]:
+        """Fetch root-level folders for each healthy account.
+
+        Skips accounts where ``auth_expired`` is True so we don't waste an API
+        call that will fail anyway.
+
+        Args:
+            accounts_with_status: Account dicts; ``id`` is required and a
+                truthy ``auth_expired`` marks the account as unhealthy.
+
+        Returns:
+            Connector ID mapped to a list of ``{"folder_id", "name"}`` dicts.
+            Every account gets an entry, which is empty when the account is
+            skipped, its connector row is missing, or the listing fails.
+            One ``list_files`` call with ``page_size=50`` is made per account.
+        """
+        parent_folders: dict[int, list[dict]] = {}
+
+        for acc in accounts_with_status:
+            connector_id = acc["id"]
+            # Default to "no folders"; only a successful listing overwrites it.
+            parent_folders[connector_id] = []
+            if acc.get("auth_expired"):
+                continue
+
+            try:
+                result = await self._db_session.execute(
+                    select(SearchSourceConnector).where(
+                        SearchSourceConnector.id == connector_id
+                    )
+                )
+                connector = result.scalar_one_or_none()
+                if not connector:
+                    continue
+
+                pre_built_creds = None
+                if (
+                    connector.connector_type
+                    == SearchSourceConnectorType.COMPOSIO_GOOGLE_DRIVE_CONNECTOR
+                ):
+                    cca_id = connector.config.get("composio_connected_account_id")
+                    if cca_id:
+                        pre_built_creds = build_composio_credentials(cca_id)
+
+                client = GoogleDriveClient(
+                    session=self._db_session,
+                    connector_id=connector_id,
+                    credentials=pre_built_creds,
+                )
+
+                folders, _, error = await client.list_files(
+                    query="mimeType = 'application/vnd.google-apps.folder' and trashed = false and 'root' in parents",
+                    fields="files(id, name)",
+                    page_size=50,
+                )
+                if error:
+                    logger.warning(
+                        "Failed to list folders for connector %s: %s",
+                        connector_id,
+                        error,
+                    )
+                    continue
+
+                parent_folders[connector_id] = [
+                    {"folder_id": f["id"], "name": f["name"]}
+                    for f in folders or []
+                    if f.get("id") and f.get("name")
+                ]
+            except Exception:
+                logger.warning(
+                    "Error fetching folders for connector %s",
+                    connector_id,
+                    exc_info=True,
+                )
+
+        return parent_folders
diff --git a/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx b/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx index 8d2182c16..d65cd7404 100644 --- a/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx +++ b/surfsense_web/components/assistant-ui/connector-popup/views/connector-accounts-list-view.tsx @@ -250,11 +250,7 @@ export const ConnectorAccountsListView: FC =
({ onClick={() => handleReauth(connector.id)} disabled={reauthingId === connector.id} > - {reauthingId === connector.id ? ( - - ) : ( - - )} + Re-authenticate ) : ( diff --git a/surfsense_web/components/tool-ui/google-drive/create-file.tsx b/surfsense_web/components/tool-ui/google-drive/create-file.tsx index 5632de702..0038afd01 100644 --- a/surfsense_web/components/tool-ui/google-drive/create-file.tsx +++ b/surfsense_web/components/tool-ui/google-drive/create-file.tsx @@ -12,7 +12,6 @@ import { useParams } from "next/navigation"; import { useCallback, useEffect, useMemo, useState } from "react"; import { toast } from "sonner"; import { Button } from "@/components/ui/button"; -import { Input } from "@/components/ui/input"; import { Select, SelectContent, @@ -46,6 +45,7 @@ interface InterruptResult { context?: { accounts?: GoogleDriveAccount[]; supported_types?: string[]; + parent_folders?: Record>; error?: string; }; } @@ -153,7 +153,20 @@ function ApprovalCard({ const [selectedAccountId, setSelectedAccountId] = useState(defaultAccountId); const [selectedFileType, setSelectedFileType] = useState(args.file_type ?? "google_doc"); - const [parentFolderId, setParentFolderId] = useState(""); + const [parentFolderId, setParentFolderId] = useState("__root__"); + + const parentFolders = interruptData.context?.parent_folders ?? {}; + const availableParentFolders = useMemo(() => { + if (!selectedAccountId) return []; + return parentFolders[Number(selectedAccountId)] ?? []; + }, [selectedAccountId, parentFolders]); + + const handleAccountChange = useCallback((value: string) => { + setSelectedAccountId(value); + setParentFolderId("__root__"); + }, []); + + const fileTypeLabel = FILE_TYPE_LABELS[selectedFileType] ?? FILE_TYPE_LABELS[args.file_type] ?? 
"Google Drive File"; const isNameValid = useMemo( () => args.name && typeof args.name === "string" && args.name.trim().length > 0, @@ -178,7 +191,7 @@ function ApprovalCard({ ...args, file_type: selectedFileType, connector_id: selectedAccountId ? Number(selectedAccountId) : null, - parent_folder_id: parentFolderId.trim() || null, + parent_folder_id: parentFolderId === "__root__" ? null : parentFolderId, }, }, }); @@ -201,10 +214,10 @@ function ApprovalCard({

{decided === "reject" - ? "Google Drive File Rejected" + ? `${fileTypeLabel} Rejected` : decided === "approve" || decided === "edit" - ? "Google Drive File Approved" - : "Create Google Drive File"} + ? `${fileTypeLabel} Approved` + : `Create ${fileTypeLabel}`}

{decided === "reject" @@ -226,25 +239,25 @@ function ApprovalCard({ openHitlEditPanel({ title: args.name ?? "", content: args.content ?? "", - toolName: "Google Drive File", - onSave: (newName, newContent) => { - setIsPanelOpen(false); - setDecided("edit"); - onDecision({ - type: "edit", - edited_action: { - name: interruptData.action_requests[0].name, - args: { - ...args, - name: newName, - content: newContent, - file_type: selectedFileType, - connector_id: selectedAccountId ? Number(selectedAccountId) : null, - parent_folder_id: parentFolderId.trim() || null, - }, + toolName: fileTypeLabel, + onSave: (newName, newContent) => { + setIsPanelOpen(false); + setDecided("edit"); + onDecision({ + type: "edit", + edited_action: { + name: interruptData.action_requests[0].name, + args: { + ...args, + name: newName, + content: newContent, + file_type: selectedFileType, + connector_id: selectedAccountId ? Number(selectedAccountId) : null, + parent_folder_id: parentFolderId === "__root__" ? null : parentFolderId, }, - }); - }, + }, + }); + }, }); }} > @@ -268,7 +281,7 @@ function ApprovalCard({

Google Drive Account *

- @@ -306,19 +319,29 @@ function ApprovalCard({
-
-

- Parent Folder ID (optional) -

- setParentFolderId(e.target.value)} - placeholder="Leave blank to create at Drive root" - /> +
+

+ Parent Folder +

+ + {selectedAccountId && availableParentFolders.length === 0 && (

- Paste a Google Drive folder ID to place the file in a specific folder. + No folders found. File will be created at Drive root.

-
+ )} +
)} @@ -328,14 +351,9 @@ function ApprovalCard({ {/* Content preview */}
- {args.name != null && ( -

{args.name}

- )} - {args.file_type && ( -

- {FILE_TYPE_LABELS[args.file_type] ?? args.file_type} -

- )} + {args.name != null && ( +

{args.name}

+ )} {args.content != null && (