feat: added circleback connector

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2025-12-30 09:00:59 -08:00
parent 23870042f3
commit c19d300c9d
27 changed files with 1153 additions and 97 deletions

View file

@ -8,13 +8,12 @@ from .folder_manager import get_files_in_folder, list_folder_contents
# Names exported when this module is star-imported (`from <module> import *`).
# NOTE(review): every entry except "GoogleDriveClient" and "categorize_change"
# appears twice in this list — presumably the removed and the added side of
# the diff hunk rendered together (the second run is alphabetically sorted).
# Confirm against the real file before deduplicating.
__all__ = [
"GoogleDriveClient",
"get_valid_credentials",
"validate_credentials",
"download_and_process_file",
"get_files_in_folder",
"list_folder_contents",
"get_start_page_token",
"fetch_all_changes",
"categorize_change",
"download_and_process_file",
"fetch_all_changes",
"get_files_in_folder",
"get_start_page_token",
"get_valid_credentials",
"list_folder_contents",
"validate_credentials",
]

View file

@ -202,4 +202,3 @@ async def fetch_all_changes(
except Exception as e:
logger.error(f"Error fetching all changes: {e!s}", exc_info=True)
return all_changes, current_token, f"Error fetching all changes: {e!s}"

View file

@ -2,7 +2,6 @@
from typing import Any
from google.oauth2.credentials import Credentials
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from sqlalchemy.ext.asyncio import AsyncSession
@ -107,16 +106,18 @@ class GoogleDriveClient:
"""
try:
service = await self.get_service()
file = service.files().get(fileId=file_id, fields=fields, supportsAllDrives=True).execute()
file = (
service.files()
.get(fileId=file_id, fields=fields, supportsAllDrives=True)
.execute()
)
return file, None
except HttpError as e:
return None, f"HTTP error getting file metadata: {e.resp.status}"
except Exception as e:
return None, f"Error getting file metadata: {e!s}"
async def download_file(
self, file_id: str
) -> tuple[bytes | None, str | None]:
async def download_file(self, file_id: str) -> tuple[bytes | None, str | None]:
"""
Download binary file content.
@ -164,9 +165,7 @@ class GoogleDriveClient:
try:
service = await self.get_service()
content = (
service.files()
.export(fileId=file_id, mimeType=mime_type)
.execute()
service.files().export(fileId=file_id, mimeType=mime_type).execute()
)
# Content is already bytes from the API
@ -180,4 +179,3 @@ class GoogleDriveClient:
return None, f"HTTP error exporting file: {e.resp.status}"
except Exception as e:
return None, f"Error exporting file: {e!s}"

View file

@ -78,10 +78,10 @@ async def download_and_process_file(
tmp_file.write(content_bytes)
temp_file_path = tmp_file.name
from app.db import DocumentType
from app.tasks.document_processors.file_processors import (
process_file_in_background,
)
from app.db import DocumentType
connector_info = {
"type": DocumentType.GOOGLE_DRIVE_FILE,
@ -92,7 +92,7 @@ async def download_and_process_file(
"source_connector": "google_drive",
},
}
# Add additional Drive metadata if available
if "modifiedTime" in file:
connector_info["metadata"]["modified_time"] = file["modifiedTime"]
@ -102,10 +102,12 @@ async def download_and_process_file(
connector_info["metadata"]["file_size"] = file["size"]
if "webViewLink" in file:
connector_info["metadata"]["web_view_link"] = file["webViewLink"]
if is_google_workspace_file(mime_type):
connector_info["metadata"]["exported_as"] = "pdf"
connector_info["metadata"]["original_workspace_type"] = mime_type.split(".")[-1]
connector_info["metadata"]["original_workspace_type"] = mime_type.split(
"."
)[-1]
logger.info(f"Processing {file_name} with Surfsense's file processor")
await process_file_in_background(
@ -132,5 +134,3 @@ async def download_and_process_file(
os.unlink(temp_file_path)
except Exception as e:
logger.debug(f"Could not delete temp file {temp_file_path}: {e}")

View file

@ -9,7 +9,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy.orm.attributes import flag_modified
from app.db import SearchSourceConnector, SearchSourceConnectorType
from app.db import SearchSourceConnector
async def get_valid_credentials(
@ -31,9 +31,7 @@ async def get_valid_credentials(
Exception: If token refresh fails
"""
result = await session.execute(
select(SearchSourceConnector).filter(
SearchSourceConnector.id == connector_id
)
select(SearchSourceConnector).filter(SearchSourceConnector.id == connector_id)
)
connector = result.scalars().first()
@ -95,4 +93,3 @@ def validate_credentials(credentials: Credentials) -> bool:
credentials.refresh_token,
]
)

View file

@ -26,5 +26,3 @@ def should_skip_file(mime_type: str) -> bool:
def get_export_mime_type(mime_type: str) -> str | None:
    """Look up the export MIME type for a Google Workspace file.

    Args:
        mime_type: MIME type of the source file, used as the lookup key
            into ``EXPORT_FORMATS``.

    Returns:
        Whatever ``EXPORT_FORMATS.get`` yields for ``mime_type``; per the
        return annotation this is a MIME type string, or ``None`` when no
        export format is registered for it.
    """
    export_mime_type = EXPORT_FORMATS.get(mime_type)
    return export_mime_type

View file

@ -24,7 +24,10 @@ async def list_folders(
"""
try:
# Build query to get only folders
query_parts = ["mimeType = 'application/vnd.google-apps.folder'", "trashed = false"]
query_parts = [
"mimeType = 'application/vnd.google-apps.folder'",
"trashed = false",
]
if parent_id:
query_parts.append(f"'{parent_id}' in parents")
@ -68,8 +71,7 @@ async def get_folder_hierarchy(
# Traverse up to root
while current_id:
file, error = await client.get_file_metadata(
current_id,
fields="id, name, parents, mimeType"
current_id, fields="id, name, parents, mimeType"
)
if error:
@ -189,7 +191,7 @@ async def list_folder_contents(
# Fetch all items with pagination (max 1000 per page)
all_items = []
page_token = None
while True:
items, next_token, error = await client.list_files(
query=query,
@ -202,10 +204,10 @@ async def list_folder_contents(
return [], error
all_items.extend(items)
if not next_token:
break
page_token = next_token
for item in all_items:
@ -226,5 +228,3 @@ async def list_folder_contents(
except Exception as e:
logger.error(f"Error listing folder contents: {e!s}", exc_info=True)
return [], f"Error listing folder contents: {e!s}"