mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-17 18:35:19 +02:00
feat: added circleback connector
This commit is contained in:
parent
23870042f3
commit
c19d300c9d
27 changed files with 1153 additions and 97 deletions
|
|
@ -8,13 +8,12 @@ from .folder_manager import get_files_in_folder, list_folder_contents
|
|||
|
||||
__all__ = [
|
||||
"GoogleDriveClient",
|
||||
"get_valid_credentials",
|
||||
"validate_credentials",
|
||||
"download_and_process_file",
|
||||
"get_files_in_folder",
|
||||
"list_folder_contents",
|
||||
"get_start_page_token",
|
||||
"fetch_all_changes",
|
||||
"categorize_change",
|
||||
"download_and_process_file",
|
||||
"fetch_all_changes",
|
||||
"get_files_in_folder",
|
||||
"get_start_page_token",
|
||||
"get_valid_credentials",
|
||||
"list_folder_contents",
|
||||
"validate_credentials",
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -202,4 +202,3 @@ async def fetch_all_changes(
|
|||
except Exception as e:
|
||||
logger.error(f"Error fetching all changes: {e!s}", exc_info=True)
|
||||
return all_changes, current_token, f"Error fetching all changes: {e!s}"
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from google.oauth2.credentials import Credentials
|
||||
from googleapiclient.discovery import build
|
||||
from googleapiclient.errors import HttpError
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
|
@ -107,16 +106,18 @@ class GoogleDriveClient:
|
|||
"""
|
||||
try:
|
||||
service = await self.get_service()
|
||||
file = service.files().get(fileId=file_id, fields=fields, supportsAllDrives=True).execute()
|
||||
file = (
|
||||
service.files()
|
||||
.get(fileId=file_id, fields=fields, supportsAllDrives=True)
|
||||
.execute()
|
||||
)
|
||||
return file, None
|
||||
except HttpError as e:
|
||||
return None, f"HTTP error getting file metadata: {e.resp.status}"
|
||||
except Exception as e:
|
||||
return None, f"Error getting file metadata: {e!s}"
|
||||
|
||||
async def download_file(
|
||||
self, file_id: str
|
||||
) -> tuple[bytes | None, str | None]:
|
||||
async def download_file(self, file_id: str) -> tuple[bytes | None, str | None]:
|
||||
"""
|
||||
Download binary file content.
|
||||
|
||||
|
|
@ -164,9 +165,7 @@ class GoogleDriveClient:
|
|||
try:
|
||||
service = await self.get_service()
|
||||
content = (
|
||||
service.files()
|
||||
.export(fileId=file_id, mimeType=mime_type)
|
||||
.execute()
|
||||
service.files().export(fileId=file_id, mimeType=mime_type).execute()
|
||||
)
|
||||
|
||||
# Content is already bytes from the API
|
||||
|
|
@ -180,4 +179,3 @@ class GoogleDriveClient:
|
|||
return None, f"HTTP error exporting file: {e.resp.status}"
|
||||
except Exception as e:
|
||||
return None, f"Error exporting file: {e!s}"
|
||||
|
||||
|
|
|
|||
|
|
@ -78,10 +78,10 @@ async def download_and_process_file(
|
|||
tmp_file.write(content_bytes)
|
||||
temp_file_path = tmp_file.name
|
||||
|
||||
from app.db import DocumentType
|
||||
from app.tasks.document_processors.file_processors import (
|
||||
process_file_in_background,
|
||||
)
|
||||
from app.db import DocumentType
|
||||
|
||||
connector_info = {
|
||||
"type": DocumentType.GOOGLE_DRIVE_FILE,
|
||||
|
|
@ -92,7 +92,7 @@ async def download_and_process_file(
|
|||
"source_connector": "google_drive",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
# Add additional Drive metadata if available
|
||||
if "modifiedTime" in file:
|
||||
connector_info["metadata"]["modified_time"] = file["modifiedTime"]
|
||||
|
|
@ -102,10 +102,12 @@ async def download_and_process_file(
|
|||
connector_info["metadata"]["file_size"] = file["size"]
|
||||
if "webViewLink" in file:
|
||||
connector_info["metadata"]["web_view_link"] = file["webViewLink"]
|
||||
|
||||
|
||||
if is_google_workspace_file(mime_type):
|
||||
connector_info["metadata"]["exported_as"] = "pdf"
|
||||
connector_info["metadata"]["original_workspace_type"] = mime_type.split(".")[-1]
|
||||
connector_info["metadata"]["original_workspace_type"] = mime_type.split(
|
||||
"."
|
||||
)[-1]
|
||||
|
||||
logger.info(f"Processing {file_name} with Surfsense's file processor")
|
||||
await process_file_in_background(
|
||||
|
|
@ -132,5 +134,3 @@ async def download_and_process_file(
|
|||
os.unlink(temp_file_path)
|
||||
except Exception as e:
|
||||
logger.debug(f"Could not delete temp file {temp_file_path}: {e}")
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
|||
from sqlalchemy.future import select
|
||||
from sqlalchemy.orm.attributes import flag_modified
|
||||
|
||||
from app.db import SearchSourceConnector, SearchSourceConnectorType
|
||||
from app.db import SearchSourceConnector
|
||||
|
||||
|
||||
async def get_valid_credentials(
|
||||
|
|
@ -31,9 +31,7 @@ async def get_valid_credentials(
|
|||
Exception: If token refresh fails
|
||||
"""
|
||||
result = await session.execute(
|
||||
select(SearchSourceConnector).filter(
|
||||
SearchSourceConnector.id == connector_id
|
||||
)
|
||||
select(SearchSourceConnector).filter(SearchSourceConnector.id == connector_id)
|
||||
)
|
||||
connector = result.scalars().first()
|
||||
|
||||
|
|
@ -95,4 +93,3 @@ def validate_credentials(credentials: Credentials) -> bool:
|
|||
credentials.refresh_token,
|
||||
]
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -26,5 +26,3 @@ def should_skip_file(mime_type: str) -> bool:
|
|||
def get_export_mime_type(mime_type: str) -> str | None:
|
||||
"""Get export MIME type for Google Workspace files."""
|
||||
return EXPORT_FORMATS.get(mime_type)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -24,7 +24,10 @@ async def list_folders(
|
|||
"""
|
||||
try:
|
||||
# Build query to get only folders
|
||||
query_parts = ["mimeType = 'application/vnd.google-apps.folder'", "trashed = false"]
|
||||
query_parts = [
|
||||
"mimeType = 'application/vnd.google-apps.folder'",
|
||||
"trashed = false",
|
||||
]
|
||||
|
||||
if parent_id:
|
||||
query_parts.append(f"'{parent_id}' in parents")
|
||||
|
|
@ -68,8 +71,7 @@ async def get_folder_hierarchy(
|
|||
# Traverse up to root
|
||||
while current_id:
|
||||
file, error = await client.get_file_metadata(
|
||||
current_id,
|
||||
fields="id, name, parents, mimeType"
|
||||
current_id, fields="id, name, parents, mimeType"
|
||||
)
|
||||
|
||||
if error:
|
||||
|
|
@ -189,7 +191,7 @@ async def list_folder_contents(
|
|||
# Fetch all items with pagination (max 1000 per page)
|
||||
all_items = []
|
||||
page_token = None
|
||||
|
||||
|
||||
while True:
|
||||
items, next_token, error = await client.list_files(
|
||||
query=query,
|
||||
|
|
@ -202,10 +204,10 @@ async def list_folder_contents(
|
|||
return [], error
|
||||
|
||||
all_items.extend(items)
|
||||
|
||||
|
||||
if not next_token:
|
||||
break
|
||||
|
||||
|
||||
page_token = next_token
|
||||
|
||||
for item in all_items:
|
||||
|
|
@ -226,5 +228,3 @@ async def list_folder_contents(
|
|||
except Exception as e:
|
||||
logger.error(f"Error listing folder contents: {e!s}", exc_info=True)
|
||||
return [], f"Error listing folder contents: {e!s}"
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue