""" Confluence OAuth Connector. Handles OAuth-based authentication and token refresh for Confluence API access. """ import logging from typing import Any import httpx from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select from app.config import config from app.connectors.confluence_connector import ConfluenceConnector from app.db import SearchSourceConnector from app.routes.confluence_add_connector_route import refresh_confluence_token from app.schemas.atlassian_auth_credentials import AtlassianAuthCredentialsBase from app.utils.oauth_security import TokenEncryption logger = logging.getLogger(__name__) class ConfluenceHistoryConnector: """ Confluence connector with OAuth support and automatic token refresh. This connector uses OAuth 2.0 access tokens to authenticate with the Confluence API. It automatically refreshes expired tokens when needed. Also supports legacy API token authentication for backward compatibility. """ def __init__( self, session: AsyncSession, connector_id: int, credentials: AtlassianAuthCredentialsBase | None = None, ): """ Initialize the ConfluenceHistoryConnector with auto-refresh capability. Args: session: Database session for updating connector connector_id: Connector ID for direct updates credentials: Confluence OAuth credentials (optional, will be loaded from DB if not provided) """ self._session = session self._connector_id = connector_id self._credentials = credentials self._cloud_id: str | None = None self._base_url: str | None = None self._http_client: httpx.AsyncClient | None = None self._use_oauth = True self._legacy_email: str | None = None self._legacy_api_token: str | None = None self._legacy_confluence_client: ConfluenceConnector | None = None async def _get_valid_token(self) -> str: """ Get valid Confluence access token, refreshing if needed. Returns: Valid access token Raises: ValueError: If credentials are missing or invalid Exception: If token refresh fails """ # Load credentials from DB if not provided if self._credentials is None: result = await self._session.execute( select(SearchSourceConnector).filter( SearchSourceConnector.id == self._connector_id ) ) connector = result.scalars().first() if not connector: raise ValueError(f"Connector {self._connector_id} not found") config_data = connector.config.copy() # Check if using OAuth or legacy API token is_oauth = config_data.get("_token_encrypted", False) or config_data.get( "access_token" ) if is_oauth: # OAuth 2.0 authentication # Check if access_token exists before processing raw_access_token = config_data.get("access_token") if not raw_access_token: raise ValueError( "Confluence access token not found. " "Please reconnect your Confluence account." ) # Decrypt credentials if they are encrypted token_encrypted = config_data.get("_token_encrypted", False) if token_encrypted and config.SECRET_KEY: try: token_encryption = TokenEncryption(config.SECRET_KEY) # Decrypt sensitive fields if config_data.get("access_token"): config_data["access_token"] = ( token_encryption.decrypt_token( config_data["access_token"] ) ) if config_data.get("refresh_token"): config_data["refresh_token"] = ( token_encryption.decrypt_token( config_data["refresh_token"] ) ) logger.info( f"Decrypted Confluence credentials for connector {self._connector_id}" ) except Exception as e: logger.error( f"Failed to decrypt Confluence credentials for connector {self._connector_id}: {e!s}" ) raise ValueError( f"Failed to decrypt Confluence credentials: {e!s}" ) from e # Final validation after decryption final_token = config_data.get("access_token") if not final_token or ( isinstance(final_token, str) and not final_token.strip() ): raise ValueError( "Confluence access token is invalid or empty. " "Please reconnect your Confluence account." ) try: self._credentials = AtlassianAuthCredentialsBase.from_dict( config_data ) # Store cloud_id and base_url for API calls (with backward compatibility for site_url) self._cloud_id = config_data.get("cloud_id") self._base_url = config_data.get("base_url") or config_data.get( "site_url" ) self._use_oauth = True except Exception as e: raise ValueError( f"Invalid Confluence OAuth credentials: {e!s}" ) from e else: # Legacy API token authentication self._legacy_email = config_data.get("CONFLUENCE_EMAIL") self._legacy_api_token = config_data.get("CONFLUENCE_API_TOKEN") self._base_url = config_data.get("CONFLUENCE_BASE_URL") self._use_oauth = False if ( not self._legacy_email or not self._legacy_api_token or not self._base_url ): raise ValueError( "Confluence credentials not found in connector config" ) # Check if token is expired and refreshable (only for OAuth) if ( self._use_oauth and self._credentials.is_expired and self._credentials.is_refreshable ): try: logger.info( f"Confluence token expired for connector {self._connector_id}, refreshing..." ) # Get connector for refresh result = await self._session.execute( select(SearchSourceConnector).filter( SearchSourceConnector.id == self._connector_id ) ) connector = result.scalars().first() if not connector: raise RuntimeError( f"Connector {self._connector_id} not found; cannot refresh token." ) # Refresh token connector = await refresh_confluence_token(self._session, connector) # Reload credentials after refresh config_data = connector.config.copy() token_encrypted = config_data.get("_token_encrypted", False) if token_encrypted and config.SECRET_KEY: token_encryption = TokenEncryption(config.SECRET_KEY) if config_data.get("access_token"): config_data["access_token"] = token_encryption.decrypt_token( config_data["access_token"] ) if config_data.get("refresh_token"): config_data["refresh_token"] = token_encryption.decrypt_token( config_data["refresh_token"] ) self._credentials = AtlassianAuthCredentialsBase.from_dict(config_data) self._cloud_id = config_data.get("cloud_id") # Handle backward compatibility: check both base_url and site_url self._base_url = config_data.get("base_url") or config_data.get( "site_url" ) # Invalidate cached client so it's recreated with new token if self._http_client: await self._http_client.aclose() self._http_client = None logger.info( f"Successfully refreshed Confluence token for connector {self._connector_id}" ) except Exception as e: logger.error( f"Failed to refresh Confluence token for connector {self._connector_id}: {e!s}" ) raise Exception( f"Failed to refresh Confluence OAuth credentials: {e!s}" ) from e if self._use_oauth: return self._credentials.access_token else: # For legacy auth, return empty string (not used for token-based auth) return "" async def _get_client(self) -> httpx.AsyncClient: """ Get or create HTTP client with valid token. Returns: httpx.AsyncClient instance """ if self._http_client is None: self._http_client = httpx.AsyncClient(timeout=30.0) return self._http_client async def _get_legacy_client(self) -> ConfluenceConnector: """ Get or create ConfluenceConnector with legacy credentials. Returns: ConfluenceConnector instance """ if self._legacy_confluence_client is None: self._legacy_confluence_client = ConfluenceConnector( base_url=self._base_url, email=self._legacy_email, api_token=self._legacy_api_token, ) return self._legacy_confluence_client async def _get_base_url(self) -> str: """ Get the base URL for Confluence API calls. Returns: Base URL string """ if not self._use_oauth: # For legacy auth, use the base_url directly return self._base_url or "" if not self._cloud_id: raise ValueError("Cloud ID not available. Cannot construct API URL.") # Use the Atlassian API format: https://api.atlassian.com/ex/confluence/{cloudid} return f"https://api.atlassian.com/ex/confluence/{self._cloud_id}" async def _make_api_request( self, endpoint: str, params: dict[str, Any] | None = None ) -> dict[str, Any]: """ Make a request to the Confluence API. Args: endpoint: API endpoint (without base URL) params: Query parameters for the request (optional) Returns: Response data from the API Raises: ValueError: If credentials have not been set Exception: If the API request fails """ if not self._use_oauth: # Use legacy ConfluenceConnector for API requests client = await self._get_legacy_client() # ConfluenceConnector uses synchronous requests, so we need to handle this differently # For now, we'll use the legacy client's make_api_request method # But since it's sync, we'll need to wrap it import asyncio loop = asyncio.get_event_loop() return await loop.run_in_executor( None, client.make_api_request, endpoint, params ) # OAuth flow token = await self._get_valid_token() base_url = await self._get_base_url() http_client = await self._get_client() url = f"{base_url}/wiki/api/v2/{endpoint}" headers = { "Content-Type": "application/json", "Authorization": f"Bearer {token}", "Accept": "application/json", } try: response = await http_client.get(url, headers=headers, params=params) response.raise_for_status() return response.json() except httpx.HTTPStatusError as e: # Enhanced error logging to see the actual error error_detail = { "status_code": e.response.status_code, "url": str(e.request.url), "response_text": e.response.text, "headers": dict(e.response.headers), } logger.error(f"Confluence API HTTP error: {error_detail}") raise Exception( f"Confluence API request failed (HTTP {e.response.status_code}): {e.response.text}" ) from e except httpx.RequestError as e: logger.error(f"Confluence API request error: {e!s}", exc_info=True) raise Exception(f"Confluence API request failed: {e!s}") from e async def get_all_spaces(self) -> list[dict[str, Any]]: """ Fetch all spaces from Confluence. Returns: List of space objects Raises: ValueError: If credentials have not been set Exception: If the API request fails """ params = { "limit": 100, } all_spaces = [] cursor = None while True: if cursor: params["cursor"] = cursor result = await self._make_api_request("spaces", params) if not isinstance(result, dict) or "results" not in result: raise Exception("Invalid response from Confluence API") spaces = result["results"] all_spaces.extend(spaces) # Check if there are more spaces to fetch links = result.get("_links", {}) if "next" not in links: break # Extract cursor from next link if available next_link = links["next"] if "cursor=" in next_link: cursor = next_link.split("cursor=")[1].split("&")[0] else: break return all_spaces async def get_pages_in_space( self, space_id: str, include_body: bool = True ) -> list[dict[str, Any]]: """ Fetch all pages in a specific space. Args: space_id: The ID of the space to fetch pages from include_body: Whether to include page body content Returns: List of page objects Raises: ValueError: If credentials have not been set Exception: If the API request fails """ params = { "limit": 100, } if include_body: params["body-format"] = "storage" all_pages = [] cursor = None while True: if cursor: params["cursor"] = cursor result = await self._make_api_request(f"spaces/{space_id}/pages", params) if not isinstance(result, dict) or "results" not in result: raise Exception("Invalid response from Confluence API") pages = result["results"] all_pages.extend(pages) # Check if there are more pages to fetch links = result.get("_links", {}) if "next" not in links: break # Extract cursor from next link if available next_link = links["next"] if "cursor=" in next_link: cursor = next_link.split("cursor=")[1].split("&")[0] else: break return all_pages async def get_page_comments(self, page_id: str) -> list[dict[str, Any]]: """ Fetch all comments for a specific page (both footer and inline comments). Args: page_id: The ID of the page to fetch comments from Returns: List of comment objects Raises: ValueError: If credentials have not been set Exception: If the API request fails """ all_comments = [] # Get footer comments footer_comments = await self._get_comments_for_page(page_id, "footer-comments") all_comments.extend(footer_comments) # Get inline comments inline_comments = await self._get_comments_for_page(page_id, "inline-comments") all_comments.extend(inline_comments) return all_comments async def _get_comments_for_page( self, page_id: str, comment_type: str ) -> list[dict[str, Any]]: """ Helper method to fetch comments of a specific type for a page. Args: page_id: The ID of the page comment_type: Type of comments ('footer-comments' or 'inline-comments') Returns: List of comment objects """ params = { "limit": 100, "body-format": "storage", } all_comments = [] cursor = None while True: if cursor: params["cursor"] = cursor result = await self._make_api_request( f"pages/{page_id}/{comment_type}", params ) if not isinstance(result, dict) or "results" not in result: break # No comments or invalid response comments = result["results"] all_comments.extend(comments) # Check if there are more comments to fetch links = result.get("_links", {}) if "next" not in links: break # Extract cursor from next link if available next_link = links["next"] if "cursor=" in next_link: cursor = next_link.split("cursor=")[1].split("&")[0] else: break return all_comments async def get_pages_by_date_range( self, start_date: str, end_date: str, space_ids: list[str] | None = None, include_comments: bool = True, ) -> tuple[list[dict[str, Any]], str | None]: """ Fetch pages within a date range, optionally filtered by spaces. Args: start_date: Start date in YYYY-MM-DD format end_date: End date in YYYY-MM-DD format (inclusive) space_ids: Optional list of space IDs to filter pages include_comments: Whether to include comments for each page Returns: Tuple containing (pages list with comments, error message or None) """ try: if not self._use_oauth: # Use legacy ConfluenceConnector for API requests client = await self._get_legacy_client() # Ensure credentials are loaded await self._get_valid_token() # ConfluenceConnector.get_pages_by_date_range is synchronous import asyncio loop = asyncio.get_event_loop() return await loop.run_in_executor( None, client.get_pages_by_date_range, start_date, end_date, space_ids, include_comments, ) # OAuth flow all_pages = [] if space_ids: # Fetch pages from specific spaces for space_id in space_ids: pages = await self.get_pages_in_space(space_id, include_body=True) all_pages.extend(pages) else: # Fetch all pages (this might be expensive for large instances) params = { "limit": 100, "body-format": "storage", } cursor = None while True: if cursor: params["cursor"] = cursor result = await self._make_api_request("pages", params) if not isinstance(result, dict) or "results" not in result: break pages = result["results"] all_pages.extend(pages) links = result.get("_links", {}) if "next" not in links: break next_link = links["next"] if "cursor=" in next_link: cursor = next_link.split("cursor=")[1].split("&")[0] else: break return all_pages, None except Exception as e: return [], f"Error fetching pages: {e!s}" async def close(self): """Close the HTTP client connection.""" if self._http_client: await self._http_client.aclose() self._http_client = None # Legacy client doesn't need explicit closing self._legacy_confluence_client = None async def __aenter__(self): """Async context manager entry.""" return self async def __aexit__(self, exc_type, exc_val, exc_tb): """Async context manager exit.""" await self.close()