SurfSense/surfsense_backend/app/connectors/notion_history.py

236 lines
7.5 KiB
Python
Raw Normal View History

2025-08-21 14:43:04 -07:00
from datetime import datetime

from notion_client import AsyncClient
2025-03-14 18:53:14 -07:00
2025-03-14 18:53:14 -07:00
class NotionHistoryConnector:
def __init__(self, token):
"""
Initialize the NotionPageFetcher with a token.
2025-03-14 18:53:14 -07:00
Args:
token (str): Notion integration token
"""
2025-08-21 14:43:04 -07:00
self.notion = AsyncClient(auth=token)
2025-08-21 14:43:04 -07:00
async def close(self):
"""Close the async client connection."""
await self.notion.aclose()
async def __aenter__(self):
"""Async context manager entry."""
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
"""Async context manager exit."""
await self.close()
async def get_all_pages(self, start_date=None, end_date=None):
2025-03-14 18:53:14 -07:00
"""
Fetches all pages shared with your integration and their content.
2025-03-14 18:53:14 -07:00
Args:
start_date (str, optional): ISO 8601 date string (e.g., "2023-01-01T00:00:00Z")
end_date (str, optional): ISO 8601 date string (e.g., "2023-12-31T23:59:59Z")
2025-03-14 18:53:14 -07:00
Returns:
list: List of dictionaries containing page data
"""
# Build the filter for the search
# Note: Notion API requires specific filter structure
search_params = {}
2025-03-14 18:53:14 -07:00
# Filter for pages only (not databases)
search_params["filter"] = {"value": "page", "property": "object"}
2025-03-14 18:53:14 -07:00
# Add date filters if provided
if start_date or end_date:
date_filter = {}
2025-03-14 18:53:14 -07:00
if start_date:
date_filter["on_or_after"] = start_date
2025-03-14 18:53:14 -07:00
if end_date:
date_filter["on_or_before"] = end_date
2025-03-14 18:53:14 -07:00
# Add the date filter to the search params
if date_filter:
search_params["sort"] = {
"direction": "descending",
"timestamp": "last_edited_time",
2025-03-14 18:53:14 -07:00
}
# Paginate through all pages the integration has access to
pages = []
has_more = True
cursor = None
while has_more:
if cursor:
search_params["start_cursor"] = cursor
2025-11-03 16:00:58 -08:00
search_results = await self.notion.search(**search_params)
2025-11-03 16:00:58 -08:00
pages.extend(search_results["results"])
has_more = search_results.get("has_more", False)
2025-11-03 16:00:58 -08:00
if has_more:
cursor = search_results.get("next_cursor")
2025-03-14 18:53:14 -07:00
all_page_data = []
2025-03-14 18:53:14 -07:00
for page in pages:
page_id = page["id"]
2025-03-14 18:53:14 -07:00
# Get detailed page information
2025-08-21 14:43:04 -07:00
page_content = await self.get_page_content(page_id)
all_page_data.append(
{
"page_id": page_id,
"title": self.get_page_title(page),
"content": page_content,
}
)
2025-03-14 18:53:14 -07:00
return all_page_data
2025-03-14 18:53:14 -07:00
def get_page_title(self, page):
"""
Extracts the title from a page object.
2025-03-14 18:53:14 -07:00
Args:
page (dict): Notion page object
2025-03-14 18:53:14 -07:00
Returns:
str: Page title or a fallback string
"""
# Title can be in different properties depending on the page type
if "properties" in page:
# Try to find a title property
for _prop_name, prop_data in page["properties"].items():
2025-03-14 18:53:14 -07:00
if prop_data["type"] == "title" and len(prop_data["title"]) > 0:
return " ".join(
[text_obj["plain_text"] for text_obj in prop_data["title"]]
)
2025-03-14 18:53:14 -07:00
# If no title found, return the page ID as fallback
return f"Untitled page ({page['id']})"
2025-08-21 14:43:04 -07:00
async def get_page_content(self, page_id):
2025-03-14 18:53:14 -07:00
"""
Fetches the content (blocks) of a specific page.
2025-03-14 18:53:14 -07:00
Args:
page_id (str): The ID of the page to fetch
2025-03-14 18:53:14 -07:00
Returns:
list: List of processed blocks from the page
"""
blocks = []
has_more = True
cursor = None
2025-03-14 18:53:14 -07:00
# Paginate through all blocks
while has_more:
if cursor:
2025-08-21 14:43:04 -07:00
response = await self.notion.blocks.children.list(
block_id=page_id, start_cursor=cursor
)
2025-03-14 18:53:14 -07:00
else:
2025-08-21 14:43:04 -07:00
response = await self.notion.blocks.children.list(block_id=page_id)
2025-03-14 18:53:14 -07:00
blocks.extend(response["results"])
has_more = response["has_more"]
2025-03-14 18:53:14 -07:00
if has_more:
cursor = response["next_cursor"]
2025-03-14 18:53:14 -07:00
# Process nested blocks recursively
processed_blocks = []
for block in blocks:
2025-08-21 14:43:04 -07:00
processed_block = await self.process_block(block)
2025-03-14 18:53:14 -07:00
processed_blocks.append(processed_block)
2025-03-14 18:53:14 -07:00
return processed_blocks
2025-08-21 14:43:04 -07:00
async def process_block(self, block):
2025-03-14 18:53:14 -07:00
"""
Processes a block and recursively fetches any child blocks.
2025-03-14 18:53:14 -07:00
Args:
block (dict): The block to process
2025-03-14 18:53:14 -07:00
Returns:
dict: Processed block with content and children
"""
block_id = block["id"]
block_type = block["type"]
2025-03-14 18:53:14 -07:00
# Extract block content based on its type
content = self.extract_block_content(block)
2025-03-14 18:53:14 -07:00
# Check if block has children
has_children = block.get("has_children", False)
child_blocks = []
2025-03-14 18:53:14 -07:00
if has_children:
# Fetch and process child blocks
2025-08-21 14:43:04 -07:00
children_response = await self.notion.blocks.children.list(
block_id=block_id
)
2025-03-14 18:53:14 -07:00
for child_block in children_response["results"]:
2025-08-21 14:43:04 -07:00
child_blocks.append(await self.process_block(child_block))
2025-03-14 18:53:14 -07:00
return {
"id": block_id,
"type": block_type,
"content": content,
"children": child_blocks,
2025-03-14 18:53:14 -07:00
}
2025-03-14 18:53:14 -07:00
def extract_block_content(self, block):
"""
Extracts the content from a block based on its type.
2025-03-14 18:53:14 -07:00
Args:
block (dict): The block to extract content from
2025-03-14 18:53:14 -07:00
Returns:
str: Extracted content as a string
"""
block_type = block["type"]
2025-03-14 18:53:14 -07:00
# Different block types have different structures
if block_type in block and "rich_text" in block[block_type]:
return "".join(
[text_obj["plain_text"] for text_obj in block[block_type]["rich_text"]]
)
2025-03-14 18:53:14 -07:00
elif block_type == "image":
# Instead of returning the raw URL which may contain sensitive AWS credentials,
# return a placeholder or reference to the image
if "file" in block["image"]:
# For Notion-hosted images (which use AWS S3 pre-signed URLs)
return "[Notion Image]"
elif "external" in block["image"]:
# For external images, we can return a sanitized reference
url = block["image"]["external"]["url"]
# Only return the domain part of external URLs to avoid potential sensitive parameters
try:
from urllib.parse import urlparse
2025-03-14 18:53:14 -07:00
parsed_url = urlparse(url)
return f"[External Image from {parsed_url.netloc}]"
except Exception:
2025-03-14 18:53:14 -07:00
return "[External Image]"
elif block_type == "code":
language = block["code"]["language"]
code_text = "".join(
[text_obj["plain_text"] for text_obj in block["code"]["rich_text"]]
)
2025-03-14 18:53:14 -07:00
return f"```{language}\n{code_text}\n```"
elif block_type == "equation":
return block["equation"]["expression"]
# Add more block types as needed
2025-03-14 18:53:14 -07:00
# Return empty string for unsupported block types
return ""