diff --git a/surfsense_backend/.env.example b/surfsense_backend/.env.example index 628329917..c4facc84d 100644 --- a/surfsense_backend/.env.example +++ b/surfsense_backend/.env.example @@ -143,6 +143,15 @@ STT_SERVICE=local/base PAGES_LIMIT=500 +# Residential Proxy Configuration (anonymous-proxies.net) +# Used for web crawling, link previews, and YouTube transcript fetching to avoid IP bans. +# Leave commented out to disable proxying. +# RESIDENTIAL_PROXY_USERNAME=your_proxy_username +# RESIDENTIAL_PROXY_PASSWORD=your_proxy_password +# RESIDENTIAL_PROXY_HOSTNAME=rotating.dnsproxifier.com:31230 +# RESIDENTIAL_PROXY_LOCATION= +# RESIDENTIAL_PROXY_TYPE=1 + FIRECRAWL_API_KEY=fcr-01J0000000000000000000000 # File Parser Service diff --git a/surfsense_backend/alembic/versions/93_add_image_generations_table.py b/surfsense_backend/alembic/versions/93_add_image_generations_table.py index eba9d7c86..d2cee5af4 100644 --- a/surfsense_backend/alembic/versions/93_add_image_generations_table.py +++ b/surfsense_backend/alembic/versions/93_add_image_generations_table.py @@ -13,8 +13,7 @@ Changes: from collections.abc import Sequence import sqlalchemy as sa -from sqlalchemy.dialects.postgresql import ENUM as PG_ENUM -from sqlalchemy.dialects.postgresql import JSONB, UUID +from sqlalchemy.dialects.postgresql import ENUM as PG_ENUM, JSONB, UUID from alembic import op diff --git a/surfsense_backend/app/agents/new_chat/tools/link_preview.py b/surfsense_backend/app/agents/new_chat/tools/link_preview.py index 3e2070a14..bf7b4af38 100644 --- a/surfsense_backend/app/agents/new_chat/tools/link_preview.py +++ b/surfsense_backend/app/agents/new_chat/tools/link_preview.py @@ -17,6 +17,8 @@ from fake_useragent import UserAgent from langchain_core.tools import tool from playwright.async_api import async_playwright +from app.utils.proxy_config import get_playwright_proxy, get_residential_proxy_url + logger = logging.getLogger(__name__) @@ -186,9 +188,15 @@ async def fetch_with_chromium(url: str) -> dict[str, Any] | None: ua = UserAgent() user_agent = ua.random + # Use residential proxy if configured + playwright_proxy = get_playwright_proxy() + # Use Playwright to fetch the page async with async_playwright() as p: - browser = await p.chromium.launch(headless=True) + launch_kwargs: dict = {"headless": True} + if playwright_proxy: + launch_kwargs["proxy"] = playwright_proxy + browser = await p.chromium.launch(**launch_kwargs) context = await browser.new_context(user_agent=user_agent) page = await context.new_page() @@ -283,12 +291,16 @@ def create_link_preview_tool(): ua = UserAgent() user_agent = ua.random + # Use residential proxy if configured + proxy_url = get_residential_proxy_url() + # Use a browser-like User-Agent to fetch Open Graph metadata. # We're only fetching publicly available metadata (title, description, thumbnail) # that websites intentionally expose via OG tags for link preview purposes. async with httpx.AsyncClient( timeout=10.0, follow_redirects=True, + proxy=proxy_url, headers={ "User-Agent": user_agent, "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8", diff --git a/surfsense_backend/app/agents/new_chat/tools/scrape_webpage.py b/surfsense_backend/app/agents/new_chat/tools/scrape_webpage.py index 24f15edba..e3c58c857 100644 --- a/surfsense_backend/app/agents/new_chat/tools/scrape_webpage.py +++ b/surfsense_backend/app/agents/new_chat/tools/scrape_webpage.py @@ -2,17 +2,26 @@ Web scraping tool for the SurfSense agent. 
This module provides a tool for scraping and extracting content from webpages -using the existing WebCrawlerConnector. The scraped content can be used by -the agent to answer questions about web pages. +using the existing WebCrawlerConnector. For YouTube URLs, it fetches the +transcript directly via the YouTubeTranscriptApi instead of crawling the page. """ import hashlib +import logging from typing import Any from urllib.parse import urlparse +import aiohttp +from fake_useragent import UserAgent from langchain_core.tools import tool +from requests import Session +from youtube_transcript_api import YouTubeTranscriptApi from app.connectors.webcrawler_connector import WebCrawlerConnector +from app.tasks.document_processors.youtube_processor import get_youtube_video_id +from app.utils.proxy_config import get_requests_proxies + +logger = logging.getLogger(__name__) def extract_domain(url: str) -> str: @@ -57,6 +66,89 @@ def truncate_content(content: str, max_length: int = 50000) -> tuple[str, bool]: return truncated + "\n\n[Content truncated...]", True +async def _scrape_youtube_video( + url: str, video_id: str, max_length: int +) -> dict[str, Any]: + """ + Fetch YouTube video metadata and transcript via the YouTubeTranscriptApi. + + Returns a result dict in the same shape as the regular scrape_webpage output. + """ + scrape_id = generate_scrape_id(url) + domain = "youtube.com" + + # --- Video metadata via oEmbed --- + residential_proxies = get_requests_proxies() + + params = { + "format": "json", + "url": f"https://www.youtube.com/watch?v={video_id}", + } + oembed_url = "https://www.youtube.com/oembed" + + try: + async with ( + aiohttp.ClientSession() as http_session, + http_session.get( + oembed_url, + params=params, + proxy=residential_proxies["http"] if residential_proxies else None, + ) as response, + ): + video_data = await response.json() + except Exception: + video_data = {} + + title = video_data.get("title", "YouTube Video") + author = video_data.get("author_name", "Unknown") + + # --- Transcript via YouTubeTranscriptApi --- + try: + ua = UserAgent() + http_client = Session() + http_client.headers.update({"User-Agent": ua.random}) + if residential_proxies: + http_client.proxies.update(residential_proxies) + ytt_api = YouTubeTranscriptApi(http_client=http_client) + captions = ytt_api.fetch(video_id) + + transcript_segments = [] + for line in captions: + start_time = line.start + duration = line.duration + text = line.text + timestamp = f"[{start_time:.2f}s-{start_time + duration:.2f}s]" + transcript_segments.append(f"{timestamp} {text}") + transcript_text = "\n".join(transcript_segments) + except Exception as e: + logger.warning(f"[scrape_webpage] No transcript for video {video_id}: {e}") + transcript_text = f"No captions available for this video. 
Error: {e!s}" + + # Build combined content + content = f"# {title}\n\n**Author:** {author}\n**Video ID:** {video_id}\n\n## Transcript\n\n{transcript_text}" + + # Truncate if needed + content, was_truncated = truncate_content(content, max_length) + word_count = len(content.split()) + + description = f"YouTube video by {author}" + + return { + "id": scrape_id, + "assetId": url, + "kind": "article", + "href": url, + "title": title, + "description": description, + "content": content, + "domain": domain, + "word_count": word_count, + "was_truncated": was_truncated, + "crawler_type": "youtube_transcript", + "author": author, + } + + def create_scrape_webpage_tool(firecrawl_api_key: str | None = None): """ Factory function to create the scrape_webpage tool. @@ -79,7 +171,8 @@ def create_scrape_webpage_tool(firecrawl_api_key: str | None = None): Use this tool when the user wants you to read, summarize, or answer questions about a specific webpage's content. This tool actually - fetches and reads the full page content. + fetches and reads the full page content. For YouTube video URLs it + fetches the transcript directly instead of crawling the page. Common triggers: - "Read this article and summarize it" @@ -114,6 +207,11 @@ def create_scrape_webpage_tool(firecrawl_api_key: str | None = None): url = f"https://{url}" try: + # Check if this is a YouTube URL and use transcript API instead + video_id = get_youtube_video_id(url) + if video_id: + return await _scrape_youtube_video(url, video_id, max_length) + # Create webcrawler connector connector = WebCrawlerConnector(firecrawl_api_key=firecrawl_api_key) @@ -184,7 +282,7 @@ def create_scrape_webpage_tool(firecrawl_api_key: str | None = None): except Exception as e: error_message = str(e) - print(f"[scrape_webpage] Error scraping {url}: {error_message}") + logger.error(f"[scrape_webpage] Error scraping {url}: {error_message}") return { "id": scrape_id, "assetId": url, diff --git a/surfsense_backend/app/config/__init__.py b/surfsense_backend/app/config/__init__.py index bb299e583..e102c414d 100644 --- a/surfsense_backend/app/config/__init__.py +++ b/surfsense_backend/app/config/__init__.py @@ -360,6 +360,14 @@ class Config: # LlamaCloud API Key LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY") + # Residential Proxy Configuration (anonymous-proxies.net) + # Used for web crawling and YouTube transcript fetching to avoid IP bans. 
+ RESIDENTIAL_PROXY_USERNAME = os.getenv("RESIDENTIAL_PROXY_USERNAME") + RESIDENTIAL_PROXY_PASSWORD = os.getenv("RESIDENTIAL_PROXY_PASSWORD") + RESIDENTIAL_PROXY_HOSTNAME = os.getenv("RESIDENTIAL_PROXY_HOSTNAME") + RESIDENTIAL_PROXY_LOCATION = os.getenv("RESIDENTIAL_PROXY_LOCATION", "") + RESIDENTIAL_PROXY_TYPE = int(os.getenv("RESIDENTIAL_PROXY_TYPE", "1")) + # Litellm TTS Configuration TTS_SERVICE = os.getenv("TTS_SERVICE") TTS_SERVICE_API_BASE = os.getenv("TTS_SERVICE_API_BASE") diff --git a/surfsense_backend/app/connectors/webcrawler_connector.py b/surfsense_backend/app/connectors/webcrawler_connector.py index 5d6ea98c8..a5fb33e1a 100644 --- a/surfsense_backend/app/connectors/webcrawler_connector.py +++ b/surfsense_backend/app/connectors/webcrawler_connector.py @@ -14,6 +14,8 @@ from fake_useragent import UserAgent from firecrawl import AsyncFirecrawlApp from playwright.async_api import async_playwright +from app.utils.proxy_config import get_playwright_proxy + logger = logging.getLogger(__name__) @@ -165,9 +167,15 @@ class WebCrawlerConnector: ua = UserAgent() user_agent = ua.random + # Use residential proxy if configured + playwright_proxy = get_playwright_proxy() + # Use Playwright to fetch the page async with async_playwright() as p: - browser = await p.chromium.launch(headless=True) + launch_kwargs: dict = {"headless": True} + if playwright_proxy: + launch_kwargs["proxy"] = playwright_proxy + browser = await p.chromium.launch(**launch_kwargs) context = await browser.new_context(user_agent=user_agent) page = await context.new_page() diff --git a/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py b/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py index 760651589..a35528a93 100644 --- a/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py +++ b/surfsense_backend/app/tasks/celery_tasks/connector_tasks.py @@ -15,12 +15,12 @@ logger = logging.getLogger(__name__) def _handle_greenlet_error(e: Exception, task_name: str, connector_id: int) -> None: """ Handle greenlet_spawn errors with detailed logging for debugging. - + The 'greenlet_spawn has not been called' error occurs when: 1. SQLAlchemy lazy-loads a relationship outside of an async context 2. A sync operation is called from an async context (or vice versa) 3. Session objects are accessed after the session is closed - + This helper logs detailed context to help identify the root cause. 
""" error_str = str(e) diff --git a/surfsense_backend/app/tasks/connector_indexers/base.py b/surfsense_backend/app/tasks/connector_indexers/base.py index b801b67d6..5dc438b9b 100644 --- a/surfsense_backend/app/tasks/connector_indexers/base.py +++ b/surfsense_backend/app/tasks/connector_indexers/base.py @@ -52,8 +52,8 @@ def parse_date_flexible(date_str: str) -> datetime: # Try ISO format as fallback try: return datetime.fromisoformat(date_str.replace("Z", "+00:00")) - except ValueError: - raise ValueError(f"Unable to parse date: {date_str}") + except ValueError as err: + raise ValueError(f"Unable to parse date: {date_str}") from err async def check_duplicate_document_by_hash( diff --git a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py index ed300898c..ba494bb9f 100644 --- a/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/notion_indexer.py @@ -217,7 +217,7 @@ async def index_notion_pages( ) await task_logger.log_task_failure( log_entry, - f"Failed to get Notion pages: Notion API limitation", + "Failed to get Notion pages: Notion API limitation", f"{error_str} - This page contains Notion AI content (transcription/ai_block) that cannot be accessed via the API.", {"error_type": "UnsupportedBlockType", "is_known_limitation": True}, ) diff --git a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py index a2f0898ba..82ef8870d 100644 --- a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py +++ b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py @@ -138,7 +138,7 @@ async def index_crawled_urls( f"No URLs provided for indexing. 
Connector ID: {connector_id}, " f"Connector name: {connector.name}, " f"Config keys: {list(connector.config.keys()) if connector.config else 'None'}, " - f"INITIAL_URLS raw value: {repr(raw_initial_urls)}" + f"INITIAL_URLS raw value: {raw_initial_urls!r}" ) await task_logger.log_task_failure( log_entry, diff --git a/surfsense_backend/app/tasks/document_processors/youtube_processor.py b/surfsense_backend/app/tasks/document_processors/youtube_processor.py index 7251fb22f..e5599e78b 100644 --- a/surfsense_backend/app/tasks/document_processors/youtube_processor.py +++ b/surfsense_backend/app/tasks/document_processors/youtube_processor.py @@ -6,6 +6,8 @@ import logging from urllib.parse import parse_qs, urlparse import aiohttp +from fake_useragent import UserAgent +from requests import Session from sqlalchemy.exc import SQLAlchemyError from sqlalchemy.ext.asyncio import AsyncSession from youtube_transcript_api import YouTubeTranscriptApi @@ -19,6 +21,7 @@ from app.utils.document_converters import ( generate_document_summary, generate_unique_identifier_hash, ) +from app.utils.proxy_config import get_requests_proxies from .base import ( check_document_by_unique_identifier, @@ -114,9 +117,16 @@ async def add_youtube_video_document( } oembed_url = "https://www.youtube.com/oembed" + # Build residential proxy URL (if configured) + residential_proxies = get_requests_proxies() + async with ( aiohttp.ClientSession() as http_session, - http_session.get(oembed_url, params=params) as response, + http_session.get( + oembed_url, + params=params, + proxy=residential_proxies["http"] if residential_proxies else None, + ) as response, ): video_data = await response.json() @@ -138,7 +148,12 @@ async def add_youtube_video_document( ) try: - ytt_api = YouTubeTranscriptApi() + ua = UserAgent() + http_client = Session() + http_client.headers.update({"User-Agent": ua.random}) + if residential_proxies: + http_client.proxies.update(residential_proxies) + ytt_api = YouTubeTranscriptApi(http_client=http_client) captions = ytt_api.fetch(video_id) # Include complete caption information with timestamps transcript_segments = [] diff --git a/surfsense_backend/app/utils/proxy_config.py b/surfsense_backend/app/utils/proxy_config.py new file mode 100644 index 000000000..de377e366 --- /dev/null +++ b/surfsense_backend/app/utils/proxy_config.py @@ -0,0 +1,86 @@ +""" +Residential proxy configuration utility. + +Reads proxy credentials from the application Config and provides helper +functions that return proxy configs in the format expected by different +HTTP libraries (requests, httpx, aiohttp, Playwright). +""" + +import base64 +import json +import logging + +from app.config import Config + +logger = logging.getLogger(__name__) + + +def _build_password_b64() -> str | None: + """ + Build the base64-encoded password dict required by anonymous-proxies.net. + + Returns ``None`` when the required config values are not set. + """ + password = Config.RESIDENTIAL_PROXY_PASSWORD + if not password: + return None + + password_dict = { + "p": password, + "l": Config.RESIDENTIAL_PROXY_LOCATION, + "t": Config.RESIDENTIAL_PROXY_TYPE, + } + return base64.b64encode(json.dumps(password_dict).encode("utf-8")).decode("utf-8") + + +def get_residential_proxy_url() -> str | None: + """ + Return the fully-formed residential proxy URL, or ``None`` when not + configured. 
+
+    The URL format is::
+
+        http://<username>:<password_b64>@<hostname>/
+    """
+    username = Config.RESIDENTIAL_PROXY_USERNAME
+    hostname = Config.RESIDENTIAL_PROXY_HOSTNAME
+    password_b64 = _build_password_b64()
+
+    if not all([username, hostname, password_b64]):
+        return None
+
+    return f"http://{username}:{password_b64}@{hostname}/"
+
+
+def get_requests_proxies() -> dict[str, str] | None:
+    """
+    Return a ``{"http": …, "https": …}`` dict suitable for
+    ``requests.Session.proxies`` (callers pass the ``"http"`` entry as the
+    ``aiohttp`` ``proxy=`` argument), or ``None`` when not configured.
+    """
+    proxy_url = get_residential_proxy_url()
+    if proxy_url is None:
+        return None
+    return {"http": proxy_url, "https": proxy_url}
+
+
+def get_playwright_proxy() -> dict[str, str] | None:
+    """
+    Return a Playwright-compatible proxy dict::
+
+        {"server": "http://host:port", "username": "…", "password": "…"}
+
+    or ``None`` when not configured.
+    """
+    username = Config.RESIDENTIAL_PROXY_USERNAME
+    hostname = Config.RESIDENTIAL_PROXY_HOSTNAME
+    password_b64 = _build_password_b64()
+
+    if not all([username, hostname, password_b64]):
+        return None
+
+    return {
+        "server": f"http://{hostname}",
+        "username": username,
+        "password": password_b64,
+    }
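
For reference, a minimal sketch of the proxy URL these helpers build. The credential values are hypothetical placeholders (only the hostname mirrors the commented example in ``.env.example``), and the snippet re-derives the ``_build_password_b64`` / ``get_residential_proxy_url`` logic standalone rather than importing the application ``Config``::

    import base64
    import json

    # Hypothetical stand-ins for the RESIDENTIAL_PROXY_* settings.
    username = "your_proxy_username"
    password = "your_proxy_password"
    hostname = "rotating.dnsproxifier.com:31230"
    location = ""   # RESIDENTIAL_PROXY_LOCATION
    proxy_type = 1  # RESIDENTIAL_PROXY_TYPE

    # anonymous-proxies.net expects the password slot of the URL to carry a
    # base64-encoded JSON payload holding the real password, location, and type.
    password_b64 = base64.b64encode(
        json.dumps({"p": password, "l": location, "t": proxy_type}).encode("utf-8")
    ).decode("utf-8")

    proxy_url = f"http://{username}:{password_b64}@{hostname}/"

    # requests / aiohttp form (get_requests_proxies):
    requests_proxies = {"http": proxy_url, "https": proxy_url}

    # Playwright form (get_playwright_proxy):
    playwright_proxy = {
        "server": f"http://{hostname}",
        "username": username,
        "password": password_b64,
    }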