mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-10 20:35:17 +02:00
- Replaced Playwright with Scrapling's fetchers in the web crawling and YouTube processing modules for improved performance and flexibility. - Updated proxy configuration to support dynamic proxy selection via environment variables. - Enhanced logging to track performance metrics during web scraping operations. - Refactored related modules to utilize the new proxy utilities and streamline the scraping process.
40 lines
1.3 KiB
Python
40 lines
1.3 KiB
Python
"""Modular residential / rotating proxy provider package.
|
|
|
|
Selects a provider via the ``PROXY_PROVIDER`` env var (see ``registry.py``) and
|
|
exposes proxy settings in the formats different HTTP libraries expect. Add new
|
|
vendors by implementing :class:`ProxyProvider` in ``providers/`` and registering
|
|
them in ``registry.py``.
|
|
"""
|
|
|
|
from app.utils.proxy.base import ProxyProvider
|
|
from app.utils.proxy.registry import get_active_provider
|
|
|
|
|
|
def get_proxy_url() -> str | None:
|
|
"""Canonical ``http://user:pass@host:port`` URL for Scrapling/curl_cffi."""
|
|
return get_active_provider().get_proxy_url()
|
|
|
|
|
|
def get_playwright_proxy() -> dict[str, str] | None:
|
|
"""Playwright-style proxy dict, or ``None`` when not configured."""
|
|
return get_active_provider().get_playwright_proxy()
|
|
|
|
|
|
def get_requests_proxies() -> dict[str, str] | None:
|
|
"""``{"http": ..., "https": ...}`` dict for requests/aiohttp, or ``None``."""
|
|
return get_active_provider().get_requests_proxies()
|
|
|
|
|
|
def get_residential_proxy_url() -> str | None:
|
|
"""Backward-compatible alias for :func:`get_proxy_url`."""
|
|
return get_proxy_url()
|
|
|
|
|
|
__all__ = [
|
|
"ProxyProvider",
|
|
"get_active_provider",
|
|
"get_playwright_proxy",
|
|
"get_proxy_url",
|
|
"get_requests_proxies",
|
|
"get_residential_proxy_url",
|
|
]
|