mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-29 19:35:20 +02:00
Merge pull request #789 from MODSetter/dev
feat: rotating proxy support
This commit is contained in:
commit
ac35f9d674
14 changed files with 261 additions and 26 deletions
|
|
@ -143,6 +143,15 @@ STT_SERVICE=local/base
|
||||||
PAGES_LIMIT=500
|
PAGES_LIMIT=500
|
||||||
|
|
||||||
|
|
||||||
|
# Residential Proxy Configuration (anonymous-proxies.net)
|
||||||
|
# Used for web crawling, link previews, and YouTube transcript fetching to avoid IP bans.
|
||||||
|
# Leave commented out to disable proxying.
|
||||||
|
# RESIDENTIAL_PROXY_USERNAME=your_proxy_username
|
||||||
|
# RESIDENTIAL_PROXY_PASSWORD=your_proxy_password
|
||||||
|
# RESIDENTIAL_PROXY_HOSTNAME=rotating.dnsproxifier.com:31230
|
||||||
|
# RESIDENTIAL_PROXY_LOCATION=
|
||||||
|
# RESIDENTIAL_PROXY_TYPE=1
|
||||||
|
|
||||||
FIRECRAWL_API_KEY=fcr-01J0000000000000000000000
|
FIRECRAWL_API_KEY=fcr-01J0000000000000000000000
|
||||||
|
|
||||||
# File Parser Service
|
# File Parser Service
|
||||||
|
|
|
||||||
|
|
@ -13,8 +13,7 @@ Changes:
|
||||||
from collections.abc import Sequence
|
from collections.abc import Sequence
|
||||||
|
|
||||||
import sqlalchemy as sa
|
import sqlalchemy as sa
|
||||||
from sqlalchemy.dialects.postgresql import ENUM as PG_ENUM
|
from sqlalchemy.dialects.postgresql import ENUM as PG_ENUM, JSONB, UUID
|
||||||
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
|
||||||
|
|
||||||
from alembic import op
|
from alembic import op
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,8 @@ from fake_useragent import UserAgent
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
from playwright.async_api import async_playwright
|
from playwright.async_api import async_playwright
|
||||||
|
|
||||||
|
from app.utils.proxy_config import get_playwright_proxy, get_residential_proxy_url
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -186,9 +188,15 @@ async def fetch_with_chromium(url: str) -> dict[str, Any] | None:
|
||||||
ua = UserAgent()
|
ua = UserAgent()
|
||||||
user_agent = ua.random
|
user_agent = ua.random
|
||||||
|
|
||||||
|
# Use residential proxy if configured
|
||||||
|
playwright_proxy = get_playwright_proxy()
|
||||||
|
|
||||||
# Use Playwright to fetch the page
|
# Use Playwright to fetch the page
|
||||||
async with async_playwright() as p:
|
async with async_playwright() as p:
|
||||||
browser = await p.chromium.launch(headless=True)
|
launch_kwargs: dict = {"headless": True}
|
||||||
|
if playwright_proxy:
|
||||||
|
launch_kwargs["proxy"] = playwright_proxy
|
||||||
|
browser = await p.chromium.launch(**launch_kwargs)
|
||||||
context = await browser.new_context(user_agent=user_agent)
|
context = await browser.new_context(user_agent=user_agent)
|
||||||
page = await context.new_page()
|
page = await context.new_page()
|
||||||
|
|
||||||
|
|
@ -283,12 +291,16 @@ def create_link_preview_tool():
|
||||||
ua = UserAgent()
|
ua = UserAgent()
|
||||||
user_agent = ua.random
|
user_agent = ua.random
|
||||||
|
|
||||||
|
# Use residential proxy if configured
|
||||||
|
proxy_url = get_residential_proxy_url()
|
||||||
|
|
||||||
# Use a browser-like User-Agent to fetch Open Graph metadata.
|
# Use a browser-like User-Agent to fetch Open Graph metadata.
|
||||||
# We're only fetching publicly available metadata (title, description, thumbnail)
|
# We're only fetching publicly available metadata (title, description, thumbnail)
|
||||||
# that websites intentionally expose via OG tags for link preview purposes.
|
# that websites intentionally expose via OG tags for link preview purposes.
|
||||||
async with httpx.AsyncClient(
|
async with httpx.AsyncClient(
|
||||||
timeout=10.0,
|
timeout=10.0,
|
||||||
follow_redirects=True,
|
follow_redirects=True,
|
||||||
|
proxy=proxy_url,
|
||||||
headers={
|
headers={
|
||||||
"User-Agent": user_agent,
|
"User-Agent": user_agent,
|
||||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
|
||||||
|
|
|
||||||
|
|
@ -2,17 +2,26 @@
|
||||||
Web scraping tool for the SurfSense agent.
|
Web scraping tool for the SurfSense agent.
|
||||||
|
|
||||||
This module provides a tool for scraping and extracting content from webpages
|
This module provides a tool for scraping and extracting content from webpages
|
||||||
using the existing WebCrawlerConnector. The scraped content can be used by
|
using the existing WebCrawlerConnector. For YouTube URLs, it fetches the
|
||||||
the agent to answer questions about web pages.
|
transcript directly via the YouTubeTranscriptApi instead of crawling the page.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import logging
|
||||||
from typing import Any
|
from typing import Any
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
from fake_useragent import UserAgent
|
||||||
from langchain_core.tools import tool
|
from langchain_core.tools import tool
|
||||||
|
from requests import Session
|
||||||
|
from youtube_transcript_api import YouTubeTranscriptApi
|
||||||
|
|
||||||
from app.connectors.webcrawler_connector import WebCrawlerConnector
|
from app.connectors.webcrawler_connector import WebCrawlerConnector
|
||||||
|
from app.tasks.document_processors.youtube_processor import get_youtube_video_id
|
||||||
|
from app.utils.proxy_config import get_requests_proxies
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def extract_domain(url: str) -> str:
|
def extract_domain(url: str) -> str:
|
||||||
|
|
@ -57,6 +66,89 @@ def truncate_content(content: str, max_length: int = 50000) -> tuple[str, bool]:
|
||||||
return truncated + "\n\n[Content truncated...]", True
|
return truncated + "\n\n[Content truncated...]", True
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_youtube_transcript(video_id: str, proxies: dict[str, str] | None) -> str:
    """
    Fetch the transcript for ``video_id`` and format it as timestamped lines.

    This is a blocking helper: the transcript API is driven by a ``requests``
    session, so it must be run in a worker thread when called from async code.

    Args:
        video_id: The YouTube video identifier.
        proxies: A ``requests``-style proxies mapping, or ``None`` for a
            direct connection.

    Returns:
        One line per caption, formatted as ``[<start>s-<end>s] <text>``.

    Raises:
        Exception: Whatever the transcript API or the HTTP session raises;
            the caller decides how to report it.
    """
    # The context manager releases the session's connection pool on exit.
    with Session() as http_client:
        http_client.headers.update({"User-Agent": UserAgent().random})
        if proxies:
            http_client.proxies.update(proxies)
        captions = YouTubeTranscriptApi(http_client=http_client).fetch(video_id)
        return "\n".join(
            f"[{line.start:.2f}s-{line.start + line.duration:.2f}s] {line.text}"
            for line in captions
        )


async def _scrape_youtube_video(
    url: str, video_id: str, max_length: int
) -> dict[str, Any]:
    """
    Fetch YouTube video metadata and transcript via the YouTubeTranscriptApi.

    Metadata (title, author) comes from YouTube's oEmbed endpoint; the
    transcript comes from ``YouTubeTranscriptApi``. Both steps are best-effort:
    a metadata failure falls back to placeholder values and a transcript
    failure is reported inside the returned content instead of raising.

    Args:
        url: The original URL requested by the caller (echoed back in the
            result as ``assetId`` and ``href``).
        video_id: The YouTube video identifier extracted from ``url``.
        max_length: Maximum content length passed to ``truncate_content``.

    Returns:
        A result dict in the same shape as the regular scrape_webpage output,
        with ``crawler_type`` set to ``"youtube_transcript"``.
    """
    # Local import so this block does not depend on the module's import list.
    import asyncio

    scrape_id = generate_scrape_id(url)
    domain = "youtube.com"

    # Residential proxy settings (None when proxying is not configured).
    residential_proxies = get_requests_proxies()

    # --- Video metadata via oEmbed ---
    params = {
        "format": "json",
        "url": f"https://www.youtube.com/watch?v={video_id}",
    }
    oembed_url = "https://www.youtube.com/oembed"

    try:
        async with (
            aiohttp.ClientSession() as http_session,
            http_session.get(
                oembed_url,
                params=params,
                # aiohttp takes a single proxy URL string, not a mapping.
                proxy=residential_proxies["http"] if residential_proxies else None,
            ) as response,
        ):
            video_data = await response.json()
    except Exception as e:
        # Metadata is optional: record the failure and use the defaults below.
        logger.warning(
            "[scrape_webpage] Could not fetch oEmbed metadata for video %s: %s",
            video_id,
            e,
        )
        video_data = {}

    title = video_data.get("title", "YouTube Video")
    author = video_data.get("author_name", "Unknown")

    # --- Transcript via YouTubeTranscriptApi ---
    try:
        # The transcript client performs synchronous HTTP requests; run it in
        # a worker thread so the event loop is not blocked while it waits.
        transcript_text = await asyncio.to_thread(
            _fetch_youtube_transcript, video_id, residential_proxies
        )
    except Exception as e:
        logger.warning("[scrape_webpage] No transcript for video %s: %s", video_id, e)
        transcript_text = f"No captions available for this video. Error: {e!s}"

    # Build combined content
    content = f"# {title}\n\n**Author:** {author}\n**Video ID:** {video_id}\n\n## Transcript\n\n{transcript_text}"

    # Truncate if needed
    content, was_truncated = truncate_content(content, max_length)
    word_count = len(content.split())

    description = f"YouTube video by {author}"

    return {
        "id": scrape_id,
        "assetId": url,
        "kind": "article",
        "href": url,
        "title": title,
        "description": description,
        "content": content,
        "domain": domain,
        "word_count": word_count,
        "was_truncated": was_truncated,
        "crawler_type": "youtube_transcript",
        "author": author,
    }
|
||||||
|
|
||||||
|
|
||||||
def create_scrape_webpage_tool(firecrawl_api_key: str | None = None):
|
def create_scrape_webpage_tool(firecrawl_api_key: str | None = None):
|
||||||
"""
|
"""
|
||||||
Factory function to create the scrape_webpage tool.
|
Factory function to create the scrape_webpage tool.
|
||||||
|
|
@ -79,7 +171,8 @@ def create_scrape_webpage_tool(firecrawl_api_key: str | None = None):
|
||||||
|
|
||||||
Use this tool when the user wants you to read, summarize, or answer
|
Use this tool when the user wants you to read, summarize, or answer
|
||||||
questions about a specific webpage's content. This tool actually
|
questions about a specific webpage's content. This tool actually
|
||||||
fetches and reads the full page content.
|
fetches and reads the full page content. For YouTube video URLs it
|
||||||
|
fetches the transcript directly instead of crawling the page.
|
||||||
|
|
||||||
Common triggers:
|
Common triggers:
|
||||||
- "Read this article and summarize it"
|
- "Read this article and summarize it"
|
||||||
|
|
@ -114,6 +207,11 @@ def create_scrape_webpage_tool(firecrawl_api_key: str | None = None):
|
||||||
url = f"https://{url}"
|
url = f"https://{url}"
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
# Check if this is a YouTube URL and use transcript API instead
|
||||||
|
video_id = get_youtube_video_id(url)
|
||||||
|
if video_id:
|
||||||
|
return await _scrape_youtube_video(url, video_id, max_length)
|
||||||
|
|
||||||
# Create webcrawler connector
|
# Create webcrawler connector
|
||||||
connector = WebCrawlerConnector(firecrawl_api_key=firecrawl_api_key)
|
connector = WebCrawlerConnector(firecrawl_api_key=firecrawl_api_key)
|
||||||
|
|
||||||
|
|
@ -184,7 +282,7 @@ def create_scrape_webpage_tool(firecrawl_api_key: str | None = None):
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_message = str(e)
|
error_message = str(e)
|
||||||
print(f"[scrape_webpage] Error scraping {url}: {error_message}")
|
logger.error(f"[scrape_webpage] Error scraping {url}: {error_message}")
|
||||||
return {
|
return {
|
||||||
"id": scrape_id,
|
"id": scrape_id,
|
||||||
"assetId": url,
|
"assetId": url,
|
||||||
|
|
|
||||||
|
|
@ -360,6 +360,14 @@ class Config:
|
||||||
# LlamaCloud API Key
|
# LlamaCloud API Key
|
||||||
LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")
|
LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")
|
||||||
|
|
||||||
|
# Residential Proxy Configuration (anonymous-proxies.net)
|
||||||
|
# Used for web crawling, link previews, and YouTube transcript fetching to avoid IP bans.
|
||||||
|
RESIDENTIAL_PROXY_USERNAME = os.getenv("RESIDENTIAL_PROXY_USERNAME")
|
||||||
|
RESIDENTIAL_PROXY_PASSWORD = os.getenv("RESIDENTIAL_PROXY_PASSWORD")
|
||||||
|
RESIDENTIAL_PROXY_HOSTNAME = os.getenv("RESIDENTIAL_PROXY_HOSTNAME")
|
||||||
|
RESIDENTIAL_PROXY_LOCATION = os.getenv("RESIDENTIAL_PROXY_LOCATION", "")
|
||||||
|
RESIDENTIAL_PROXY_TYPE = int(os.getenv("RESIDENTIAL_PROXY_TYPE", "1"))
|
||||||
|
|
||||||
# Litellm TTS Configuration
|
# Litellm TTS Configuration
|
||||||
TTS_SERVICE = os.getenv("TTS_SERVICE")
|
TTS_SERVICE = os.getenv("TTS_SERVICE")
|
||||||
TTS_SERVICE_API_BASE = os.getenv("TTS_SERVICE_API_BASE")
|
TTS_SERVICE_API_BASE = os.getenv("TTS_SERVICE_API_BASE")
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,8 @@ from fake_useragent import UserAgent
|
||||||
from firecrawl import AsyncFirecrawlApp
|
from firecrawl import AsyncFirecrawlApp
|
||||||
from playwright.async_api import async_playwright
|
from playwright.async_api import async_playwright
|
||||||
|
|
||||||
|
from app.utils.proxy_config import get_playwright_proxy
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -165,9 +167,15 @@ class WebCrawlerConnector:
|
||||||
ua = UserAgent()
|
ua = UserAgent()
|
||||||
user_agent = ua.random
|
user_agent = ua.random
|
||||||
|
|
||||||
|
# Use residential proxy if configured
|
||||||
|
playwright_proxy = get_playwright_proxy()
|
||||||
|
|
||||||
# Use Playwright to fetch the page
|
# Use Playwright to fetch the page
|
||||||
async with async_playwright() as p:
|
async with async_playwright() as p:
|
||||||
browser = await p.chromium.launch(headless=True)
|
launch_kwargs: dict = {"headless": True}
|
||||||
|
if playwright_proxy:
|
||||||
|
launch_kwargs["proxy"] = playwright_proxy
|
||||||
|
browser = await p.chromium.launch(**launch_kwargs)
|
||||||
context = await browser.new_context(user_agent=user_agent)
|
context = await browser.new_context(user_agent=user_agent)
|
||||||
page = await context.new_page()
|
page = await context.new_page()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -52,8 +52,8 @@ def parse_date_flexible(date_str: str) -> datetime:
|
||||||
# Try ISO format as fallback
|
# Try ISO format as fallback
|
||||||
try:
|
try:
|
||||||
return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
|
return datetime.fromisoformat(date_str.replace("Z", "+00:00"))
|
||||||
except ValueError:
|
except ValueError as err:
|
||||||
raise ValueError(f"Unable to parse date: {date_str}")
|
raise ValueError(f"Unable to parse date: {date_str}") from err
|
||||||
|
|
||||||
|
|
||||||
async def check_duplicate_document_by_hash(
|
async def check_duplicate_document_by_hash(
|
||||||
|
|
|
||||||
|
|
@ -217,7 +217,7 @@ async def index_notion_pages(
|
||||||
)
|
)
|
||||||
await task_logger.log_task_failure(
|
await task_logger.log_task_failure(
|
||||||
log_entry,
|
log_entry,
|
||||||
f"Failed to get Notion pages: Notion API limitation",
|
"Failed to get Notion pages: Notion API limitation",
|
||||||
f"{error_str} - This page contains Notion AI content (transcription/ai_block) that cannot be accessed via the API.",
|
f"{error_str} - This page contains Notion AI content (transcription/ai_block) that cannot be accessed via the API.",
|
||||||
{"error_type": "UnsupportedBlockType", "is_known_limitation": True},
|
{"error_type": "UnsupportedBlockType", "is_known_limitation": True},
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -138,7 +138,7 @@ async def index_crawled_urls(
|
||||||
f"No URLs provided for indexing. Connector ID: {connector_id}, "
|
f"No URLs provided for indexing. Connector ID: {connector_id}, "
|
||||||
f"Connector name: {connector.name}, "
|
f"Connector name: {connector.name}, "
|
||||||
f"Config keys: {list(connector.config.keys()) if connector.config else 'None'}, "
|
f"Config keys: {list(connector.config.keys()) if connector.config else 'None'}, "
|
||||||
f"INITIAL_URLS raw value: {repr(raw_initial_urls)}"
|
f"INITIAL_URLS raw value: {raw_initial_urls!r}"
|
||||||
)
|
)
|
||||||
await task_logger.log_task_failure(
|
await task_logger.log_task_failure(
|
||||||
log_entry,
|
log_entry,
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,8 @@ import logging
|
||||||
from urllib.parse import parse_qs, urlparse
|
from urllib.parse import parse_qs, urlparse
|
||||||
|
|
||||||
import aiohttp
|
import aiohttp
|
||||||
|
from fake_useragent import UserAgent
|
||||||
|
from requests import Session
|
||||||
from sqlalchemy.exc import SQLAlchemyError
|
from sqlalchemy.exc import SQLAlchemyError
|
||||||
from sqlalchemy.ext.asyncio import AsyncSession
|
from sqlalchemy.ext.asyncio import AsyncSession
|
||||||
from youtube_transcript_api import YouTubeTranscriptApi
|
from youtube_transcript_api import YouTubeTranscriptApi
|
||||||
|
|
@ -19,6 +21,7 @@ from app.utils.document_converters import (
|
||||||
generate_document_summary,
|
generate_document_summary,
|
||||||
generate_unique_identifier_hash,
|
generate_unique_identifier_hash,
|
||||||
)
|
)
|
||||||
|
from app.utils.proxy_config import get_requests_proxies
|
||||||
|
|
||||||
from .base import (
|
from .base import (
|
||||||
check_document_by_unique_identifier,
|
check_document_by_unique_identifier,
|
||||||
|
|
@ -114,9 +117,16 @@ async def add_youtube_video_document(
|
||||||
}
|
}
|
||||||
oembed_url = "https://www.youtube.com/oembed"
|
oembed_url = "https://www.youtube.com/oembed"
|
||||||
|
|
||||||
|
# Build the residential proxies mapping (None when not configured)
|
||||||
|
residential_proxies = get_requests_proxies()
|
||||||
|
|
||||||
async with (
|
async with (
|
||||||
aiohttp.ClientSession() as http_session,
|
aiohttp.ClientSession() as http_session,
|
||||||
http_session.get(oembed_url, params=params) as response,
|
http_session.get(
|
||||||
|
oembed_url,
|
||||||
|
params=params,
|
||||||
|
proxy=residential_proxies["http"] if residential_proxies else None,
|
||||||
|
) as response,
|
||||||
):
|
):
|
||||||
video_data = await response.json()
|
video_data = await response.json()
|
||||||
|
|
||||||
|
|
@ -138,7 +148,12 @@ async def add_youtube_video_document(
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
ytt_api = YouTubeTranscriptApi()
|
ua = UserAgent()
|
||||||
|
http_client = Session()
|
||||||
|
http_client.headers.update({"User-Agent": ua.random})
|
||||||
|
if residential_proxies:
|
||||||
|
http_client.proxies.update(residential_proxies)
|
||||||
|
ytt_api = YouTubeTranscriptApi(http_client=http_client)
|
||||||
captions = ytt_api.fetch(video_id)
|
captions = ytt_api.fetch(video_id)
|
||||||
# Include complete caption information with timestamps
|
# Include complete caption information with timestamps
|
||||||
transcript_segments = []
|
transcript_segments = []
|
||||||
|
|
|
||||||
86
surfsense_backend/app/utils/proxy_config.py
Normal file
86
surfsense_backend/app/utils/proxy_config.py
Normal file
|
|
@ -0,0 +1,86 @@
|
||||||
|
"""
|
||||||
|
Residential proxy configuration utility.
|
||||||
|
|
||||||
|
Reads proxy credentials from the application Config and provides helper
|
||||||
|
functions that return proxy configs in the format expected by different
|
||||||
|
HTTP libraries (requests, httpx, aiohttp, Playwright).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import base64
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from app.config import Config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_password_b64() -> str | None:
    """
    Encode the proxy password payload as a base64 string.

    The payload is a JSON object with the keys ``p`` (password), ``l``
    (location) and ``t`` (type), all read from ``Config``.

    Returns ``None`` when ``Config.RESIDENTIAL_PROXY_PASSWORD`` is empty or
    unset; the location and type values are not checked.
    """
    raw_password = Config.RESIDENTIAL_PROXY_PASSWORD
    if raw_password:
        payload = json.dumps(
            {
                "p": raw_password,
                "l": Config.RESIDENTIAL_PROXY_LOCATION,
                "t": Config.RESIDENTIAL_PROXY_TYPE,
            }
        )
        encoded = base64.b64encode(payload.encode("utf-8"))
        return encoded.decode("utf-8")
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_residential_proxy_url() -> str | None:
    """
    Return the fully-formed residential proxy URL, or ``None`` when not
    configured.

    The URL format is::

        http://<username>:<base64_password>@<hostname>/

    The username and password are percent-encoded before being placed in the
    URL. Standard base64 output may contain ``/``, ``+`` and ``=``; an
    unescaped ``/`` would end the authority component and corrupt the URL.
    HTTP clients decode the userinfo again before building the proxy
    credentials, so the proxy still receives the original values.

    Returns:
        The proxy URL, or ``None`` if the username, hostname or password is
        missing from ``Config``.
    """
    # Local import so this block does not depend on the module's import list.
    from urllib.parse import quote

    username = Config.RESIDENTIAL_PROXY_USERNAME
    hostname = Config.RESIDENTIAL_PROXY_HOSTNAME
    password_b64 = _build_password_b64()

    if not all([username, hostname, password_b64]):
        return None

    # safe="" escapes every reserved character, including "/" and ":".
    user_part = quote(username, safe="")
    password_part = quote(password_b64, safe="")
    return f"http://{user_part}:{password_part}@{hostname}/"
|
||||||
|
|
||||||
|
|
||||||
|
def get_requests_proxies() -> dict[str, str] | None:
    """
    Return a ``{"http": …, "https": …}`` mapping suitable for
    ``requests.Session.proxies``, or ``None`` when not configured.

    Both schemes map to the same residential proxy URL. Clients that take a
    single proxy URL string (such as the ``proxy=`` argument of an aiohttp
    request) should be given one of the mapping's values, not the mapping.
    """
    url = get_residential_proxy_url()
    return None if url is None else {"http": url, "https": url}
|
||||||
|
|
||||||
|
|
||||||
|
def get_playwright_proxy() -> dict[str, str] | None:
    """
    Build the proxy settings dict passed to Playwright's browser launch::

        {"server": "http://host:port", "username": "…", "password": "…"}

    The password entry is the base64-encoded payload produced by
    ``_build_password_b64``. Returns ``None`` when the username, hostname or
    password is missing from ``Config``.
    """
    proxy_user = Config.RESIDENTIAL_PROXY_USERNAME
    proxy_host = Config.RESIDENTIAL_PROXY_HOSTNAME
    encoded_password = _build_password_b64()

    if proxy_user and proxy_host and encoded_password:
        return {
            "server": f"http://{proxy_host}",
            "username": proxy_user,
            "password": encoded_password,
        }
    return None
|
||||||
|
|
@ -351,14 +351,14 @@ export const ComposerAddAttachment: FC = () => {
|
||||||
<PlusIcon className="aui-attachment-add-icon size-5 stroke-[1.5px]" />
|
<PlusIcon className="aui-attachment-add-icon size-5 stroke-[1.5px]" />
|
||||||
</TooltipIconButton>
|
</TooltipIconButton>
|
||||||
</DropdownMenuTrigger>
|
</DropdownMenuTrigger>
|
||||||
<DropdownMenuContent align="start" className="w-48 bg-background border-border">
|
<DropdownMenuContent align="start" className="w-72 bg-background border-border">
|
||||||
<DropdownMenuItem onSelect={handleChatAttachment} className="cursor-pointer">
|
<DropdownMenuItem onSelect={handleChatAttachment} className="cursor-pointer">
|
||||||
<Paperclip className="size-4" />
|
<Paperclip className="size-4" />
|
||||||
<span>Add attachment</span>
|
<span>Add attachment to this chat</span>
|
||||||
</DropdownMenuItem>
|
</DropdownMenuItem>
|
||||||
<DropdownMenuItem onClick={handleFileUpload} className="cursor-pointer">
|
<DropdownMenuItem onClick={handleFileUpload} className="cursor-pointer">
|
||||||
<Upload className="size-4" />
|
<Upload className="size-4" />
|
||||||
<span>Upload Documents</span>
|
<span>Upload documents to Search Space</span>
|
||||||
</DropdownMenuItem>
|
</DropdownMenuItem>
|
||||||
</DropdownMenuContent>
|
</DropdownMenuContent>
|
||||||
</DropdownMenu>
|
</DropdownMenu>
|
||||||
|
|
|
||||||
|
|
@ -12,11 +12,11 @@ const demoPlans = [
|
||||||
features: [
|
features: [
|
||||||
"Open source on GitHub",
|
"Open source on GitHub",
|
||||||
"Upload and chat with 300+ pages of content",
|
"Upload and chat with 300+ pages of content",
|
||||||
"Connects with 8 popular sources, like Drive and Notion.",
|
"Connects with 8 popular sources, like Drive and Notion",
|
||||||
"Includes limited access to ChatGPT, Claude, and DeepSeek models",
|
"Includes limited access to ChatGPT, Claude, and DeepSeek models",
|
||||||
"Supports 100+ more LLMs, including Gemini, Llama and many more.",
|
"Supports 100+ more LLMs, including Gemini, Llama and many more",
|
||||||
"50+ File extensions supported.",
|
"50+ File extensions supported",
|
||||||
"Generate podcasts in seconds.",
|
"Generate podcasts in seconds",
|
||||||
"Cross-Browser Extension for dynamic webpages including authenticated content",
|
"Cross-Browser Extension for dynamic webpages including authenticated content",
|
||||||
"Community support on Discord",
|
"Community support on Discord",
|
||||||
],
|
],
|
||||||
|
|
@ -33,8 +33,8 @@ const demoPlans = [
|
||||||
billingText: "billed annually",
|
billingText: "billed annually",
|
||||||
features: [
|
features: [
|
||||||
"Everything in Free",
|
"Everything in Free",
|
||||||
"Upload and chat with 5,000+ pages of content",
|
"Upload and chat with 5,000+ pages of content per user",
|
||||||
"Connects with 15+ external sources, like Slack and Airtable.",
|
"Connects with 15+ external sources, like Slack and Airtable",
|
||||||
"Includes extended access to ChatGPT, Claude, and DeepSeek models",
|
"Includes extended access to ChatGPT, Claude, and DeepSeek models",
|
||||||
"Collaboration and commenting features",
|
"Collaboration and commenting features",
|
||||||
"Shared BYOK (Bring Your Own Key)",
|
"Shared BYOK (Bring Your Own Key)",
|
||||||
|
|
@ -42,7 +42,7 @@ const demoPlans = [
|
||||||
"Planned: Centralized billing",
|
"Planned: Centralized billing",
|
||||||
"Priority support",
|
"Priority support",
|
||||||
],
|
],
|
||||||
description: "The AIknowledge base for individuals and teams",
|
description: "The AI knowledge base for individuals and teams",
|
||||||
buttonText: "Upgrade",
|
buttonText: "Upgrade",
|
||||||
href: "/contact",
|
href: "/contact",
|
||||||
isPopular: true,
|
isPopular: true,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue