refactor: enhance web crawling functionality with Firecrawl integration

- Updated WebCrawlerConnector to prioritize the Firecrawl API for crawling when an API key is provided, falling back to Chromium if Firecrawl fails.
- Improved error handling to log failures from both Firecrawl and Chromium.
- Enhanced link preview tool to use a random User-Agent for better compatibility with web servers.
- Read the Firecrawl API key from the web crawler connector's config inside stream_new_chat and passed it downstream as the firecrawl_api_key argument for improved configuration management.
This commit is contained in:
Anish Sarkar 2025-12-26 02:37:20 +05:30
parent ad5a49c2c6
commit d9df63f57e
3 changed files with 41 additions and 26 deletions

View file

@ -146,6 +146,16 @@ async def stream_new_chat(
# Create connector service
connector_service = ConnectorService(session, search_space_id=search_space_id)
# Get Firecrawl API key from webcrawler connector if configured
from app.db import SearchSourceConnectorType
firecrawl_api_key = None
webcrawler_connector = await connector_service.get_connector_by_type(
SearchSourceConnectorType.WEBCRAWLER_CONNECTOR, search_space_id
)
if webcrawler_connector and webcrawler_connector.config:
firecrawl_api_key = webcrawler_connector.config.get("FIRECRAWL_API_KEY")
# Get the PostgreSQL checkpointer for persistent conversation memory
checkpointer = await get_checkpointer()
@ -157,6 +167,7 @@ async def stream_new_chat(
connector_service=connector_service,
checkpointer=checkpointer,
agent_config=agent_config, # Pass prompt configuration
firecrawl_api_key=firecrawl_api_key, # Pass Firecrawl API key if configured
)
# Build input with message history from frontend