mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-04-26 09:16:22 +02:00
Webcrawler connector draft
This commit is contained in:
parent
419f94e8ee
commit
896e410e2a
26 changed files with 1225 additions and 9 deletions
|
|
@ -600,3 +600,46 @@ async def _index_elasticsearch_documents(
|
|||
await run_elasticsearch_indexing(
|
||||
session, connector_id, search_space_id, user_id, start_date, end_date
|
||||
)
|
||||
|
||||
|
||||
@celery_app.task(name="index_webcrawler_urls", bind=True)
|
||||
def index_webcrawler_urls_task(
|
||||
self,
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
start_date: str,
|
||||
end_date: str,
|
||||
):
|
||||
"""Celery task to index Webcrawler Urls."""
|
||||
import asyncio
|
||||
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
|
||||
try:
|
||||
loop.run_until_complete(
|
||||
_index_webcrawler_urls(
|
||||
connector_id, search_space_id, user_id, start_date, end_date
|
||||
)
|
||||
)
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
|
||||
async def _index_webcrawler_urls(
|
||||
connector_id: int,
|
||||
search_space_id: int,
|
||||
user_id: str,
|
||||
start_date: str,
|
||||
end_date: str,
|
||||
):
|
||||
"""Index Webcrawler Urls with new session."""
|
||||
from app.routes.search_source_connectors_routes import (
|
||||
run_webcrawler_indexing,
|
||||
)
|
||||
|
||||
async with get_celery_session_maker()() as session:
|
||||
await run_webcrawler_indexing(
|
||||
session, connector_id, search_space_id, user_id, start_date, end_date
|
||||
)
|
||||
|
|
|
|||
|
|
@ -77,6 +77,7 @@ async def _check_and_trigger_schedules():
|
|||
index_luma_events_task,
|
||||
index_notion_pages_task,
|
||||
index_slack_messages_task,
|
||||
index_webcrawler_urls_task
|
||||
)
|
||||
|
||||
# Map connector types to their tasks
|
||||
|
|
@ -94,6 +95,7 @@ async def _check_and_trigger_schedules():
|
|||
SearchSourceConnectorType.DISCORD_CONNECTOR: index_discord_messages_task,
|
||||
SearchSourceConnectorType.LUMA_CONNECTOR: index_luma_events_task,
|
||||
SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: index_elasticsearch_documents_task,
|
||||
SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_webcrawler_urls_task,
|
||||
}
|
||||
|
||||
# Trigger indexing for each due connector
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue