Renaming resources

This commit is contained in:
samkul-swe 2025-11-22 19:19:00 -08:00
parent 6d19e0fad8
commit 121e2f0c0e
24 changed files with 117 additions and 273 deletions

View file

@ -602,8 +602,8 @@ async def _index_elasticsearch_documents(
)
@celery_app.task(name="index_webcrawler_urls", bind=True)
def index_webcrawler_urls_task(
@celery_app.task(name="index_crawled_urls", bind=True)
def index_crawled_urls_task(
self,
connector_id: int,
search_space_id: int,
@ -611,7 +611,7 @@ def index_webcrawler_urls_task(
start_date: str,
end_date: str,
):
"""Celery task to index Webcrawler Urls."""
"""Celery task to index web page URLs."""
import asyncio
loop = asyncio.new_event_loop()
@ -619,7 +619,7 @@ def index_webcrawler_urls_task(
try:
loop.run_until_complete(
_index_webcrawler_urls(
_index_crawled_urls(
connector_id, search_space_id, user_id, start_date, end_date
)
)
@ -627,19 +627,19 @@ def index_webcrawler_urls_task(
loop.close()
async def _index_webcrawler_urls(
async def _index_crawled_urls(
connector_id: int,
search_space_id: int,
user_id: str,
start_date: str,
end_date: str,
):
"""Index Webcrawler Urls with new session."""
"""Index web page URLs with new session."""
from app.routes.search_source_connectors_routes import (
run_webcrawler_indexing,
run_web_page_indexing,
)
async with get_celery_session_maker()() as session:
await run_webcrawler_indexing(
await run_web_page_indexing(
session, connector_id, search_space_id, user_id, start_date, end_date
)

View file

@ -77,7 +77,7 @@ async def _check_and_trigger_schedules():
index_luma_events_task,
index_notion_pages_task,
index_slack_messages_task,
index_webcrawler_urls_task
index_crawled_urls_task
)
# Map connector types to their tasks
@ -95,7 +95,7 @@ async def _check_and_trigger_schedules():
SearchSourceConnectorType.DISCORD_CONNECTOR: index_discord_messages_task,
SearchSourceConnectorType.LUMA_CONNECTOR: index_luma_events_task,
SearchSourceConnectorType.ELASTICSEARCH_CONNECTOR: index_elasticsearch_documents_task,
SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_webcrawler_urls_task,
SearchSourceConnectorType.WEBCRAWLER_CONNECTOR: index_crawled_urls_task,
}
# Trigger indexing for each due connector

View file

@ -42,7 +42,7 @@ from .luma_indexer import index_luma_events
# Documentation and knowledge management
from .notion_indexer import index_notion_pages
from .slack_indexer import index_slack_messages
from .webcrawler_indexer import index_webcrawler_urls
from .webcrawler_indexer import index_crawled_urls
__all__ = [ # noqa: RUF022
"index_airtable_records",
@ -60,7 +60,7 @@ __all__ = [ # noqa: RUF022
"index_linear_issues",
# Documentation and knowledge management
"index_notion_pages",
"index_webcrawler_urls",
"index_crawled_urls",
# Communication platforms
"index_slack_messages",
"index_google_gmail_messages",

View file

@ -27,7 +27,7 @@ from .base import (
)
async def index_webcrawler_urls(
async def index_crawled_urls(
session: AsyncSession,
connector_id: int,
search_space_id: int,
@ -37,7 +37,7 @@ async def index_webcrawler_urls(
update_last_indexed: bool = True,
) -> tuple[int, str | None]:
"""
Index webcrawler URLs.
Index web page URLs.
Args:
session: Database session
@ -55,9 +55,9 @@ async def index_webcrawler_urls(
# Log task start
log_entry = await task_logger.log_task_start(
task_name="webcrawler_url_indexing",
task_name="crawled_url_indexing",
source="connector_indexing_task",
message=f"Starting webcrawler URL indexing for connector {connector_id}",
message=f"Starting web page URL indexing for connector {connector_id}",
metadata={
"connector_id": connector_id,
"user_id": str(user_id),
@ -104,7 +104,7 @@ async def index_webcrawler_urls(
urls = []
logger.info(
f"Starting webcrawler indexing for connector {connector_id} with {len(urls)} URLs"
f"Starting crawled web page indexing for connector {connector_id} with {len(urls)} URLs"
)
# Initialize webcrawler client
@ -367,7 +367,7 @@ async def index_webcrawler_urls(
await task_logger.log_task_success(
log_entry,
f"Successfully completed webcrawler indexing for connector {connector_id}",
f"Successfully completed crawled web page indexing for connector {connector_id}",
{
"urls_processed": total_processed,
"documents_indexed": documents_indexed,
@ -378,7 +378,7 @@ async def index_webcrawler_urls(
)
logger.info(
f"Webcrawler indexing completed: {documents_indexed} new, "
f"Web page indexing completed: {documents_indexed} new, "
f"{documents_updated} updated, {documents_skipped} skipped, "
f"{len(failed_urls)} failed"
)
@ -388,7 +388,7 @@ async def index_webcrawler_urls(
await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Database error during webcrawler indexing for connector {connector_id}",
f"Database error during web page indexing for connector {connector_id}",
str(db_error),
{"error_type": "SQLAlchemyError"},
)
@ -398,12 +398,12 @@ async def index_webcrawler_urls(
await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Failed to index webcrawler URLs for connector {connector_id}",
f"Failed to index web page URLs for connector {connector_id}",
str(e),
{"error_type": type(e).__name__},
)
logger.error(f"Failed to index webcrawler URLs: {e!s}", exc_info=True)
return 0, f"Failed to index webcrawler URLs: {e!s}"
logger.error(f"Failed to index web page URLs: {e!s}", exc_info=True)
return 0, f"Failed to index web page URLs: {e!s}"
async def get_crawled_url_documents(