chore(lint): ruff checks

2026-07-12 22:42:13 +02:00 · 2025-11-26 13:22:31 -08:00 · 2025-11-26 13:22:31 -08:00 · 8f30cfd69a
commit 8f30cfd69a
parent 34fbee0c28
8 changed files with 44 additions and 26 deletions
--- a/surfsense_backend/alembic/versions/36_remove_fk_constraints_for_global_llm_configs.py
+++ b/surfsense_backend/alembic/versions/36_remove_fk_constraints_for_global_llm_configs.py
@ -8,9 +8,10 @@ Create Date: 2025-11-13 23:20:12.912741

 from collections.abc import Sequence

-from alembic import op
 from sqlalchemy import text

+from alembic import op
+
 # revision identifiers, used by Alembic.
 revision: str = "36"
 down_revision: str | None = "35"
@ -49,7 +50,9 @@ def upgrade() -> None:
    ]

    for constraint_name in constraints_to_drop:
-        if constraint_exists(connection, "user_search_space_preferences", constraint_name):
+        if constraint_exists(
+            connection, "user_search_space_preferences", constraint_name
+        ):
            op.drop_constraint(
                constraint_name,
                "user_search_space_preferences",
@ -67,13 +70,18 @@ def downgrade() -> None:

    # Re-add the foreign key constraints if they don't exist
    constraints_to_create = [
-        ("user_search_space_preferences_long_context_llm_id_fkey", "long_context_llm_id"),
+        (
+            "user_search_space_preferences_long_context_llm_id_fkey",
+            "long_context_llm_id",
+        ),
        ("user_search_space_preferences_fast_llm_id_fkey", "fast_llm_id"),
        ("user_search_space_preferences_strategic_llm_id_fkey", "strategic_llm_id"),
    ]

    for constraint_name, column_name in constraints_to_create:
-        if not constraint_exists(connection, "user_search_space_preferences", constraint_name):
+        if not constraint_exists(
+            connection, "user_search_space_preferences", constraint_name
+        ):
            op.create_foreign_key(
                constraint_name,
                "user_search_space_preferences",
--- a/surfsense_backend/app/connectors/webcrawler_connector.py
+++ b/surfsense_backend/app/connectors/webcrawler_connector.py
@ -105,7 +105,7 @@ class WebCrawlerConnector:

        # Extract content based on format
        content = scrape_result.markdown or scrape_result.html or ""
-        
+
        # Extract metadata
        metadata = scrape_result.metadata if scrape_result.metadata else {}

--- a/surfsense_backend/app/routes/search_source_connectors_routes.py
+++ b/surfsense_backend/app/routes/search_source_connectors_routes.py
@ -39,6 +39,7 @@ from app.tasks.connector_indexers import (
    index_airtable_records,
    index_clickup_tasks,
    index_confluence_pages,
+    index_crawled_urls,
    index_discord_messages,
    index_elasticsearch_documents,
    index_github_repos,
@ -49,7 +50,6 @@ from app.tasks.connector_indexers import (
    index_luma_events,
    index_notion_pages,
    index_slack_messages,
-    index_crawled_urls,
 )
 from app.users import current_active_user
 from app.utils.check_ownership import check_ownership
@ -1537,6 +1537,7 @@ async def run_elasticsearch_indexing(
            exc_info=True,
        )

+
 # Add new helper functions for crawled web page indexing
 async def run_web_page_indexing_with_new_session(
    connector_id: int,
@ -1595,4 +1596,4 @@ async def run_web_page_indexing(
                f"Web page indexing failed or no documents processed: {error_or_warning}"
            )
    except Exception as e:
-        logger.error(f"Error in background Web page indexing task: {e!s}")
+        logger.error(f"Error in background Web page indexing task: {e!s}")
--- a/surfsense_backend/app/services/connector_service.py
+++ b/surfsense_backend/app/services/connector_service.py
@ -118,7 +118,9 @@ class ConnectorService:

                # Extract webcrawler-specific metadata
                url = metadata.get("source", metadata.get("url", ""))
-                title = document.get("title", metadata.get("title", "Untitled Document"))
+                title = document.get(
+                    "title", metadata.get("title", "Untitled Document")
+                )
                description = metadata.get("description", "")
                language = metadata.get("language", "")
                last_crawled_at = metadata.get("last_crawled_at", "")
@ -2573,4 +2575,4 @@ class ConnectorService:
            "sources": sources_list,
        }

-        return result_object, elasticsearch_chunks
+        return result_object, elasticsearch_chunks
--- a/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py
+++ b/surfsense_backend/app/tasks/celery_tasks/schedule_checker_task.py
@ -67,6 +67,7 @@ async def _check_and_trigger_schedules():
                index_airtable_records_task,
                index_clickup_tasks_task,
                index_confluence_pages_task,
+                index_crawled_urls_task,
                index_discord_messages_task,
                index_elasticsearch_documents_task,
                index_github_repos_task,
@ -77,7 +78,6 @@ async def _check_and_trigger_schedules():
                index_luma_events_task,
                index_notion_pages_task,
                index_slack_messages_task,
-                index_crawled_urls_task
            )

            # Map connector types to their tasks
--- a/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py
+++ b/surfsense_backend/app/tasks/connector_indexers/webcrawler_indexer.py
@ -93,7 +93,7 @@ async def index_crawled_urls(

        # Get the Firecrawl API key from the connector config (optional)
        api_key = connector.config.get("FIRECRAWL_API_KEY")
-        
+
        # Get URLs from connector config
        initial_urls = connector.config.get("INITIAL_URLS", "")
        if isinstance(initial_urls, str):
@ -177,7 +177,9 @@ async def index_crawled_urls(
                    continue

                # Format content as structured document
-                structured_document = crawler.format_to_structured_document(crawl_result)
+                structured_document = crawler.format_to_structured_document(
+                    crawl_result
+                )

                # Generate unique identifier hash for this URL
                unique_identifier_hash = generate_unique_identifier_hash(
@ -185,7 +187,9 @@ async def index_crawled_urls(
                )

                # Generate content hash
-                content_hash = generate_content_hash(structured_document, search_space_id)
+                content_hash = generate_content_hash(
+                    structured_document, search_space_id
+                )

                # Check if document with this unique identifier already exists
                existing_document = await check_document_by_unique_identifier(
@ -205,7 +209,9 @@ async def index_crawled_urls(
                        continue
                    else:
                        # Content has changed - update the existing document
-                        logger.info(f"Content changed for URL {url}. Updating document.")
+                        logger.info(
+                            f"Content changed for URL {url}. Updating document."
+                        )

                        # Generate summary with metadata
                        user_llm = await get_user_long_context_llm(
@ -236,7 +242,7 @@ async def index_crawled_urls(
                            if language:
                                summary_content += f"Language: {language}\n"
                            summary_content += f"Crawler: {crawler_type}\n\n"
-                            
+
                            # Add content preview
                            content_preview = content[:1000]
                            if len(content) > 1000:
@ -298,7 +304,7 @@ async def index_crawled_urls(
                    if language:
                        summary_content += f"Language: {language}\n"
                    summary_content += f"Crawler: {crawler_type}\n\n"
-                    
+
                    # Add content preview
                    content_preview = content[:1000]
                    if len(content) > 1000:
@ -347,7 +353,7 @@ async def index_crawled_urls(
                continue

        total_processed = documents_indexed + documents_updated
-        
+
        if total_processed > 0:
            await update_connector_last_indexed(session, connector, update_last_indexed)

@ -360,10 +366,14 @@ async def index_crawled_urls(
        # Build result message
        result_message = None
        if failed_urls:
-            failed_summary = "; ".join([f"{url}: {error}" for url, error in failed_urls[:5]])
+            failed_summary = "; ".join(
+                [f"{url}: {error}" for url, error in failed_urls[:5]]
+            )
            if len(failed_urls) > 5:
                failed_summary += f" (and {len(failed_urls) - 5} more)"
-            result_message = f"Completed with {len(failed_urls)} failures: {failed_summary}"
+            result_message = (
+                f"Completed with {len(failed_urls)} failures: {failed_summary}"
+            )

        await task_logger.log_task_success(
            log_entry,
@ -436,4 +446,4 @@ async def get_crawled_url_documents(

    result = await session.execute(query)
    documents = result.scalars().all()
-    return list(documents)
+    return list(documents)
--- a/surfsense_backend/app/utils/periodic_scheduler.py
+++ b/surfsense_backend/app/utils/periodic_scheduler.py
@ -70,6 +70,7 @@ def create_periodic_schedule(
            index_airtable_records_task,
            index_clickup_tasks_task,
            index_confluence_pages_task,
+            index_crawled_urls_task,
            index_discord_messages_task,
            index_elasticsearch_documents_task,
            index_github_repos_task,
@ -80,7 +81,6 @@ def create_periodic_schedule(
            index_luma_events_task,
            index_notion_pages_task,
            index_slack_messages_task,
-            index_crawled_urls_task,
        )

        # Map connector type to task
--- a/surfsense_backend/app/utils/validators.py
+++ b/surfsense_backend/app/utils/validators.py
@ -468,7 +468,7 @@ def validate_connector_config(
        value = config.get(key)
        if not isinstance(value, list) or not value:
            raise ValueError(f"{field_name} must be a non-empty list of strings")
-        
+
    def validate_firecrawl_api_key_format() -> None:
        """Validate Firecrawl API key format if provided."""
        api_key = config.get("FIRECRAWL_API_KEY", "")
@ -477,16 +477,13 @@ def validate_connector_config(
                "Firecrawl API key should start with 'fc-'. Please verify your API key."
            )

-
    def validate_initial_urls() -> None:
        initial_urls = config.get("INITIAL_URLS", "")
        if initial_urls and initial_urls.strip():
            urls = [url.strip() for url in initial_urls.split("\n") if url.strip()]
            for url in urls:
                if not validators.url(url):
-                    raise ValueError(
-                        f"Invalid URL format in INITIAL_URLS: {url}"
-                    )
+                    raise ValueError(f"Invalid URL format in INITIAL_URLS: {url}")

    # Lookup table for connector validation rules
    connector_rules = {