chore: ran linting

2026-05-01 03:46:25 +02:00 · 2026-02-26 03:05:20 +05:30 · 2026-02-26 03:05:20 +05:30 · 9ccee054a5
commit 9ccee054a5
parent 7332be956e
24 changed files with 368 additions and 151 deletions
--- a/surfsense_backend/app/agents/new_chat/sandbox.py
+++ b/surfsense_backend/app/agents/new_chat/sandbox.py
@@ -147,7 +147,9 @@ async def delete_sandbox(thread_id: int | str) -> None:
        try:
            sandbox = client.find_one(labels=labels)
        except DaytonaError:
-            logger.debug("No sandbox to delete for thread %s (already removed)", thread_id)
+            logger.debug(
+                "No sandbox to delete for thread %s (already removed)", thread_id
+            )
            return
        try:
            client.delete(sandbox)
@@ -166,6 +168,7 @@ async def delete_sandbox(thread_id: int | str) -> None:
 # Local file persistence
 # ---------------------------------------------------------------------------

+
 def _get_sandbox_files_dir() -> Path:
    return Path(os.environ.get("SANDBOX_FILES_DIR", "sandbox_files"))

--- a/surfsense_backend/app/indexing_pipeline/connector_document.py
+++ b/surfsense_backend/app/indexing_pipeline/connector_document.py
@@ -5,6 +5,7 @@ from app.db import DocumentType

 class ConnectorDocument(BaseModel):
    """Canonical data transfer object produced by connector adapters and consumed by the indexing pipeline."""
+
    title: str
    source_markdown: str
    unique_id: str
--- a/surfsense_backend/app/indexing_pipeline/document_chunker.py
+++ b/surfsense_backend/app/indexing_pipeline/document_chunker.py
@@ -3,5 +3,7 @@ from app.config import config

 def chunk_text(text: str, use_code_chunker: bool = False) -> list[str]:
    """Chunk a text string using the configured chunker and return the chunk texts."""
-    chunker = config.code_chunker_instance if use_code_chunker else config.chunker_instance
+    chunker = (
+        config.code_chunker_instance if use_code_chunker else config.chunker_instance
+    )
    return [c.text for c in chunker.chunk(text)]
--- a/surfsense_backend/app/indexing_pipeline/document_summarizer.py
+++ b/surfsense_backend/app/indexing_pipeline/document_summarizer.py
@@ -2,7 +2,9 @@ from app.prompts import SUMMARY_PROMPT_TEMPLATE
 from app.utils.document_converters import optimize_content_for_context_window


-async def summarize_document(source_markdown: str, llm, metadata: dict | None = None) -> str:
+async def summarize_document(
+    source_markdown: str, llm, metadata: dict | None = None
+) -> str:
    """Generate a text summary of a document using an LLM, prefixed with metadata when provided."""
    model_name = getattr(llm, "model", "gpt-3.5-turbo")
    optimized_content = optimize_content_for_context_window(
--- a/surfsense_backend/app/indexing_pipeline/exceptions.py
+++ b/surfsense_backend/app/indexing_pipeline/exceptions.py
@@ -12,7 +12,6 @@ from litellm.exceptions import (
    Timeout,
    UnprocessableEntityError,
 )
-from sqlalchemy.exc import IntegrityError

 # Tuples for use directly in except clauses.
 RETRYABLE_LLM_ERRORS = (
@@ -36,29 +35,33 @@ PERMANENT_LLM_ERRORS = (
 # (LiteLLMEmbeddings, CohereEmbeddings, GeminiEmbeddings all normalize to RuntimeError).
 EMBEDDING_ERRORS = (
    RuntimeError,  # local device failure or API backend normalization
-    OSError,       # model files missing or corrupted (local backends)
-    MemoryError,   # document too large for available RAM
+    OSError,  # model files missing or corrupted (local backends)
+    MemoryError,  # document too large for available RAM
 )


 class PipelineMessages:
-    RATE_LIMIT        = "LLM rate limit exceeded. Will retry on next sync."
-    LLM_TIMEOUT       = "LLM request timed out. Will retry on next sync."
-    LLM_UNAVAILABLE   = "LLM service temporarily unavailable. Will retry on next sync."
-    LLM_BAD_GATEWAY   = "LLM gateway error. Will retry on next sync."
-    LLM_SERVER_ERROR  = "LLM internal server error. Will retry on next sync."
-    LLM_CONNECTION    = "Could not reach the LLM service. Check network connectivity."
+    RATE_LIMIT = "LLM rate limit exceeded. Will retry on next sync."
+    LLM_TIMEOUT = "LLM request timed out. Will retry on next sync."
+    LLM_UNAVAILABLE = "LLM service temporarily unavailable. Will retry on next sync."
+    LLM_BAD_GATEWAY = "LLM gateway error. Will retry on next sync."
+    LLM_SERVER_ERROR = "LLM internal server error. Will retry on next sync."
+    LLM_CONNECTION = "Could not reach the LLM service. Check network connectivity."

-    LLM_AUTH          = "LLM authentication failed. Check your API key."
-    LLM_PERMISSION    = "LLM request denied. Check your account permissions."
-    LLM_NOT_FOUND     = "LLM model not found. Check your model configuration."
-    LLM_BAD_REQUEST   = "LLM rejected the request. Document content may be invalid."
-    LLM_UNPROCESSABLE = "Document exceeds the LLM context window even after optimization."
-    LLM_RESPONSE      = "LLM returned an invalid response."
+    LLM_AUTH = "LLM authentication failed. Check your API key."
+    LLM_PERMISSION = "LLM request denied. Check your account permissions."
+    LLM_NOT_FOUND = "LLM model not found. Check your model configuration."
+    LLM_BAD_REQUEST = "LLM rejected the request. Document content may be invalid."
+    LLM_UNPROCESSABLE = (
+        "Document exceeds the LLM context window even after optimization."
+    )
+    LLM_RESPONSE = "LLM returned an invalid response."

-    EMBEDDING_FAILED  = "Embedding failed. Check your embedding model configuration or service."
-    EMBEDDING_MODEL   = "Embedding model files are missing or corrupted."
-    EMBEDDING_MEMORY  = "Not enough memory to embed this document."
+    EMBEDDING_FAILED = (
+        "Embedding failed. Check your embedding model configuration or service."
+    )
+    EMBEDDING_MODEL = "Embedding model files are missing or corrupted."
+    EMBEDDING_MEMORY = "Not enough memory to embed this document."

    CHUNKING_OVERFLOW = "Document structure is too deeply nested to chunk."

--- a/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py
+++ b/surfsense_backend/app/indexing_pipeline/indexing_pipeline_service.py
@@ -2,6 +2,7 @@ import contextlib
 from datetime import UTC, datetime

 from sqlalchemy import delete, select
+from sqlalchemy.exc import IntegrityError
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.db import Chunk, Document, DocumentStatus
@@ -21,7 +22,6 @@ from app.indexing_pipeline.exceptions import (
    EMBEDDING_ERRORS,
    PERMANENT_LLM_ERRORS,
    RETRYABLE_LLM_ERRORS,
-    IntegrityError,
    PipelineMessages,
    embedding_message,
    llm_permanent_message,
--- a/surfsense_backend/app/indexing_pipeline/pipeline_logger.py
+++ b/surfsense_backend/app/indexing_pipeline/pipeline_logger.py
@@ -8,27 +8,29 @@ logger = logging.getLogger(__name__)
 class PipelineLogContext:
    connector_id: int | None
    search_space_id: int
-    unique_id: str            # always available from ConnectorDocument
-    doc_id: int | None = None # set once the DB row exists (index phase only)
+    unique_id: str  # always available from ConnectorDocument
+    doc_id: int | None = None  # set once the DB row exists (index phase only)


 class LogMessages:
    # prepare_for_indexing
-    DOCUMENT_QUEUED     = "New document queued for indexing."
-    DOCUMENT_UPDATED    = "Document content changed, re-queued for indexing."
-    DOCUMENT_REQUEUED   = "Stuck document re-queued for indexing."
+    DOCUMENT_QUEUED = "New document queued for indexing."
+    DOCUMENT_UPDATED = "Document content changed, re-queued for indexing."
+    DOCUMENT_REQUEUED = "Stuck document re-queued for indexing."
    DOC_SKIPPED_UNKNOWN = "Unexpected error — document skipped."
-    BATCH_ABORTED       = "Fatal DB error — aborting prepare batch."
-    RACE_CONDITION      = "Concurrent worker beat us to the commit — rolling back batch."
+    BATCH_ABORTED = "Fatal DB error — aborting prepare batch."
+    RACE_CONDITION = "Concurrent worker beat us to the commit — rolling back batch."

    # index
-    INDEX_STARTED       = "Document indexing started."
-    INDEX_SUCCESS       = "Document indexed successfully."
-    LLM_RETRYABLE       = "Retryable LLM error — document marked failed, will retry on next sync."
-    LLM_PERMANENT       = "Permanent LLM error — document marked failed."
-    EMBEDDING_FAILED    = "Embedding error — document marked failed."
-    CHUNKING_OVERFLOW   = "Chunking overflow — document marked failed."
-    UNEXPECTED          = "Unexpected error — document marked failed."
+    INDEX_STARTED = "Document indexing started."
+    INDEX_SUCCESS = "Document indexed successfully."
+    LLM_RETRYABLE = (
+        "Retryable LLM error — document marked failed, will retry on next sync."
+    )
+    LLM_PERMANENT = "Permanent LLM error — document marked failed."
+    EMBEDDING_FAILED = "Embedding error — document marked failed."
+    CHUNKING_OVERFLOW = "Chunking overflow — document marked failed."
+    UNEXPECTED = "Unexpected error — document marked failed."


 def _format_context(ctx: PipelineLogContext) -> str:
@@ -52,7 +54,9 @@ def _build_message(msg: str, ctx: PipelineLogContext, **extra) -> str:
        return msg


-def _safe_log(level_fn, msg: str, ctx: PipelineLogContext, exc_info=None, **extra) -> None:
+def _safe_log(
+    level_fn, msg: str, ctx: PipelineLogContext, exc_info=None, **extra
+) -> None:
    # Logging must never raise — a broken log call inside an except block would
    # chain with the original exception and mask it entirely.
    try:
@@ -64,6 +68,7 @@ def _safe_log(level_fn, msg: str, ctx: PipelineLogContext, exc_info=None, **extr

 # ── prepare_for_indexing ──────────────────────────────────────────────────────

+
 def log_document_queued(ctx: PipelineLogContext) -> None:
    _safe_log(logger.info, LogMessages.DOCUMENT_QUEUED, ctx)

@@ -77,7 +82,9 @@ def log_document_requeued(ctx: PipelineLogContext) -> None:


 def log_doc_skipped_unknown(ctx: PipelineLogContext, exc: Exception) -> None:
-    _safe_log(logger.warning, LogMessages.DOC_SKIPPED_UNKNOWN, ctx, exc_info=exc, error=exc)
+    _safe_log(
+        logger.warning, LogMessages.DOC_SKIPPED_UNKNOWN, ctx, exc_info=exc, error=exc
+    )


 def log_race_condition(ctx: PipelineLogContext) -> None:
@@ -90,6 +97,7 @@ def log_batch_aborted(ctx: PipelineLogContext, exc: Exception) -> None:

 # ── index ─────────────────────────────────────────────────────────────────────

+
 def log_index_started(ctx: PipelineLogContext) -> None:
    _safe_log(logger.info, LogMessages.INDEX_STARTED, ctx)

--- a/surfsense_backend/app/routes/new_chat_routes.py
+++ b/surfsense_backend/app/routes/new_chat_routes.py
@@ -10,6 +10,8 @@ These endpoints support the ThreadHistoryAdapter pattern from assistant-ui:
 - POST /threads/{thread_id}/messages - Append message
 """

+import asyncio
+import logging
 from datetime import UTC, datetime

 from fastapi import APIRouter, Depends, HTTPException, Request
@@ -52,9 +54,6 @@ from app.tasks.chat.stream_new_chat import stream_new_chat, stream_resume_chat
 from app.users import current_active_user
 from app.utils.rbac import check_permission

-import asyncio
-import logging
-
 _logger = logging.getLogger(__name__)

 router = APIRouter()
@@ -75,11 +74,19 @@ def _try_delete_sandbox(thread_id: int) -> None:
        try:
            await delete_sandbox(thread_id)
        except Exception:
-            _logger.warning("Background sandbox delete failed for thread %s", thread_id, exc_info=True)
+            _logger.warning(
+                "Background sandbox delete failed for thread %s",
+                thread_id,
+                exc_info=True,
+            )
        try:
            delete_local_sandbox_files(thread_id)
        except Exception:
-            _logger.warning("Local sandbox file cleanup failed for thread %s", thread_id, exc_info=True)
+            _logger.warning(
+                "Local sandbox file cleanup failed for thread %s",
+                thread_id,
+                exc_info=True,
+            )

    try:
        loop = asyncio.get_running_loop()
--- a/surfsense_backend/app/routes/sandbox_routes.py
+++ b/surfsense_backend/app/routes/sandbox_routes.py
@@ -87,7 +87,7 @@ async def download_sandbox_file(
    # Fall back to live sandbox download
    try:
        sandbox = await get_or_create_sandbox(thread_id)
-        raw_sandbox = sandbox._sandbox  # noqa: SLF001
+        raw_sandbox = sandbox._sandbox
        content: bytes = await asyncio.to_thread(raw_sandbox.fs.download_file, path)
    except Exception as exc:
        logger.warning("Sandbox file download failed for %s: %s", path, exc)
--- a/surfsense_backend/app/tasks/chat/stream_new_chat.py
+++ b/surfsense_backend/app/tasks/chat/stream_new_chat.py
@@ -877,7 +877,9 @@ async def _stream_agent_events(
                    output_text = om.group(1) if om else ""
                thread_id_str = config.get("configurable", {}).get("thread_id", "")

-                for sf_match in re.finditer(r"^SANDBOX_FILE:\s*(.+)$", output_text, re.MULTILINE):
+                for sf_match in re.finditer(
+                    r"^SANDBOX_FILE:\s*(.+)$", output_text, re.MULTILINE
+                ):
                    fpath = sf_match.group(1).strip()
                    if fpath and fpath not in result.sandbox_files:
                        result.sandbox_files.append(fpath)
@@ -963,7 +965,10 @@ def _try_persist_and_delete_sandbox(
    sandbox_files: list[str],
 ) -> None:
    """Fire-and-forget: persist sandbox files locally then delete the sandbox."""
-    from app.agents.new_chat.sandbox import is_sandbox_enabled, persist_and_delete_sandbox
+    from app.agents.new_chat.sandbox import (
+        is_sandbox_enabled,
+        persist_and_delete_sandbox,
+    )

    if not is_sandbox_enabled():
        return
--- a/surfsense_backend/app/tasks/document_processors/file_processors.py
+++ b/surfsense_backend/app/tasks/document_processors/file_processors.py
@@ -1886,11 +1886,11 @@ async def process_file_in_background_with_document(
        await task_logger.log_task_success(
            log_entry,
            f"Successfully processed file: {filename}",
-        {
+            {
                "document_id": doc_id,
                "content_hash": content_hash,
                "file_type": etl_service,
-        },
+            },
        )

        return document