chore: linting

2026-06-18 21:15:16 +02:00 · 2026-06-17 22:31:36 -07:00 · 2026-06-17 22:31:36 -07:00 · 55f91a29d5
commit 55f91a29d5
parent c6d42fc7c8
16 changed files with 50 additions and 40 deletions
--- a/surfsense_backend/alembic/versions/164_remove_inactive_users.py
+++ b/surfsense_backend/alembic/versions/164_remove_inactive_users.py
@@ -68,7 +68,7 @@ def upgrade() -> None:
        # has NULL last_login -> the migration is idempotent and resumable.
        op.execute(f"DROP TABLE IF EXISTS {USER_SCRATCH};")
        op.execute(
-            f'CREATE UNLOGGED TABLE {USER_SCRATCH} AS '
+            f"CREATE UNLOGGED TABLE {USER_SCRATCH} AS "
            'SELECT id FROM "user" WHERE last_login IS NULL;'
        )
        op.execute(f"ALTER TABLE {USER_SCRATCH} ADD PRIMARY KEY (id);")
--- a/surfsense_backend/alembic/versions/165_add_chunk_position.py
+++ b/surfsense_backend/alembic/versions/165_add_chunk_position.py
@@ -87,11 +87,11 @@ def upgrade() -> None:
            ).scalar()
            or 0
        )
-        total_rows_display = f"~{total_rows:,}" if total_rows > 0 else "an unknown number of"
+        total_rows_display = (
+            f"~{total_rows:,}" if total_rows > 0 else "an unknown number of"
+        )

-        bounds = bind.execute(
-            sa.text("SELECT min(id), max(id) FROM chunks")
-        ).one()
+        bounds = bind.execute(sa.text("SELECT min(id), max(id) FROM chunks")).one()
        min_id, max_id = bounds[0], bounds[1]

        if min_id is None:
@@ -167,9 +167,7 @@ def upgrade() -> None:
            op.execute(f"DROP TABLE IF EXISTS {SCRATCH_TABLE};")

        logger.info("creating index ix_chunks_position...")
-        op.execute(
-            "CREATE INDEX IF NOT EXISTS ix_chunks_position ON chunks(position);"
-        )
+        op.execute("CREATE INDEX IF NOT EXISTS ix_chunks_position ON chunks(position);")
        logger.info("creating index ix_chunks_document_id_position...")
        op.execute(
            "CREATE INDEX IF NOT EXISTS ix_chunks_document_id_position "
--- a/surfsense_backend/app/config/init.py
+++ b/surfsense_backend/app/config/init.py
@@ -927,7 +927,9 @@ class Config:
        AZURE_DI_KEY = os.getenv("AZURE_DI_KEY")

    # ETL parse cache: reuse parser output for identical bytes across workspaces.
-    ETL_CACHE_ENABLED = os.getenv("ETL_CACHE_ENABLED", "false").strip().lower() == "true"
+    ETL_CACHE_ENABLED = (
+        os.getenv("ETL_CACHE_ENABLED", "false").strip().lower() == "true"
+    )
    # Bump to invalidate every cached entry after a parser/behaviour change.
    ETL_CACHE_PARSER_VERSION = int(os.getenv("ETL_CACHE_PARSER_VERSION", "1"))
    ETL_CACHE_TTL_DAYS = int(os.getenv("ETL_CACHE_TTL_DAYS", "90"))
@@ -948,7 +950,9 @@ class Config:
        os.getenv("EMBEDDING_CACHE_CHUNKER_VERSION", "1")
    )
    EMBEDDING_CACHE_TTL_DAYS = int(os.getenv("EMBEDDING_CACHE_TTL_DAYS", "90"))
-    EMBEDDING_CACHE_MAX_TOTAL_MB = int(os.getenv("EMBEDDING_CACHE_MAX_TOTAL_MB", "5120"))
+    EMBEDDING_CACHE_MAX_TOTAL_MB = int(
+        os.getenv("EMBEDDING_CACHE_MAX_TOTAL_MB", "5120")
+    )
    EMBEDDING_CACHE_EVICTION_BATCH = int(
        os.getenv("EMBEDDING_CACHE_EVICTION_BATCH", "500")
    )
--- a/surfsense_backend/app/etl_pipeline/cache/cached_extraction.py
+++ b/surfsense_backend/app/etl_pipeline/cache/cached_extraction.py
@@ -20,9 +20,7 @@ logger = logging.getLogger(__name__)
 _HASH_CHUNK = 1024 * 1024


-async def extract_with_cache(
-    request: EtlRequest, *, vision_llm=None
-) -> EtlResult:
+async def extract_with_cache(request: EtlRequest, *, vision_llm=None) -> EtlResult:
    """Drop-in for ``EtlPipelineService.extract`` that reuses prior parser output."""
    settings = load_etl_cache_settings()

--- a/surfsense_backend/app/etl_pipeline/cache/eviction/task.py
+++ b/surfsense_backend/app/etl_pipeline/cache/eviction/task.py
@@ -34,7 +34,9 @@ async def _evict() -> None:
        index = CachedParseRepository(session)

        cutoff = datetime.now(UTC) - timedelta(days=settings.ttl_days)
-        expired = await index.select_expired(cutoff=cutoff, limit=settings.eviction_batch)
+        expired = await index.select_expired(
+            cutoff=cutoff, limit=settings.eviction_batch
+        )
        await _drop(index, store, expired, phase="ttl")

        total = await index.total_size_bytes()
--- a/surfsense_backend/app/etl_pipeline/cache/storage/backend.py
+++ b/surfsense_backend/app/etl_pipeline/cache/storage/backend.py
@@ -38,7 +38,9 @@ def resolve_cache_backend() -> StorageBackend:

    if backend == "local":
        if not settings.storage_local_root:
-            raise ValueError("ETL_CACHE_STORAGE_LOCAL_PATH is required for local cache.")
+            raise ValueError(
+                "ETL_CACHE_STORAGE_LOCAL_PATH is required for local cache."
+            )
        from app.file_storage.backends.local import LocalFileBackend

        return LocalFileBackend(settings.storage_local_root)
--- a/surfsense_backend/app/indexing_pipeline/cache/serialization.py
+++ b/surfsense_backend/app/indexing_pipeline/cache/serialization.py
@@ -31,7 +31,9 @@ def serialize(embedding_set: EmbeddingSet) -> bytes:
    for chunk in embedding_set.chunks:
        vector = np.asarray(chunk.embedding, dtype=np.float32).reshape(-1)
        if vector.shape[0] != dim:
-            raise ValueError("All vectors in an embedding set must share one dimension.")
+            raise ValueError(
+                "All vectors in an embedding set must share one dimension."
+            )
        rows.append(vector)
        texts.append(chunk.text)

@@ -67,5 +69,7 @@ def deserialize(blob: bytes) -> EmbeddingSet:

    return EmbeddingSet(
        summary_embedding=matrix[0],
-        chunks=[CachedChunk(text=texts[i], embedding=matrix[i + 1]) for i in range(count)],
+        chunks=[
+            CachedChunk(text=texts[i], embedding=matrix[i + 1]) for i in range(count)
+        ],
    )
--- a/surfsense_backend/app/indexing_pipeline/cache/storage/embedding_store.py
+++ b/surfsense_backend/app/indexing_pipeline/cache/storage/embedding_store.py
@@ -22,13 +22,13 @@ class EmbeddingCacheStore:
    def backend_name(self) -> str:
        return self._backend.backend_name

-    async def save(self, key: EmbeddingKey, embedding_set: EmbeddingSet) -> tuple[str, int]:
+    async def save(
+        self, key: EmbeddingKey, embedding_set: EmbeddingSet
+    ) -> tuple[str, int]:
        """Persist the embedding set and return its storage key and byte size."""
        blob = serialize(embedding_set)
        storage_key = build_embedding_object_key(key)
-        await self._backend.put(
-            storage_key, blob, content_type=_EMBEDDING_CONTENT_TYPE
-        )
+        await self._backend.put(storage_key, blob, content_type=_EMBEDDING_CONTENT_TYPE)
        return storage_key, len(blob)

    async def load(self, storage_key: str) -> EmbeddingSet:
--- a/surfsense_backend/app/routes/editor_routes.py
+++ b/surfsense_backend/app/routes/editor_routes.py
@@ -86,8 +86,7 @@ async def get_editor_content(
        size_bytes = len(md.encode("utf-8"))
        line_count = md.count("\n") + 1
        too_large = (
-            size_bytes > EDITOR_PLATE_MAX_BYTES
-            or line_count > EDITOR_PLATE_MAX_LINES
+            size_bytes > EDITOR_PLATE_MAX_BYTES or line_count > EDITOR_PLATE_MAX_LINES
        )
        viewer_mode = "monaco" if too_large else "plate"
        return {
--- a/surfsense_backend/tests/integration/etl_pipeline/cache/test_cached_extraction.py
+++ b/surfsense_backend/tests/integration/etl_pipeline/cache/test_cached_extraction.py
@@ -66,9 +66,7 @@ async def test_identical_uploads_are_parsed_once_then_served_from_cache(
    assert second.content_type == "application/pdf"


-async def test_disabled_cache_parses_every_time(
-    tmp_path, monkeypatch, counting_parser
-):
+async def test_disabled_cache_parses_every_time(tmp_path, monkeypatch, counting_parser):
    monkeypatch.setattr(config, "ETL_CACHE_ENABLED", False)
    monkeypatch.setattr(config, "ETL_SERVICE", "LLAMACLOUD")

--- a/surfsense_backend/tests/integration/etl_pipeline/cache/test_cached_parse_repository.py
+++ b/surfsense_backend/tests/integration/etl_pipeline/cache/test_cached_parse_repository.py
@@ -18,9 +18,7 @@ pytestmark = pytest.mark.integration


 def _key(sha: str) -> ParseKey:
-    return ParseKey.for_document(
-        sha, etl_service="LLAMACLOUD", mode="basic", version=1
-    )
+    return ParseKey.for_document(sha, etl_service="LLAMACLOUD", mode="basic", version=1)


 async def _insert(repo, *, sha, size=100, storage_key=None):
--- a/surfsense_backend/tests/integration/etl_pipeline/cache/test_etl_cache_service.py
+++ b/surfsense_backend/tests/integration/etl_pipeline/cache/test_etl_cache_service.py
@@ -17,9 +17,7 @@ pytestmark = pytest.mark.integration


 def _key(sha: str = "c" * 64) -> ParseKey:
-    return ParseKey.for_document(
-        sha, etl_service="LLAMACLOUD", mode="basic", version=1
-    )
+    return ParseKey.for_document(sha, etl_service="LLAMACLOUD", mode="basic", version=1)


 async def test_recall_is_a_miss_for_an_unknown_key(db_session, cache_local_storage):
--- a/surfsense_backend/tests/integration/etl_pipeline/cache/test_eviction_task.py
+++ b/surfsense_backend/tests/integration/etl_pipeline/cache/test_eviction_task.py
@@ -20,9 +20,7 @@ pytestmark = pytest.mark.integration


 def _key(sha: str) -> ParseKey:
-    return ParseKey.for_document(
-        sha, etl_service="LLAMACLOUD", mode="basic", version=1
-    )
+    return ParseKey.for_document(sha, etl_service="LLAMACLOUD", mode="basic", version=1)


 def _result(markdown: str) -> EtlResult:
@@ -48,7 +46,9 @@ async def test_expired_entries_are_pruned(
    monkeypatch, cache_local_storage, clean_cache_table
 ):
    monkeypatch.setattr(config, "ETL_CACHE_ENABLED", True)
-    monkeypatch.setattr(config, "ETL_CACHE_TTL_DAYS", -1)  # cutoff in the future -> stale
+    monkeypatch.setattr(
+        config, "ETL_CACHE_TTL_DAYS", -1
+    )  # cutoff in the future -> stale
    monkeypatch.setattr(config, "ETL_CACHE_MAX_TOTAL_MB", 10_000)  # size phase no-op

    key = _key("a" * 64)
--- a/surfsense_backend/tests/integration/indexing_pipeline/cache/test_embedding_cache_service.py
+++ b/surfsense_backend/tests/integration/indexing_pipeline/cache/test_embedding_cache_service.py
@@ -38,8 +38,12 @@ async def test_remembered_set_recalls_as_equivalent_vectors(
    stored = EmbeddingSet(
        summary_embedding=np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32),
        chunks=[
-            CachedChunk("first chunk", np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float32)),
-            CachedChunk("second chunk", np.array([0.0, 1.0, 0.0, 0.0], dtype=np.float32)),
+            CachedChunk(
+                "first chunk", np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float32)
+            ),
+            CachedChunk(
+                "second chunk", np.array([0.0, 1.0, 0.0, 0.0], dtype=np.float32)
+            ),
        ],
    )

--- a/surfsense_backend/tests/integration/test_connector_index_authz.py
+++ b/surfsense_backend/tests/integration/test_connector_index_authz.py
@@ -71,7 +71,10 @@ async def _make_connector(
        connector_type=connector_type,
        # A stored credential the indexer would use — the thing a cross-tenant
        # index must never be able to abuse.
-        config={"GITHUB_PAT": "victim-secret-pat", "repo_full_names": ["octocat/Hello-World"]},
+        config={
+            "GITHUB_PAT": "victim-secret-pat",
+            "repo_full_names": ["octocat/Hello-World"],
+        },
        is_indexable=True,
        search_space_id=space.id,
        user_id=owner.id,
--- a/surfsense_backend/tests/unit/indexing_pipeline/cache/test_serialization.py
+++ b/surfsense_backend/tests/unit/indexing_pipeline/cache/test_serialization.py
@@ -23,7 +23,9 @@ def test_round_trip_preserves_texts_and_vectors():

    assert [c.text for c in restored.chunks] == [c.text for c in original.chunks]
    assert restored.chunk_count == 3
-    assert np.allclose(restored.summary_embedding, original.summary_embedding, atol=1e-6)
+    assert np.allclose(
+        restored.summary_embedding, original.summary_embedding, atol=1e-6
+    )
    for got, want in zip(restored.chunks, original.chunks, strict=True):
        assert np.allclose(got.embedding, want.embedding, atol=1e-6)