mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-06-04 20:05:16 +02:00
feat(file-storage): add storage service and metadata schema
This commit is contained in:
parent
72174c780a
commit
7065615043
2 changed files with 152 additions and 0 deletions
23
surfsense_backend/app/file_storage/schemas.py
Normal file
23
surfsense_backend/app/file_storage/schemas.py
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
"""API shapes for document file metadata."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
from app.file_storage.persistence.enums import DocumentFileKind
|
||||
|
||||
|
||||
class DocumentFileRead(BaseModel):
|
||||
"""Lightweight metadata for one stored document file (no bytes)."""
|
||||
|
||||
id: int
|
||||
document_id: int
|
||||
kind: DocumentFileKind
|
||||
original_filename: str
|
||||
mime_type: str | None = None
|
||||
size_bytes: int
|
||||
created_at: datetime
|
||||
|
||||
model_config = ConfigDict(from_attributes=True)
|
||||
129
surfsense_backend/app/file_storage/service.py
Normal file
129
surfsense_backend/app/file_storage/service.py
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
"""Application service: persist, locate, and remove a document's stored files.
|
||||
|
||||
Coordinates the storage backend (bytes) with the ``document_files`` table
|
||||
(metadata). Callers own the surrounding DB transaction/commit.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
from collections.abc import AsyncIterator, Sequence
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.file_storage.backends.base import StorageBackend
|
||||
from app.file_storage.factory import get_storage_backend
|
||||
from app.file_storage.keys import build_document_file_key
|
||||
from app.file_storage.persistence.enums import DocumentFileKind
|
||||
from app.file_storage.persistence.models import DocumentFile
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def store_document_file(
|
||||
session: AsyncSession,
|
||||
*,
|
||||
document_id: int,
|
||||
search_space_id: int,
|
||||
data: bytes,
|
||||
filename: str,
|
||||
mime_type: str | None = None,
|
||||
kind: DocumentFileKind = DocumentFileKind.ORIGINAL,
|
||||
created_by_id: str | UUID | None = None,
|
||||
backend: StorageBackend | None = None,
|
||||
) -> DocumentFile:
|
||||
"""Write bytes to storage and add a ``DocumentFile`` row to the session."""
|
||||
backend = backend or get_storage_backend()
|
||||
key = build_document_file_key(
|
||||
search_space_id=search_space_id,
|
||||
document_id=document_id,
|
||||
kind=kind,
|
||||
filename=filename,
|
||||
)
|
||||
await backend.put(key, data, content_type=mime_type)
|
||||
|
||||
record = DocumentFile(
|
||||
document_id=document_id,
|
||||
search_space_id=search_space_id,
|
||||
kind=kind,
|
||||
storage_backend=backend.backend_name,
|
||||
storage_key=key,
|
||||
original_filename=filename,
|
||||
mime_type=mime_type,
|
||||
size_bytes=len(data),
|
||||
checksum_sha256=hashlib.sha256(data).hexdigest(),
|
||||
created_by_id=created_by_id,
|
||||
)
|
||||
session.add(record)
|
||||
return record
|
||||
|
||||
|
||||
async def list_document_files(
|
||||
session: AsyncSession, *, document_id: int
|
||||
) -> list[DocumentFile]:
|
||||
"""Return all stored files for a document, newest first."""
|
||||
result = await session.execute(
|
||||
select(DocumentFile)
|
||||
.where(DocumentFile.document_id == document_id)
|
||||
.order_by(DocumentFile.created_at.desc())
|
||||
)
|
||||
return list(result.scalars().all())
|
||||
|
||||
|
||||
async def get_document_file(
|
||||
session: AsyncSession,
|
||||
*,
|
||||
document_id: int,
|
||||
kind: DocumentFileKind = DocumentFileKind.ORIGINAL,
|
||||
) -> DocumentFile | None:
|
||||
"""Return the most recent stored file of ``kind`` for a document."""
|
||||
result = await session.execute(
|
||||
select(DocumentFile)
|
||||
.where(
|
||||
DocumentFile.document_id == document_id,
|
||||
DocumentFile.kind == kind,
|
||||
)
|
||||
.order_by(DocumentFile.created_at.desc())
|
||||
)
|
||||
return result.scalars().first()
|
||||
|
||||
|
||||
def open_document_file_stream(
|
||||
record: DocumentFile, *, backend: StorageBackend | None = None
|
||||
) -> AsyncIterator[bytes]:
|
||||
"""Open a chunked byte stream for a stored file."""
|
||||
backend = backend or get_storage_backend()
|
||||
return backend.open_stream(record.storage_key)
|
||||
|
||||
|
||||
async def purge_document_blobs(
|
||||
session: AsyncSession,
|
||||
*,
|
||||
document_ids: Sequence[int],
|
||||
backend: StorageBackend | None = None,
|
||||
) -> None:
|
||||
"""Delete stored blobs for the given documents.
|
||||
|
||||
Call this before the ``document_files`` rows are removed (they cascade with
|
||||
the document). Best-effort: a failed blob delete is logged, not raised, so
|
||||
document deletion is never blocked by an orphaned blob.
|
||||
"""
|
||||
if not document_ids:
|
||||
return
|
||||
|
||||
backend = backend or get_storage_backend()
|
||||
result = await session.execute(
|
||||
select(DocumentFile.storage_key).where(
|
||||
DocumentFile.document_id.in_(document_ids)
|
||||
)
|
||||
)
|
||||
for storage_key in result.scalars().all():
|
||||
try:
|
||||
await backend.delete(storage_key)
|
||||
except Exception as delete_error:
|
||||
logger.warning(
|
||||
"Failed to delete stored blob %s: %s", storage_key, delete_error
|
||||
)
|
||||
Loading…
Add table
Add a link
Reference in a new issue