feat: support other s3 sig versions so it works with s3 (#461)

This commit is contained in:
Sky Moore 2026-06-24 14:06:34 +01:00 committed by GitHub
parent 811b9e9803
commit 1e2a276a61
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 116 additions and 8 deletions

View file

@ -18,6 +18,13 @@ ENABLE_AWS_S3="false"
# AWS_SECRET_ACCESS_KEY=""
# S3_BUCKET=""
# S3_REGION=""
# --- S3-compatible servers (MinIO, rustfs, Ceph, ...) ---
# Use the S3 backend (ENABLE_AWS_S3=true) against a non-AWS, S3-compatible
# server by overriding the endpoint and signing. Unlike the MinIO backend, the
# S3 backend emits real presigned URLs, so the bucket can stay private.
# S3_ENDPOINT_URL="" # e.g. https://s3.example.com (blank = AWS default)
# S3_SIGNATURE_VERSION="" # blank = botocore default; set "s3v4" if the server requires SigV4
# S3_ADDRESSING_STYLE="" # blank = auto; set "path" if the server / TLS cert requires path-style
# MinIO Configuration if using containerised MinIO instead of
# AWS S3

View file

@ -53,6 +53,17 @@ MINIO_SECURE = os.getenv("MINIO_SECURE", "false").lower() == "true"
# AWS S3 Configuration
S3_BUCKET = os.environ.get("S3_BUCKET")
S3_REGION = os.environ.get("S3_REGION", "us-east-1")
# Optional overrides for S3-compatible backends (e.g. MinIO, rustfs, Ceph).
# S3_ENDPOINT_URL: full URL of a custom S3 endpoint (e.g. "https://s3.example.com").
# Leave unset to use AWS's default endpoint resolution.
# S3_SIGNATURE_VERSION: botocore signature version used to sign requests and
# presigned URLs. Defaults to None (botocore's default, currently SigV2 for
# presigned URLs). Set to "s3v4" for S3-compatible servers that require SigV4.
# S3_ADDRESSING_STYLE: "auto" (default), "path", or "virtual". Many S3-compatible
# servers and TLS setups require "path".
S3_ENDPOINT_URL = os.environ.get("S3_ENDPOINT_URL")
S3_SIGNATURE_VERSION = os.environ.get("S3_SIGNATURE_VERSION")
S3_ADDRESSING_STYLE = os.environ.get("S3_ADDRESSING_STYLE")
# Sentry configuration
SENTRY_DSN = os.getenv("SENTRY_DSN")

View file

@ -1,29 +1,84 @@
from typing import Any, BinaryIO, Dict, Optional
import aioboto3
from botocore.config import Config
from botocore.exceptions import ClientError
from api.constants import (
S3_ADDRESSING_STYLE,
S3_ENDPOINT_URL,
S3_SIGNATURE_VERSION,
)
from .base import BaseFileSystem
class S3FileSystem(BaseFileSystem):
"""S3 implementation of the filesystem interface."""
def __init__(self, bucket_name: str, region_name: str = "us-east-1"):
def __init__(
self,
bucket_name: str,
region_name: str = "us-east-1",
endpoint_url: Optional[str] = None,
signature_version: Optional[str] = None,
addressing_style: Optional[str] = None,
):
"""Initialize S3 filesystem.
Args:
bucket_name: Name of the S3 bucket
region_name: AWS region name
endpoint_url: Optional custom S3 endpoint (e.g. for MinIO/rustfs).
Defaults to ``S3_ENDPOINT_URL`` env var; ``None`` uses AWS.
signature_version: Optional botocore signature version (e.g.
``"s3v4"``). Defaults to ``S3_SIGNATURE_VERSION`` env var;
``None`` keeps botocore's default signing behavior.
addressing_style: Optional S3 addressing style (``"path"`` /
``"virtual"`` / ``"auto"``). Defaults to ``S3_ADDRESSING_STYLE``
env var; ``None`` keeps botocore's default.
"""
self.bucket_name = bucket_name
self.region_name = region_name
self.endpoint_url = (
endpoint_url if endpoint_url is not None else S3_ENDPOINT_URL
)
signature_version = (
signature_version
if signature_version is not None
else S3_SIGNATURE_VERSION
)
addressing_style = (
addressing_style if addressing_style is not None else S3_ADDRESSING_STYLE
)
self.session = aioboto3.Session()
# Build a botocore Config only when an override is requested so that the
# default behavior is byte-for-byte unchanged when no env vars are set.
config_kwargs: Dict[str, Any] = {}
if signature_version:
config_kwargs["signature_version"] = signature_version
if addressing_style:
config_kwargs["s3"] = {"addressing_style": addressing_style}
self._config = Config(**config_kwargs) if config_kwargs else None
def _client_kwargs(self) -> Dict[str, Any]:
"""Common kwargs for every ``session.client("s3", ...)`` call.
Only includes ``endpoint_url`` / ``config`` when configured, so default
deployments behave exactly as before.
"""
kwargs: Dict[str, Any] = {"region_name": self.region_name}
if self.endpoint_url:
kwargs["endpoint_url"] = self.endpoint_url
if self._config is not None:
kwargs["config"] = self._config
return kwargs
async def acreate_file(self, file_path: str, content: BinaryIO) -> bool:
try:
async with self.session.client(
"s3", region_name=self.region_name
"s3", **self._client_kwargs()
) as s3_client:
await s3_client.put_object(
Bucket=self.bucket_name, Key=file_path, Body=await content.read()
@ -35,7 +90,7 @@ class S3FileSystem(BaseFileSystem):
async def aupload_file(self, local_path: str, destination_path: str) -> bool:
try:
async with self.session.client(
"s3", region_name=self.region_name
"s3", **self._client_kwargs()
) as s3_client:
await s3_client.upload_file(
local_path, self.bucket_name, destination_path
@ -60,7 +115,7 @@ class S3FileSystem(BaseFileSystem):
"""
try:
async with self.session.client(
"s3", region_name=self.region_name
"s3", **self._client_kwargs()
) as s3_client:
params = {"Bucket": self.bucket_name, "Key": file_path}
@ -101,7 +156,7 @@ class S3FileSystem(BaseFileSystem):
"""Get S3 object metadata."""
try:
async with self.session.client(
"s3", region_name=self.region_name
"s3", **self._client_kwargs()
) as s3_client:
response = await s3_client.head_object(
Bucket=self.bucket_name, Key=file_path
@ -127,7 +182,7 @@ class S3FileSystem(BaseFileSystem):
"""Generate a presigned PUT URL for direct file upload."""
try:
async with self.session.client(
"s3", region_name=self.region_name
"s3", **self._client_kwargs()
) as s3_client:
url = await s3_client.generate_presigned_url(
"put_object",
@ -146,7 +201,7 @@ class S3FileSystem(BaseFileSystem):
"""Download a file from S3 to local path."""
try:
async with self.session.client(
"s3", region_name=self.region_name
"s3", **self._client_kwargs()
) as s3_client:
await s3_client.download_file(self.bucket_name, source_path, local_path)
return True
@ -157,7 +212,7 @@ class S3FileSystem(BaseFileSystem):
"""Copy a file within S3 (server-side copy)."""
try:
async with self.session.client(
"s3", region_name=self.region_name
"s3", **self._client_kwargs()
) as s3_client:
await s3_client.copy_object(
Bucket=self.bucket_name,