mirror of
https://github.com/dograh-hq/dograh.git
synced 2026-06-25 08:48:13 +02:00
feat: support other s3 sig versions so it works with s3 (#461)
This commit is contained in:
parent
811b9e9803
commit
1e2a276a61
5 changed files with 116 additions and 8 deletions
|
|
@ -18,6 +18,13 @@ ENABLE_AWS_S3="false"
|
|||
# AWS_SECRET_ACCESS_KEY=""
|
||||
# S3_BUCKET=""
|
||||
# S3_REGION=""
|
||||
# --- S3-compatible servers (MinIO, rustfs, Ceph, ...) ---
|
||||
# Use the S3 backend (ENABLE_AWS_S3=true) against a non-AWS, S3-compatible
|
||||
# server by overriding the endpoint and signing. Unlike the MinIO backend, the
|
||||
# S3 backend emits real presigned URLs, so the bucket can stay private.
|
||||
# S3_ENDPOINT_URL="" # e.g. https://s3.example.com (blank = AWS default)
|
||||
# S3_SIGNATURE_VERSION="" # blank = botocore default; set "s3v4" if the server requires SigV4
|
||||
# S3_ADDRESSING_STYLE="" # blank = auto; set "path" if the server / TLS cert requires path-style
|
||||
|
||||
# MinIO Configuration if using containerised MinIO instead of
|
||||
# AWS S3
|
||||
|
|
|
|||
|
|
@ -53,6 +53,17 @@ MINIO_SECURE = os.getenv("MINIO_SECURE", "false").lower() == "true"
|
|||
# AWS S3 Configuration
|
||||
S3_BUCKET = os.environ.get("S3_BUCKET")
|
||||
S3_REGION = os.environ.get("S3_REGION", "us-east-1")
|
||||
# Optional overrides for S3-compatible backends (e.g. MinIO, rustfs, Ceph).
|
||||
# S3_ENDPOINT_URL: full URL of a custom S3 endpoint (e.g. "https://s3.example.com").
|
||||
# Leave unset to use AWS's default endpoint resolution.
|
||||
# S3_SIGNATURE_VERSION: botocore signature version used to sign requests and
|
||||
# presigned URLs. Defaults to None (botocore's default, currently SigV2 for
|
||||
# presigned URLs). Set to "s3v4" for S3-compatible servers that require SigV4.
|
||||
# S3_ADDRESSING_STYLE: "auto" (default), "path", or "virtual". Many S3-compatible
|
||||
# servers and TLS setups require "path".
|
||||
S3_ENDPOINT_URL = os.environ.get("S3_ENDPOINT_URL")
|
||||
S3_SIGNATURE_VERSION = os.environ.get("S3_SIGNATURE_VERSION")
|
||||
S3_ADDRESSING_STYLE = os.environ.get("S3_ADDRESSING_STYLE")
|
||||
|
||||
# Sentry configuration
|
||||
SENTRY_DSN = os.getenv("SENTRY_DSN")
|
||||
|
|
|
|||
|
|
@ -1,29 +1,84 @@
|
|||
from typing import Any, BinaryIO, Dict, Optional
|
||||
|
||||
import aioboto3
|
||||
from botocore.config import Config
|
||||
from botocore.exceptions import ClientError
|
||||
|
||||
from api.constants import (
|
||||
S3_ADDRESSING_STYLE,
|
||||
S3_ENDPOINT_URL,
|
||||
S3_SIGNATURE_VERSION,
|
||||
)
|
||||
|
||||
from .base import BaseFileSystem
|
||||
|
||||
|
||||
class S3FileSystem(BaseFileSystem):
|
||||
"""S3 implementation of the filesystem interface."""
|
||||
|
||||
def __init__(
    self,
    bucket_name: str,
    region_name: str = "us-east-1",
    endpoint_url: Optional[str] = None,
    signature_version: Optional[str] = None,
    addressing_style: Optional[str] = None,
):
    """Initialize S3 filesystem.

    Each optional override falls back to its module-level constant
    (imported from ``api.constants``) only when the argument is ``None``.
    An explicit empty string is kept as-is and, being falsy, disables the
    override even if the corresponding constant is set.

    Args:
        bucket_name: Name of the S3 bucket
        region_name: AWS region name
        endpoint_url: Optional custom S3 endpoint (e.g. for MinIO/rustfs).
            Defaults to ``S3_ENDPOINT_URL``; a falsy value means no
            ``endpoint_url`` is passed to the client.
        signature_version: Optional botocore signature version (e.g.
            ``"s3v4"``). Defaults to ``S3_SIGNATURE_VERSION``; a falsy
            value keeps botocore's default signing behavior.
        addressing_style: Optional S3 addressing style (``"path"`` /
            ``"virtual"`` / ``"auto"``). Defaults to
            ``S3_ADDRESSING_STYLE``; a falsy value keeps botocore's default.
    """
    self.bucket_name = bucket_name
    self.region_name = region_name
    self.endpoint_url = (
        endpoint_url if endpoint_url is not None else S3_ENDPOINT_URL
    )
    signature_version = (
        signature_version
        if signature_version is not None
        else S3_SIGNATURE_VERSION
    )
    addressing_style = (
        addressing_style if addressing_style is not None else S3_ADDRESSING_STYLE
    )
    self.session = aioboto3.Session()

    # Build a botocore Config only when an override is requested so that
    # no ``config`` argument is passed at all when nothing is configured.
    config_kwargs: Dict[str, Any] = {}
    if signature_version:
        config_kwargs["signature_version"] = signature_version
    if addressing_style:
        # botocore expects the addressing style nested under the "s3" key.
        config_kwargs["s3"] = {"addressing_style": addressing_style}
    self._config = Config(**config_kwargs) if config_kwargs else None
|
||||
|
||||
def _client_kwargs(self) -> Dict[str, Any]:
    """Common kwargs for every ``session.client("s3", ...)`` call.

    Always contains ``region_name``. ``endpoint_url`` and ``config`` are
    added only when configured, so a default deployment passes nothing
    but the region.

    Returns:
        A fresh dict suitable for ``**``-expansion into
        ``session.client("s3", ...)``.
    """
    kwargs: Dict[str, Any] = {"region_name": self.region_name}
    # Truthiness check: both ``None`` and an empty string mean "no override".
    if self.endpoint_url:
        kwargs["endpoint_url"] = self.endpoint_url
    if self._config is not None:
        kwargs["config"] = self._config
    return kwargs
|
||||
|
||||
async def acreate_file(self, file_path: str, content: BinaryIO) -> bool:
|
||||
try:
|
||||
async with self.session.client(
|
||||
"s3", region_name=self.region_name
|
||||
"s3", **self._client_kwargs()
|
||||
) as s3_client:
|
||||
await s3_client.put_object(
|
||||
Bucket=self.bucket_name, Key=file_path, Body=await content.read()
|
||||
|
|
@ -35,7 +90,7 @@ class S3FileSystem(BaseFileSystem):
|
|||
async def aupload_file(self, local_path: str, destination_path: str) -> bool:
|
||||
try:
|
||||
async with self.session.client(
|
||||
"s3", region_name=self.region_name
|
||||
"s3", **self._client_kwargs()
|
||||
) as s3_client:
|
||||
await s3_client.upload_file(
|
||||
local_path, self.bucket_name, destination_path
|
||||
|
|
@ -60,7 +115,7 @@ class S3FileSystem(BaseFileSystem):
|
|||
"""
|
||||
try:
|
||||
async with self.session.client(
|
||||
"s3", region_name=self.region_name
|
||||
"s3", **self._client_kwargs()
|
||||
) as s3_client:
|
||||
params = {"Bucket": self.bucket_name, "Key": file_path}
|
||||
|
||||
|
|
@ -101,7 +156,7 @@ class S3FileSystem(BaseFileSystem):
|
|||
"""Get S3 object metadata."""
|
||||
try:
|
||||
async with self.session.client(
|
||||
"s3", region_name=self.region_name
|
||||
"s3", **self._client_kwargs()
|
||||
) as s3_client:
|
||||
response = await s3_client.head_object(
|
||||
Bucket=self.bucket_name, Key=file_path
|
||||
|
|
@ -127,7 +182,7 @@ class S3FileSystem(BaseFileSystem):
|
|||
"""Generate a presigned PUT URL for direct file upload."""
|
||||
try:
|
||||
async with self.session.client(
|
||||
"s3", region_name=self.region_name
|
||||
"s3", **self._client_kwargs()
|
||||
) as s3_client:
|
||||
url = await s3_client.generate_presigned_url(
|
||||
"put_object",
|
||||
|
|
@ -146,7 +201,7 @@ class S3FileSystem(BaseFileSystem):
|
|||
"""Download a file from S3 to local path."""
|
||||
try:
|
||||
async with self.session.client(
|
||||
"s3", region_name=self.region_name
|
||||
"s3", **self._client_kwargs()
|
||||
) as s3_client:
|
||||
await s3_client.download_file(self.bucket_name, source_path, local_path)
|
||||
return True
|
||||
|
|
@ -157,7 +212,7 @@ class S3FileSystem(BaseFileSystem):
|
|||
"""Copy a file within S3 (server-side copy)."""
|
||||
try:
|
||||
async with self.session.client(
|
||||
"s3", region_name=self.region_name
|
||||
"s3", **self._client_kwargs()
|
||||
) as s3_client:
|
||||
await s3_client.copy_object(
|
||||
Bucket=self.bucket_name,
|
||||
|
|
|
|||
|
|
@ -148,6 +148,15 @@ services:
|
|||
# Storage configuration - using local MinIO
|
||||
ENABLE_AWS_S3: "false"
|
||||
|
||||
# To use AWS S3 or any S3-compatible server (MinIO, rustfs, Ceph, ...)
|
||||
# instead of the bundled MinIO, set ENABLE_AWS_S3 to "true" and provide:
|
||||
# S3_BUCKET, S3_REGION, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY
|
||||
# For a non-AWS S3-compatible server, also set:
|
||||
# S3_ENDPOINT_URL e.g. https://s3.example.com
|
||||
# S3_SIGNATURE_VERSION set "s3v4" if the server requires SigV4 (e.g. rustfs)
|
||||
# S3_ADDRESSING_STYLE set "path" if the server / TLS cert requires path-style
|
||||
# The S3 backend issues real presigned URLs, so the bucket can stay private.
|
||||
|
||||
# MinIO
|
||||
MINIO_ENDPOINT: "minio:9000"
|
||||
# Full URL (with scheme) browsers use to reach MinIO. For remote
|
||||
|
|
|
|||
|
|
@ -95,6 +95,32 @@ Dograh uses **MinIO by default**, which is bundled with the self-hosted deployme
|
|||
| `ENABLE_AWS_S3` | `false` | Set to `true` to use the S3 backend (AWS S3 or any S3-compatible server) instead of the bundled MinIO backend |
|
||||
| `S3_BUCKET` | `null` | S3 bucket name |
|
||||
| `S3_REGION` | `us-east-1` | AWS region |
|
||||
| `S3_ENDPOINT_URL` | `null` | Custom S3 endpoint for S3-compatible servers (e.g. `https://s3.example.com`). Leave unset for AWS. |
|
||||
| `S3_SIGNATURE_VERSION` | `null` | Signing version. Unset uses botocore's default; set `s3v4` for servers that require SigV4. |
|
||||
| `S3_ADDRESSING_STYLE` | `null` | Addressing style: `auto`, `path`, or `virtual`. Unset keeps botocore's default (`auto`). Many S3-compatible servers and TLS setups require `path`. |
|
||||
|
||||
Credentials come from the standard `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` environment variables.
|
||||
|
||||
#### S3-compatible servers (MinIO, rustfs, Ceph, ...)
|
||||
|
||||
The S3 backend can target any S3-compatible server, not just AWS. Prefer it over the MinIO backend when you need **presigned URLs against a private bucket**: the MinIO backend returns plain unsigned object URLs and relies on the bucket being anonymously public-readable, whereas the S3 backend issues real presigned URLs so the bucket can stay private.
|
||||
|
||||
To use it, set `ENABLE_AWS_S3=true` and point it at your server with the `S3_*` overrides above. For example, against [rustfs](https://github.com/rustfs/rustfs):
|
||||
|
||||
```bash
|
||||
ENABLE_AWS_S3=true
|
||||
S3_BUCKET=voice-audio
|
||||
S3_REGION=us-east-1
|
||||
S3_ENDPOINT_URL=https://s3.example.com
|
||||
S3_SIGNATURE_VERSION=s3v4 # rustfs rejects SigV2 with SignatureDoesNotMatch
|
||||
S3_ADDRESSING_STYLE=path # rustfs and most non-AWS TLS certs require path-style
|
||||
AWS_ACCESS_KEY_ID=...
|
||||
AWS_SECRET_ACCESS_KEY=...
|
||||
```
|
||||
|
||||
<Note>
|
||||
Presigned URLs point at `S3_ENDPOINT_URL`, so that host must be reachable from the browser. Because browsers fetch transcripts cross-origin, the bucket also needs a CORS rule allowing your app's origin for `GET`/`HEAD` (and for `PUT` as well if clients upload directly from the browser using presigned PUT URLs) — configure this on the storage server (e.g. via `PutBucketCors`), not in Dograh.
|
||||
</Note>
|
||||
|
||||
---
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue