dograh/api/services/filesystem/minio.py

209 lines
7.3 KiB
Python
Raw Normal View History

2025-09-09 14:37:32 +05:30
import asyncio
import json
2025-09-09 14:37:32 +05:30
from typing import Any, BinaryIO, Dict, Optional
2025-09-10 11:18:58 +05:30
from loguru import logger
2025-09-09 14:37:32 +05:30
from minio import Minio
from minio.error import S3Error
from .base import BaseFileSystem
class MinioFileSystem(BaseFileSystem):
    """MinIO implementation of the filesystem interface for OSS users.

    Two endpoints, two different purposes:

    - ``endpoint`` (host:port) + ``secure`` (bool): used by the MinIO SDK for
      container-to-container calls. The SDK requires these split.
    - ``public_endpoint`` (full URL, e.g. "https://example.com"): used verbatim
      when building URLs that browsers will fetch. Required.
    """

    def __init__(
        self,
        endpoint: str = "localhost:9000",
        access_key: str = "minioadmin",
        secret_key: str = "minioadmin",
        bucket_name: str = "voice-audio",
        secure: bool = False,
        public_endpoint: Optional[str] = None,
    ):
        """Create the SDK client and make a best-effort attempt at bucket setup.

        Args:
            endpoint: ``host:port`` the MinIO SDK connects to.
            access_key: MinIO access key.
            secret_key: MinIO secret key.
            bucket_name: Bucket that every operation of this instance targets.
            secure: Whether the SDK talks HTTPS to ``endpoint``.
            public_endpoint: Full URL (with scheme) used when building URLs
                handed to browsers. A trailing slash is stripped.

        Raises:
            ValueError: If ``public_endpoint`` is missing or has no
                ``http://`` / ``https://`` scheme.
        """
        if not public_endpoint:
            raise ValueError(
                "MinioFileSystem requires public_endpoint (set MINIO_PUBLIC_ENDPOINT). "
                "Expected a full URL with scheme, e.g. 'http://localhost:9000' or 'https://example.com'."
            )
        if not public_endpoint.startswith(("http://", "https://")):
            raise ValueError(
                f"MINIO_PUBLIC_ENDPOINT must include a scheme (http:// or https://), got: {public_endpoint!r}"
            )

        self.bucket_name = bucket_name
        self.endpoint = endpoint
        self.public_endpoint = public_endpoint.rstrip("/")
        self.secure = secure
        self.access_key = access_key
        self.secret_key = secret_key

        # Client for internal operations (uploads, etc.)
        self.client = Minio(
            endpoint, access_key=access_key, secret_key=secret_key, secure=secure
        )

        # Ensure bucket exists and configure anonymous access (using internal client)
        try:
            if not self.client.bucket_exists(self.bucket_name):
                self.client.make_bucket(self.bucket_name)

            # Set a public read/write policy for local development.
            # This allows:
            # 1. Anonymous downloads (s3:GetObject)
            # 2. Anonymous uploads (s3:PutObject) - bypasses presigned URL signature issues
            # 3. Anonymous deletes (s3:DeleteObject)
            # 4. List bucket contents (s3:ListBucket) for debugging
            # Note: This is set on every initialization to ensure policy is correct
            # WARNING: Only use in local development, not production! Anyone who
            # can reach the bucket can read, overwrite and delete its objects.
            policy = {
                "Version": "2012-10-17",
                "Statement": [
                    {
                        "Effect": "Allow",
                        "Principal": {"AWS": "*"},
                        "Action": ["s3:GetObject", "s3:PutObject", "s3:DeleteObject"],
                        "Resource": [f"arn:aws:s3:::{self.bucket_name}/*"],
                    },
                    {
                        "Effect": "Allow",
                        "Principal": {"AWS": "*"},
                        "Action": ["s3:ListBucket"],
                        "Resource": [f"arn:aws:s3:::{self.bucket_name}"],
                    },
                ],
            }
            self.client.set_bucket_policy(self.bucket_name, json.dumps(policy))
        except Exception as e:
            # Deliberately best-effort: the server may be unreachable or we may
            # lack permission to manage buckets/policies. Construction still
            # succeeds, but surface the problem above debug level because
            # anonymous uploads/downloads depend on this policy.
            logger.warning(f"Bucket setup note: {e}")

    async def acreate_file(self, file_path: str, content: BinaryIO) -> bool:
        """Upload the full contents of ``content`` as object ``file_path``.

        ``content.read()`` may be either a plain call returning bytes or a
        coroutine (async file wrappers); both are supported.

        Returns:
            True on success, False if MinIO rejected the upload.
        """
        import inspect
        import io

        data = content.read()
        if inspect.isawaitable(data):
            data = await data
        payload = bytes(data)

        def _put():
            # The SDK expects a readable stream, not raw bytes.
            self.client.put_object(
                self.bucket_name,
                file_path,
                data=io.BytesIO(payload),
                length=len(payload),
            )

        try:
            await asyncio.to_thread(_put)
            return True
        except S3Error as e:
            logger.error(f"Error creating MinIO object {file_path!r}: {e}")
            return False

    async def aupload_file(self, local_path: str, destination_path: str) -> bool:
        """Upload the local file ``local_path`` to object ``destination_path``.

        Returns:
            True on success, False if MinIO rejected the upload.
        """

        def _fput():
            self.client.fput_object(self.bucket_name, destination_path, local_path)

        try:
            await asyncio.to_thread(_fput)
            return True
        except S3Error as e:
            logger.error(
                f"Error uploading {local_path!r} to MinIO object {destination_path!r}: {e}"
            )
            return False

    async def aget_signed_url(
        self,
        file_path: str,
        expiration: int = 3600,
        force_inline: bool = False,
        use_internal_endpoint: bool = False,
    ) -> Optional[str]:
        """Return a plain (unsigned) URL for ``file_path``.

        The URL is built by string concatenation; no signature is generated,
        so ``expiration`` and ``force_inline`` are accepted for interface
        compatibility but have no effect here.

        Args:
            file_path: Object key inside the bucket.
            expiration: Ignored.
            force_inline: Ignored.
            use_internal_endpoint: If True, build the URL from ``endpoint`` and
                ``secure`` instead of ``public_endpoint``.

        Returns:
            The URL, or None if building it failed.
        """
        try:
            if use_internal_endpoint:
                protocol = "https" if self.secure else "http"
                base = f"{protocol}://{self.endpoint}"
            else:
                base = self.public_endpoint
            return f"{base}/{self.bucket_name}/{file_path}"
        except Exception as e:
            logger.error(f"Error generating MinIO URL: {e}")
            return None

    async def aget_file_metadata(self, file_path: str) -> Optional[Dict[str, Any]]:
        """Get MinIO object metadata.

        Returns:
            A dict with ``size``, ``created_at``, ``modified_at``, ``etag``,
            ``content_type`` and ``storage_class`` keys, or None if the stat
            call failed (for example, the object does not exist).
        """

        def _stat():
            return self.client.stat_object(self.bucket_name, file_path)

        try:
            stat = await asyncio.to_thread(_stat)
        except S3Error as e:
            # A missing object is an expected outcome, so keep this quiet.
            logger.debug(f"Could not stat MinIO object {file_path!r}: {e}")
            return None

        return {
            "size": stat.size,
            # Only last_modified is read from the stat result; it is reported
            # for both timestamps.
            "created_at": stat.last_modified,
            "modified_at": stat.last_modified,
            "etag": stat.etag.strip('"') if stat.etag else None,
            "content_type": stat.content_type,
            "storage_class": None,  # MinIO doesn't have storage classes like S3
        }

    async def aget_presigned_put_url(
        self,
        file_path: str,
        expiration: int = 900,
        content_type: str = "text/csv",
        max_size: int = 10_485_760,
    ) -> Optional[str]:
        """Generate an unsigned URL for direct file upload.

        For local MinIO development with anonymous upload enabled, we return
        a simple unsigned URL instead of a presigned URL. This avoids signature
        mismatch issues when the internal endpoint (minio:9000) differs from
        the public endpoint (localhost:9000).

        The bucket policy allows anonymous s3:PutObject, so no signature is needed.
        Because nothing is signed, ``expiration``, ``content_type`` and
        ``max_size`` are accepted for interface compatibility but not enforced.

        Returns:
            The upload URL, or None if building it failed.
        """
        try:
            url = f"{self.public_endpoint}/{self.bucket_name}/{file_path}"
            logger.debug(f"Generated unsigned upload URL: {url}")
            return url
        except Exception as e:
            logger.error(f"Error generating MinIO upload URL: {e}")
            return None

    async def adownload_file(self, source_path: str, local_path: str) -> bool:
        """Download a file from MinIO to local path.

        Returns:
            True on success, False if MinIO reported an error.
        """

        def _fget():
            self.client.fget_object(self.bucket_name, source_path, local_path)

        try:
            await asyncio.to_thread(_fget)
            return True
        except S3Error as e:
            logger.error(
                f"Error downloading MinIO object {source_path!r} to {local_path!r}: {e}"
            )
            return False

    async def acopy_file(self, source_path: str, destination_path: str) -> bool:
        """Copy a file within MinIO (server-side copy).

        Returns:
            True on success, False if MinIO reported an error.
        """
        from minio.commonconfig import CopySource

        def _copy():
            self.client.copy_object(
                self.bucket_name,
                destination_path,
                CopySource(self.bucket_name, source_path),
            )

        try:
            await asyncio.to_thread(_copy)
            return True
        except S3Error as e:
            logger.error(
                f"Error copying MinIO object {source_path!r} to {destination_path!r}: {e}"
            )
            return False