Initial Commit 🚀 🚀

This commit is contained in:
Abhishek Kumar 2025-09-09 14:37:32 +05:30
commit 4f2a629340
444 changed files with 76863 additions and 0 deletions

View file

@ -0,0 +1,9 @@
from .base import BaseFileSystem
from .minio import MinioFileSystem
from .s3 import S3FileSystem
# Public API of the package: the abstract interface plus the object-store
# backends imported above.
# NOTE(review): LocalFileSystem is not re-exported here — confirm that is
# intentional.
__all__ = ["BaseFileSystem", "S3FileSystem", "MinioFileSystem"]

View file

@ -0,0 +1,60 @@
from abc import ABC, abstractmethod
from typing import Any, BinaryIO, Dict, Optional
class BaseFileSystem(ABC):
    """Interface that every storage backend has to implement.

    All operations are coroutines. Failures are reported through the return
    value (``False`` / ``None``) as described on each method.
    """

    @abstractmethod
    async def acreate_file(self, file_path: str, content: BinaryIO) -> bool:
        """Write ``content`` to a new file at ``file_path``.

        Args:
            file_path: Destination path of the file to create.
            content: Binary stream holding the file's bytes.

        Returns:
            bool: True when the file was created, False otherwise.
        """
        ...

    @abstractmethod
    async def aupload_file(self, local_path: str, destination_path: str) -> bool:
        """Copy a file from the local disk to the backend.

        Args:
            local_path: Location of the source file on the local disk.
            destination_path: Path the file should have in the backend.

        Returns:
            bool: True when the upload succeeded, False otherwise.
        """
        ...

    @abstractmethod
    async def aget_signed_url(
        self, file_path: str, expiration: int = 3600
    ) -> Optional[str]:
        """Produce a time-limited URL that grants access to a file.

        Args:
            file_path: Path of the file to expose.
            expiration: Lifetime of the URL in seconds (defaults to one hour).

        Returns:
            Optional[str]: The signed URL, or None when it could not be
            generated.
        """
        ...

    @abstractmethod
    async def aget_file_metadata(self, file_path: str) -> Optional[Dict[str, Any]]:
        """Look up metadata describing a file.

        Args:
            file_path: Path of the file to inspect.

        Returns:
            Optional[Dict[str, Any]]: Metadata such as size, created_at,
            modified_at and etag, or None when it could not be retrieved.
        """
        ...

View file

@ -0,0 +1,95 @@
import asyncio
import mimetypes
import os
from datetime import datetime, timezone
from typing import Any, BinaryIO, Dict, Optional

import aiofiles

from .base import BaseFileSystem
class LocalFileSystem(BaseFileSystem):
    """Filesystem backend that stores files under a local base directory.

    Every ``file_path`` handed to the public methods is joined onto
    ``base_path``. Failures are reported through the return value
    (``False`` / ``None``) rather than by raising.
    """

    # Copy in bounded chunks so large files are never held in memory at once.
    _COPY_CHUNK_SIZE = 1024 * 1024

    def __init__(self, base_path: str):
        """Initialize local filesystem.

        Args:
            base_path: Base directory path for file operations. It is created
                if it does not exist yet.
        """
        self.base_path = base_path
        os.makedirs(base_path, exist_ok=True)
        # Strong references to pending symlink-cleanup tasks. The event loop
        # only keeps weak references to tasks, so an unreferenced task could
        # be garbage-collected before it runs.
        self._cleanup_tasks: set = set()

    def _get_full_path(self, file_path: str) -> str:
        """Get the full path by joining with base path.

        NOTE(review): ``os.path.join`` discards ``base_path`` when
        ``file_path`` is absolute, and ``..`` segments are not rejected, so
        callers are expected to pass trusted, relative paths.
        """
        return os.path.join(self.base_path, file_path)

    @staticmethod
    async def _read_all(content) -> bytes:
        """Read ``content`` fully, supporting both sync and async ``read()``."""
        data = content.read()
        if hasattr(data, "__await__"):
            data = await data
        return data

    async def acreate_file(self, file_path: str, content: BinaryIO) -> bool:
        """Create (or overwrite) ``file_path`` with the bytes of ``content``.

        Args:
            file_path: Path relative to ``base_path``.
            content: Binary stream; its ``read()`` may be a plain method or a
                coroutine.

        Returns:
            bool: True if the file was written, False on any error.
        """
        try:
            # Read first so a failing source never leaves an empty or
            # truncated destination file behind.
            data = await self._read_all(content)
            full_path = self._get_full_path(file_path)
            os.makedirs(os.path.dirname(full_path), exist_ok=True)
            async with aiofiles.open(full_path, "wb") as f:
                await f.write(data)
            return True
        except Exception:
            return False

    async def create_temp_file(self, file_path: str) -> bool:
        """Ensure the parent directory of ``file_path`` exists.

        No file is created; only the directory that would contain it.

        Returns:
            bool: True if the directory exists afterwards, False on any error.
        """
        try:
            full_path = self._get_full_path(file_path)
            os.makedirs(os.path.dirname(full_path), exist_ok=True)
            return True
        except Exception:
            return False

    async def aupload_file(self, local_path: str, destination_path: str) -> bool:
        """Copy ``local_path`` to ``destination_path`` under ``base_path``.

        Args:
            local_path: Path of the source file.
            destination_path: Path relative to ``base_path``.

        Returns:
            bool: True if the copy succeeded, False on any error.
        """
        try:
            full_dest_path = self._get_full_path(destination_path)
            os.makedirs(os.path.dirname(full_dest_path), exist_ok=True)
            async with (
                aiofiles.open(local_path, "rb") as src,
                aiofiles.open(full_dest_path, "wb") as dst,
            ):
                while chunk := await src.read(self._COPY_CHUNK_SIZE):
                    await dst.write(chunk)
            return True
        except Exception:
            return False

    async def aget_signed_url(
        self, file_path: str, expiration: int = 3600
    ) -> Optional[str]:
        """Expose a file through a temporary symlink and return its URL path.

        A symlink to the file is created in ``<base_path>/.temp_links`` and a
        background task removes it after ``expiration`` seconds.

        Args:
            file_path: Path relative to ``base_path``.
            expiration: Lifetime of the symlink in seconds.

        Returns:
            Optional[str]: ``/files/<link name>``, or None if the file does
            not exist or the link could not be created.
        """
        # For local filesystem, we'll create a temporary symlink with expiration
        try:
            full_path = self._get_full_path(file_path)
            if not os.path.exists(full_path):
                return None

            # Create a temporary directory for symlinks
            temp_dir = os.path.join(self.base_path, ".temp_links")
            os.makedirs(temp_dir, exist_ok=True)

            # Generate a unique temporary filename
            temp_filename = (
                f"{datetime.now().timestamp()}_{os.path.basename(file_path)}"
            )
            temp_path = os.path.join(temp_dir, temp_filename)

            # A relative symlink target is resolved against the link's own
            # directory, so point at the absolute path to avoid a dangling
            # link when base_path is relative.
            os.symlink(os.path.abspath(full_path), temp_path)

            # Schedule deletion after expiration
            async def delete_after_expiration():
                await asyncio.sleep(expiration)
                try:
                    os.remove(temp_path)
                except OSError:
                    # Best effort: the link may already have been removed.
                    pass

            task = asyncio.create_task(delete_after_expiration())
            self._cleanup_tasks.add(task)
            task.add_done_callback(self._cleanup_tasks.discard)

            return f"/files/{temp_filename}"
        except Exception:
            return None

    async def aget_file_metadata(self, file_path: str) -> Optional[Dict[str, Any]]:
        """Get metadata for a file stored under ``base_path``.

        Implementing this abstract method is what makes the class
        instantiable.

        Args:
            file_path: Path relative to ``base_path``.

        Returns:
            Optional[Dict[str, Any]]: Dict with ``size``, ``created_at``,
            ``modified_at`` (timezone-aware UTC datetimes), ``etag``,
            ``content_type`` and ``storage_class``; None if the path is not
            a regular file or cannot be inspected.
        """
        try:
            full_path = self._get_full_path(file_path)
            if not os.path.isfile(full_path):
                return None
            st = os.stat(full_path)
        except OSError:
            return None

        modified_at = datetime.fromtimestamp(st.st_mtime, tz=timezone.utc)
        # st_birthtime only exists on some platforms; fall back to the
        # modification time elsewhere.
        birth = getattr(st, "st_birthtime", None)
        created_at = (
            datetime.fromtimestamp(birth, tz=timezone.utc)
            if birth is not None
            else modified_at
        )
        content_type, _ = mimetypes.guess_type(full_path)
        return {
            "size": st.st_size,
            "created_at": created_at,
            "modified_at": modified_at,
            "etag": None,  # no checksum is tracked for local files
            "content_type": content_type,
            "storage_class": None,  # not applicable to local storage
        }

View file

@ -0,0 +1,137 @@
import asyncio
from datetime import timedelta
from typing import Any, BinaryIO, Dict, Optional
from minio import Minio
from minio.error import S3Error
from .base import BaseFileSystem
class MinioFileSystem(BaseFileSystem):
    """MinIO implementation of the filesystem interface for OSS users.

    Handles both internal (container-to-container) and external (browser) access:
    - endpoint: Used for API operations (uploads, downloads from code)
    - public_endpoint: Used for generating browser-accessible presigned URLs

    ``public_endpoint`` falls back to ``endpoint`` when it is not provided.
    This class does not read environment variables or special-case Docker
    host names itself. The auto-detection described for this backend
    (MINIO_PUBLIC_ENDPOINT, "minio:9000" -> "localhost:9000") is presumably
    resolved by the caller before constructing it — verify against the
    call site.
    """

    def __init__(
        self,
        endpoint: str = "localhost:9000",
        access_key: str = "minioadmin",
        secret_key: str = "minioadmin",
        bucket_name: str = "voice-audio",
        secure: bool = False,
        public_endpoint: Optional[str] = None,
    ):
        """Create the MinIO client and make sure the bucket exists.

        Args:
            endpoint: ``host:port`` used for API calls made by this process.
            access_key: MinIO access key.
            secret_key: MinIO secret key.
            bucket_name: Bucket all operations are performed on.
            secure: Passed through to the MinIO client.
            public_endpoint: ``host:port`` substituted into presigned URLs;
                defaults to ``endpoint``.
        """
        self.bucket_name = bucket_name
        self.endpoint = endpoint
        self.public_endpoint = public_endpoint or endpoint
        self.secure = secure
        self.access_key = access_key
        self.secret_key = secret_key

        # Client for internal operations (uploads, etc.)
        self.client = Minio(
            endpoint, access_key=access_key, secret_key=secret_key, secure=secure
        )

        # Ensure bucket exists (using internal client)
        try:
            if not self.client.bucket_exists(self.bucket_name):
                self.client.make_bucket(self.bucket_name)
        except Exception:
            # Deliberately best effort: the bucket might already exist or we
            # might be in a restricted environment.
            pass

    @staticmethod
    async def _read_all(content) -> bytes:
        """Read ``content`` fully, supporting both sync and async ``read()``."""
        data = content.read()
        if hasattr(data, "__await__"):
            data = await data
        return data

    async def acreate_file(self, file_path: str, content: BinaryIO) -> bool:
        """Upload the bytes of ``content`` as object ``file_path``.

        Args:
            file_path: Object name inside the bucket.
            content: Binary stream; its ``read()`` may be a plain method or a
                coroutine.

        Returns:
            bool: True on success, False if MinIO reported an ``S3Error``.
        """
        # Local import: this module's import block does not provide ``io``.
        from io import BytesIO

        try:
            payload = bytes(await self._read_all(content))

            def _put():
                # put_object needs a stream exposing read(), not raw bytes.
                self.client.put_object(
                    self.bucket_name,
                    file_path,
                    data=BytesIO(payload),
                    length=len(payload),
                )

            # The MinIO client is blocking; keep it off the event loop.
            await asyncio.to_thread(_put)
            return True
        except S3Error:
            return False

    async def aupload_file(self, local_path: str, destination_path: str) -> bool:
        """Upload the local file ``local_path`` as object ``destination_path``.

        Returns:
            bool: True on success, False if MinIO reported an ``S3Error``.
        """
        try:

            def _fput():
                self.client.fput_object(self.bucket_name, destination_path, local_path)

            await asyncio.to_thread(_fput)
            return True
        except S3Error:
            return False

    async def aget_signed_url(
        self, file_path: str, expiration: int = 3600, force_inline: bool = False
    ) -> Optional[str]:
        """Generate a presigned GET URL for ``file_path``.

        Args:
            file_path: Object name inside the bucket.
            expiration: URL lifetime in seconds.
            force_inline: When True and the object name ends with ``.txt``,
                request ``text/plain`` / ``inline`` response headers.

        Returns:
            Optional[str]: The URL (with the public endpoint substituted when
            it differs from the internal one), or None on ``S3Error``.
        """
        try:

            def _presign():
                response_headers = None
                if force_inline and file_path.endswith(".txt"):
                    response_headers = {
                        "response-content-type": "text/plain",
                        "response-content-disposition": "inline",
                    }

                # Generate URL with the main client
                url = self.client.presigned_get_object(
                    self.bucket_name,
                    file_path,
                    expires=timedelta(seconds=expiration),
                    response_headers=response_headers,
                )

                # If we have different public endpoint, replace it in the URL
                if self.endpoint != self.public_endpoint:
                    # NOTE(review): presigned URLs normally sign the host
                    # header, so rewriting the host after signing may
                    # invalidate the signature unless a proxy restores the
                    # original Host — confirm. Presigning with a client bound
                    # to the public endpoint is the usual alternative.
                    url = url.replace(
                        f"://{self.endpoint}/", f"://{self.public_endpoint}/"
                    )
                    url = url.replace(
                        f"Host={self.endpoint}", f"Host={self.public_endpoint}"
                    )

                return url

            return await asyncio.to_thread(_presign)
        except S3Error:
            return None

    async def aget_file_metadata(self, file_path: str) -> Optional[Dict[str, Any]]:
        """Get MinIO object metadata.

        Returns:
            Optional[Dict[str, Any]]: Dict with ``size``, ``created_at``,
            ``modified_at`` (both taken from ``last_modified``), ``etag``,
            ``content_type`` and ``storage_class`` (always None); None on
            ``S3Error``.
        """
        try:

            def _stat():
                return self.client.stat_object(self.bucket_name, file_path)

            stat = await asyncio.to_thread(_stat)
            return {
                "size": stat.size,
                "created_at": stat.last_modified,
                "modified_at": stat.last_modified,
                "etag": stat.etag.strip('"') if stat.etag else None,
                "content_type": stat.content_type,
                "storage_class": None,  # MinIO doesn't have storage classes like S3
            }
        except S3Error:
            return None

View file

@ -0,0 +1,99 @@
from typing import Any, BinaryIO, Dict, Optional
import aioboto3
from botocore.exceptions import ClientError
from .base import BaseFileSystem
class S3FileSystem(BaseFileSystem):
    """S3 implementation of the filesystem interface.

    A new client is opened from the shared ``aioboto3`` session for every
    operation. Methods report a ``ClientError`` through their return value
    (``False`` / ``None``).
    """

    def __init__(self, bucket_name: str, region_name: str = "us-east-1"):
        """Initialize S3 filesystem.

        Args:
            bucket_name: Name of the S3 bucket
            region_name: AWS region name
        """
        self.bucket_name = bucket_name
        self.region_name = region_name
        self.session = aioboto3.Session()

    @staticmethod
    async def _read_all(content) -> bytes:
        """Read ``content`` fully, supporting both sync and async ``read()``."""
        data = content.read()
        if hasattr(data, "__await__"):
            data = await data
        return data

    async def acreate_file(self, file_path: str, content: BinaryIO) -> bool:
        """Upload the bytes of ``content`` as object ``file_path``.

        Args:
            file_path: Object key inside the bucket.
            content: Binary stream; its ``read()`` may be a plain method or a
                coroutine.

        Returns:
            bool: True on success, False on ``ClientError``.
        """
        try:
            # Read before opening the client so no connection is held while
            # the source stream is consumed.
            body = await self._read_all(content)
            async with self.session.client(
                "s3", region_name=self.region_name
            ) as s3_client:
                await s3_client.put_object(
                    Bucket=self.bucket_name, Key=file_path, Body=body
                )
            return True
        except ClientError:
            return False

    async def aupload_file(self, local_path: str, destination_path: str) -> bool:
        """Upload the local file ``local_path`` as object ``destination_path``.

        Returns:
            bool: True on success, False on ``ClientError``.
        """
        try:
            async with self.session.client(
                "s3", region_name=self.region_name
            ) as s3_client:
                await s3_client.upload_file(
                    local_path, self.bucket_name, destination_path
                )
            return True
        except ClientError:
            return False

    async def aget_signed_url(
        self, file_path: str, expiration: int = 3600, force_inline: bool = False
    ) -> Optional[str]:
        """Generate a presigned GET url for the given object.

        For transcript text files we force the response headers so that the
        browser renders the content **inline** instead of triggering a file
        download. We do this by asking S3 to override the content type &
        disposition on the response.

        Args:
            file_path: Object key inside the bucket.
            expiration: URL lifetime in seconds.
            force_inline: When True and the key ends with ``.txt``, request
                the inline response-header overrides.

        Returns:
            Optional[str]: The presigned URL, or None on ``ClientError``.
        """
        params = {"Bucket": self.bucket_name, "Key": file_path}

        # Make transcripts viewable inline in the browser when requested
        if force_inline and file_path.endswith(".txt"):
            params.update(
                {
                    "ResponseContentType": "text/plain",
                    "ResponseContentDisposition": "inline",
                }
            )

        try:
            async with self.session.client(
                "s3", region_name=self.region_name
            ) as s3_client:
                url = await s3_client.generate_presigned_url(
                    "get_object",
                    Params=params,
                    ExpiresIn=expiration,
                )
            return url
        except ClientError:
            return None

    async def aget_file_metadata(self, file_path: str) -> Optional[Dict[str, Any]]:
        """Get S3 object metadata.

        Returns:
            Optional[Dict[str, Any]]: Dict with ``size``, ``created_at``,
            ``modified_at`` (both taken from ``LastModified``), ``etag``,
            ``content_type`` and ``storage_class``; None on ``ClientError``.
        """
        try:
            async with self.session.client(
                "s3", region_name=self.region_name
            ) as s3_client:
                response = await s3_client.head_object(
                    Bucket=self.bucket_name, Key=file_path
                )
            return {
                "size": response.get("ContentLength"),
                "created_at": response.get("LastModified"),
                "modified_at": response.get("LastModified"),
                "etag": response.get("ETag", "").strip('"'),
                "content_type": response.get("ContentType"),
                "storage_class": response.get("StorageClass"),
            }
        except ClientError:
            return None