Initial Commit 🚀 🚀

This commit is contained in:
Abhishek Kumar 2025-09-09 14:37:32 +05:30
commit 4f2a629340
444 changed files with 76863 additions and 0 deletions

31
api/utils/api_key.py Normal file
View file

@ -0,0 +1,31 @@
import hashlib
import secrets
from typing import Tuple
def generate_api_key() -> Tuple[str, str, str]:
    """Create a fresh API key together with its storage hash and display prefix.

    Returns:
        Tuple of (raw_api_key, key_hash, key_prefix)
        - raw_api_key: "dgr_" followed by a URL-safe random token; this is
          the value handed to the user
        - key_hash: hex-encoded SHA256 digest of the raw key, for storage
        - key_prefix: the first 8 characters of the raw key, for display
    """
    # 32 random bytes from the CSPRNG, URL-safe base64 encoded.
    token = secrets.token_urlsafe(32)
    api_key = f"dgr_{token}"

    hasher = hashlib.sha256()
    hasher.update(api_key.encode())

    return api_key, hasher.hexdigest(), api_key[:8]
def hash_api_key(raw_api_key: str) -> str:
    """Compute the digest of an API key so it can be compared with a stored hash.

    Args:
        raw_api_key: The raw API key to hash

    Returns:
        Hex-encoded SHA256 digest of the key
    """
    hasher = hashlib.sha256()
    hasher.update(raw_api_key.encode())
    return hasher.hexdigest()

View file

@ -0,0 +1,84 @@
#!/usr/bin/env python3
"""Utility script to convert audio file sample rates using Pipecat's resampler."""
import asyncio
import sys
from pathlib import Path
import numpy as np
import soundfile as sf
# Add pipecat to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "pipecat" / "src"))
from pipecat.audio.utils import create_file_resampler
async def convert_audio_sample_rate(input_path: str, output_sample_rates: list[int]):
    """Resample an audio file to each requested rate and save the results as WAV.

    The input is read as 16-bit PCM, down-mixed to mono when it has more than
    one channel, and resampled once per target rate. Every result is written
    next to the input file as a 16-bit PCM WAV file.

    Output naming: when the input file stem contains ``24000`` that substring
    is replaced by the target rate; otherwise ``-<rate>-mono`` is appended to
    the stem.

    Args:
        input_path: Path to the input audio file
        output_sample_rates: List of target sample rates (Hz) to convert to

    Returns:
        None. If the input file does not exist an error is printed and
        nothing is written.
    """
    input_file = Path(input_path)
    if not input_file.exists():
        print(f"Error: Input file '{input_path}' not found")
        return

    # Load the audio file using soundfile
    print(f"Loading audio file: {input_path}")
    audio_data, original_sample_rate = sf.read(input_path, dtype="int16")
    print(f"Original sample rate: {original_sample_rate} Hz")
    print(f"Shape: {audio_data.shape}")
    print(f"Duration: {len(audio_data) / original_sample_rate:.2f} seconds")

    # Convert to mono if the file has more than one channel
    if len(audio_data.shape) > 1 and audio_data.shape[1] > 1:
        print("Converting to mono...")
        audio_data = np.mean(audio_data, axis=1).astype(np.int16)

    # The resampler is fed raw bytes, not a numpy array
    raw_audio = audio_data.tobytes()

    resampler = create_file_resampler()

    for target_rate in output_sample_rates:
        print(f"\nConverting to {target_rate} Hz...")

        resampled_audio = await resampler.resample(
            raw_audio, original_sample_rate, target_rate
        )

        # Convert bytes back to a numpy array for soundfile
        resampled_data = np.frombuffer(resampled_audio, dtype=np.int16)

        # Generate the output filename (see docstring for the naming rule)
        stem = input_file.stem
        if "24000" in stem:
            output_name = stem.replace("24000", str(target_rate))
        else:
            output_name = f"{stem}-{target_rate}-mono"

        # Only WAV output is produced; no MP3 encoding step exists here.
        wav_path = input_file.parent / f"{output_name}.wav"
        sf.write(wav_path, resampled_data, target_rate, subtype="PCM_16")
        print(f"Saved WAV: {wav_path}")
async def main():
    """Convert the office ambience file to 8 kHz and 16 kHz.

    The input file defaults to the original developer-machine path. Pass a
    different path as the first command-line argument to convert another file
    without editing this script.
    """
    # Default kept for backward compatibility with the original script.
    default_input = "/Users/abhishekkumar/Projects/dograh/dograh/api/assets/office-ambience-24000-mono.mp3"
    input_file = sys.argv[1] if len(sys.argv) > 1 else default_input
    target_rates = [8000, 16000]
    await convert_audio_sample_rate(input_file, target_rates)
    print("\nConversion complete!")
# Script entry point: run the async main() to completion on a new event loop.
if __name__ == "__main__":
    asyncio.run(main())

View file

@ -0,0 +1,51 @@
"""Common template rendering utility."""
import re
from typing import Any, Dict
def render_template(template_str: str, template_var_mapping: Dict[str, Any]) -> str:  # noqa: C901 complex but self-contained
    """Substitute ``{{ ... }}`` placeholders in *template_str* using *template_var_mapping*.

    Supported syntax:
    * ``{{ variable_name }}``
    * ``{{ variable_name | fallback }}``
    * ``{{ variable_name | fallback:default_value }}``

    A variable that is missing (or ``None``) renders as an empty string unless
    the *fallback* filter is given. With *fallback*, a missing or empty-string
    value is replaced by *default_value*, or by the title-cased variable name
    when no default is supplied. Filters other than *fallback* are ignored.

    After substitution, every literal backslash-n sequence in the result is
    turned into a real newline. A falsy *template_str* is returned unchanged.
    """
    if not template_str:
        return template_str

    # Matches e.g. ``{{ name }}``, ``{{ name | fallback }}``, ``{{ name | fallback:John }}``
    placeholder = re.compile(
        r"\{\{\s*([^|\s}]+)(?:\s*\|\s*([^:}]+)(?::([^}]+))?)?\s*\}\}"
    )

    def _substitute(found: re.Match[str]) -> str:  # type: ignore[type-arg]
        raw_name, raw_filter, raw_default = found.groups()
        key = raw_name.strip()
        filter_name = raw_filter.strip() if raw_filter else None
        default = raw_default.strip() if raw_default else None

        # Look the variable up in the supplied context
        resolved = template_var_mapping.get(key)

        # The fallback filter only kicks in for missing or empty values
        if filter_name == "fallback" and (resolved is None or resolved == ""):
            resolved = key.title() if default is None else default

        # The replacement callback must return a string, never None
        return "" if resolved is None else str(resolved)

    rendered = placeholder.sub(_substitute, template_str)

    # Convert literal \n sequences into actual newlines
    return rendered.replace("\\n", "\n")

63
api/utils/worker.py Normal file
View file

@ -0,0 +1,63 @@
"""Utilities for worker process identification."""
import multiprocessing
import os
from loguru import logger
def get_worker_id() -> int:
    """Resolve the 0-based ID of the current worker process.

    The ``ASGI_WORKER_ID`` environment variable wins when it is set to a
    non-empty value. Otherwise the ID is derived from the trailing number of
    the multiprocessing process name.

    Returns:
        Worker ID (0-based index), or 0 if no worker ID could be determined
        (main process or single-process mode).
    """
    # Check for custom ASGI_WORKER_ID (for future compatibility)
    env_value = os.getenv("ASGI_WORKER_ID")
    if env_value:
        return int(env_value)

    name = multiprocessing.current_process().name

    # Uvicorn with --workers creates processes like "SpawnProcess-1", "SpawnProcess-2", etc.
    # Gunicorn creates workers with names like "Worker-1", "Worker-2", etc.
    # TODO FIXME: If a worker process crashes and uvicorn creates a new process,
    # it assigns ID which may be beyond NUM_FASTAPI_WORKERS. Example: if we have
    # 2 fastapi workers configured, and one of them dies, we can get a process name with
    # SpawnProcess-3 which is bad
    for marker in ("SpawnProcess", "Worker"):
        if marker not in name:
            continue
        try:
            # The number follows the last "-"; names are 1-based, IDs 0-based
            return int(name.rsplit("-", 1)[-1]) - 1
        except (ValueError, IndexError):
            logger.warning(
                f"Could not extract worker ID from process name: {name}"
            )

    # Not in a worker process (main process or single-process mode)
    return 0
def is_worker_process() -> bool:
    """Tell whether the current process looks like a server worker.

    Returns:
        True if the multiprocessing process name contains "SpawnProcess" or
        "Worker", False otherwise (main process or single-process mode).
    """
    name = multiprocessing.current_process().name
    return any(marker in name for marker in ("SpawnProcess", "Worker"))