SurfSense/surfsense_backend/app/podcasts/rendering/cache.py
2026-06-10 18:44:03 +02:00

53 lines
1.8 KiB
Python

"""Content-addressed cache for synthesised segments.
Each segment's audio is keyed by everything that determines its bytes (voice,
language, speed, text). Keeping the cache in a stable per-podcast directory
makes re-renders cheap: changing one speaker's voice only misses that speaker's
turns, and a worker restart mid-render resumes from whatever was already
written. The key intentionally excludes the segment's position so identical
lines (e.g. repeated "Right.") synthesise once.
"""
from __future__ import annotations

import hashlib
import json
import uuid
from pathlib import Path

from app.podcasts.tts import SynthesisRequest
class SegmentCache:
    """On-disk store of segment audio, addressed by request content hash.

    Entries are stored directly under ``root`` as ``<key>.<container>`` files,
    where ``key`` is the digest returned by :meth:`key`.
    """

    def __init__(self, root: Path) -> None:
        """Use ``root`` as the cache directory, creating it if necessary."""
        self._root = root
        self._root.mkdir(parents=True, exist_ok=True)

    def key(self, request: SynthesisRequest) -> str:
        """Return a stable hash of the inputs that determine the synthesised bytes.

        The digest is the SHA-256 of a JSON document holding the request's
        ``voice``, ``language``, ``speed`` and ``text``.
        """
        # sort_keys + ensure_ascii give one deterministic serialisation, so the
        # same request always produces the same digest.
        material = json.dumps(
            {
                "voice": request.voice,
                "language": request.language,
                "speed": request.speed,
                "text": request.text,
            },
            sort_keys=True,
            ensure_ascii=True,
        )
        return hashlib.sha256(material.encode("utf-8")).hexdigest()

    def path(self, key: str, container: str) -> Path:
        """Return the location for ``key`` in ``container`` format.

        The file is not required to exist.
        """
        return self._root / f"{key}.{container}"

    def get(self, key: str, container: str) -> Path | None:
        """Return the cached segment path, or ``None`` on a miss."""
        path = self.path(key, container)
        # Only a regular file is a usable entry; a directory that happens to
        # sit at this path must not be reported as cached audio.
        return path if path.is_file() else None

    def put(self, key: str, container: str, data: bytes) -> Path:
        """Write ``data`` for ``key`` and return its path.

        The bytes are written to a uniquely named temporary file in the cache
        directory and then renamed over the final path. An interrupted write
        therefore never leaves a truncated file where :meth:`get` would find
        it, and an existing entry stays intact until the new one is complete.
        """
        path = self.path(key, container)
        # Same directory as the target so the rename stays on one filesystem;
        # the random component keeps concurrent writers of the same key apart.
        tmp = path.with_name(f".{path.name}.{uuid.uuid4().hex}.tmp")
        try:
            with tmp.open("xb") as handle:
                handle.write(data)
            tmp.replace(path)
        except BaseException:
            # Do not leave a stray temp file behind, then surface the error.
            try:
                tmp.unlink()
            except FileNotFoundError:
                pass
            raise
        return path