# ASCILINE/test_ytdl_normalize.py (114 lines, 4 KiB, Python)

"""
Deterministic tests for ytdl.normalize() -- no network required.
We synthesize the exact kinds of files YouTube serves that broke the engine
(VP9 video + Opus audio inside an mp4 container, and variable-frame-rate video)
and assert that normalize() turns them into canonical H.264 / AAC / CFR mp4s the
engine can open and time correctly.
"""
import shutil
import subprocess
import cv2
import pytest
import ytdl
def _run(*args):
    """Execute *args* as one command and return the completed process.

    Both output streams are captured (not inherited) and decoded to ``str``,
    so callers can read ``returncode``, ``stdout`` and ``stderr`` directly.
    A non-zero exit status does not raise; callers check it themselves.
    """
    completed = subprocess.run(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    return completed
def _has_encoders(*names):
    """Return True only if ffmpeg is on PATH and lists every encoder in *names*.

    Each requested name must equal an entry in the name column of
    ``ffmpeg -encoders``. A plain substring search over the whole listing
    would also accept a name that merely occurs inside a longer encoder name
    or inside a description, so names are compared as whole tokens instead.

    Returns False when ffmpeg cannot be found; True when *names* is empty and
    ffmpeg exists.
    """
    if not shutil.which("ffmpeg"):
        return False
    listing = _run("ffmpeg", "-hide_banner", "-encoders").stdout
    available = set()
    for row in listing.splitlines():
        fields = row.split()
        # Encoder rows are "<flags> <name> <description...>", so the name is
        # the second field. Legend rows such as " V..... = Video" only add
        # "=", which never collides with a real encoder name.
        if len(fields) >= 2:
            available.add(fields[1])
    return all(name in available for name in names)
# The deliberately *broken* fixtures can only be produced with the VP9 and Opus
# encoders, whereas the fix itself needs nothing beyond libx264/aac. On a
# minimal ffmpeg the affected tests are skipped cleanly instead of failing CI.
_vp9_opus_available = _has_encoders("libvpx-vp9", "libopus")
requires_vp9_opus = pytest.mark.skipif(
    not _vp9_opus_available,
    reason="ffmpeg without libvpx-vp9/libopus; cannot synthesize the broken input",
)

_x264_available = _has_encoders("libx264")
requires_x264 = pytest.mark.skipif(
    not _x264_available,
    reason="ffmpeg without libx264",
)
def _make_vp9_opus_mp4(path):
    """Write a one-second VP9 + Opus clip to *path* inside an mp4 container.

    The streams are first encoded into an intermediate Matroska file next to
    *path* (``<path>.src.mkv``) and then stream-copied into mp4 — non-standard,
    exactly what `--merge-output-format mp4` produces from YouTube's 'best'
    streams.

    Raises AssertionError carrying ffmpeg's stderr if either step fails.
    """
    src = str(path) + ".src.mkv"
    encoded = _run("ffmpeg", "-y",
                   "-f", "lavfi", "-i", "testsrc=size=320x240:rate=24:duration=1",
                   "-f", "lavfi", "-i", "sine=frequency=440:duration=1",
                   "-c:v", "libvpx-vp9", "-b:v", "200k", "-c:a", "libopus",
                   "-loglevel", "error", src)
    # Fail here, with the encoder's own message, rather than letting the remux
    # step below complain about a missing input file.
    assert encoded.returncode == 0, encoded.stderr
    # copy (not re-encode) into mp4 -> opus-in-mp4, the broken container
    remuxed = _run("ffmpeg", "-y", "-i", src, "-c", "copy", "-loglevel", "error", str(path))
    assert remuxed.returncode == 0, remuxed.stderr
def _make_vfr_mp4(path):
    """Write a video-only H.264 mp4 to *path* that is meant to be VFR.

    The intent is a file whose nominal frame rate disagrees with its average
    frame rate. Raises AssertionError carrying ffmpeg's stderr on failure.
    """
    command = [
        "ffmpeg", "-y",
        "-f", "lavfi", "-i", "testsrc=size=320x240:rate=60:duration=1",
        # keep only frames whose index is a multiple of 3 (drops 2 of every 3)
        "-vf", "select='not(mod(n,3))'",
        "-fps_mode", "vfr", "-c:v", "libx264", "-an",
        "-loglevel", "error", str(path),
    ]
    result = _run(*command)
    assert result.returncode == 0, result.stderr
def _audio_decodes(path):
    """Return True when ffmpeg decodes the first 0.5 s of *path* cleanly.

    "Cleanly" means a zero exit status and no "Invalid data" text on stderr.
    """
    probe = _run("ffmpeg", "-v", "error", "-i", str(path), "-t", "0.5", "-f", "null", "-")
    if probe.returncode != 0:
        return False
    return "Invalid data" not in probe.stderr
@requires_vp9_opus
def test_vp9_opus_mp4_is_repaired(tmp_path):
    """normalize() must turn a VP9/Opus mp4 into an H.264/AAC, CFR mp4."""
    clip = tmp_path / "bad.mp4"
    clip_path = str(clip)
    _make_vp9_opus_mp4(clip)

    # Sanity-check the fixture: it really is the broken codec combination.
    before = ytdl._probe(clip_path)
    assert before["vcodec"] == "vp9"
    assert before["acodec"] == "opus"

    # True means the file had to be repaired.
    assert ytdl.normalize(clip_path) is True

    after = ytdl._probe(clip_path)
    assert after["vcodec"] == "h264"
    assert after["acodec"] == "aac"
    assert after["cfr"] is True
    assert _audio_decodes(clip)  # /audio extraction now works

    # OpenCV must open the result, read a frame, and report the real, stable
    # FPS of the 24 fps source.
    capture = cv2.VideoCapture(clip_path)
    frame_read, _frame = capture.read()
    reported_fps = capture.get(cv2.CAP_PROP_FPS)
    capture.release()
    assert frame_read
    assert abs(reported_fps - 24) < 0.5
@requires_x264
def test_vfr_is_made_constant(tmp_path):
    """A VFR H.264 input must probe as CFR once normalize() has run."""
    clip = tmp_path / "vfr.mp4"
    _make_vfr_mp4(clip)
    clip_path = str(clip)

    # The fixture must start out as non-CFR, otherwise the test proves nothing.
    cfr_before = ytdl._probe(clip_path)["cfr"]
    assert cfr_before is False

    changed = ytdl.normalize(clip_path)
    assert changed is True

    cfr_after = ytdl._probe(clip_path)["cfr"]
    assert cfr_after is True
@requires_x264
def test_clean_file_is_left_alone(tmp_path):
    """normalize() must return False for an H.264/AAC mp4 encoded at 24 fps."""
    good = tmp_path / "good.mp4"
    command = (
        "ffmpeg", "-y",
        "-f", "lavfi", "-i", "testsrc=size=320x240:rate=24:duration=1",
        "-f", "lavfi", "-i", "sine=frequency=440:duration=1",
        "-c:v", "libx264", "-pix_fmt", "yuv420p", "-r", "24",
        "-c:a", "aac", "-loglevel", "error", str(good),
    )
    encode = _run(*command)
    assert encode.returncode == 0, encode.stderr

    # fast path: no re-encode
    untouched = ytdl.normalize(str(good))
    assert untouched is False