diff --git a/repub/media.py b/repub/media.py index 54165fa..fe8074b 100644 --- a/repub/media.py +++ b/repub/media.py @@ -170,6 +170,10 @@ def _frame_rate(stream: Dict[str, Any]) -> Optional[str]: return None +def _scale_to_max_height(max_height: int) -> str: + return f"scale=-2:{max_height}" + + def audio_meta(probe: Dict[str, Any]) -> Optional[AudioMeta]: stream = primary_audio_stream(probe) if not stream: @@ -303,7 +307,7 @@ def video_transcode_params( params = {"extension": settings["extension"]} if len(passes) == 0 or is_vcodec: if not is_good_height: - params["vf"] = f"scale={width}:{height}" + params["vf"] = _scale_to_max_height(max_height) if not is_vcodec: params.update(settings["ffmpeg_video_params"]) if not is_acodec or not is_audio_bitrate: @@ -313,7 +317,7 @@ def video_transcode_params( for p in passes: p = copy.deepcopy(p) if not is_good_height: - p["vf"] = f"scale={width}:{height}" + p["vf"] = _scale_to_max_height(max_height) params["passes"].append(p) return params diff --git a/repub/utils.py b/repub/utils.py index d10b920..9ed61d5 100644 --- a/repub/utils.py +++ b/repub/utils.py @@ -1,4 +1,5 @@ import hashlib +import json import mimetypes from enum import Enum from pathlib import Path @@ -42,8 +43,23 @@ def local_audio_path(s: str) -> str: return local_file_path(s) -def variant_media_path(base_path: str, profile: Mapping[str, Any]) -> str: - return f"{base_path}-{profile['name']}.{profile['extension']}" +def profile_settings_hash(profile: Mapping[str, Any]) -> str: + settings = { + key: value + for key, value in profile.items() + if key not in {"name", "mimetype", "extension"} + } + payload = json.dumps(settings, sort_keys=True, separators=(",", ":")) + return hashlib.sha1(to_bytes(payload)).hexdigest()[:8] # nosec + + +def variant_media_path( + base_path: str, profile: Mapping[str, Any], *, hashed: bool = False +) -> str: + profile_name = str(profile["name"]) + if hashed: + profile_name = f"{profile_name}-{profile_settings_hash(profile)}" + return f"{base_path}-{profile_name}.{profile['extension']}" def published_media_path( @@ -52,7 +68,7 @@ def published_media_path( if file_type == FileType.AUDIO: return variant_media_path(local_audio_path(source_url), profile) if file_type == FileType.VIDEO: - return variant_media_path(local_video_path(source_url), profile) + return variant_media_path(local_video_path(source_url), profile, hashed=True) raise ValueError(f"Unsupported file type for published media path: {file_type}") diff --git a/tests/test_feed_validation.py b/tests/test_feed_validation.py index 8b1ede5..fd51474 100644 --- a/tests/test_feed_validation.py +++ b/tests/test_feed_validation.py @@ -135,10 +135,10 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None: item.videos = [ { "url": source_video, - "path": f"{video_base_path}-720.mp4", + "path": f"{video_base_path}-720-457f0928.mp4", "published_url": _published_url( "https://mirror.example", - f"video/{video_base_path}-720.mp4", + f"video/{video_base_path}-720-457f0928.mp4", ), "checksum": "video-default", "status": "downloaded", @@ -146,9 +146,9 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None: { "url": _published_url( "https://mirror.example", - f"video/{video_base_path}-720.mp4", + f"video/{video_base_path}-720-457f0928.mp4", ), - "path": f"{video_base_path}-720.mp4", + "path": f"{video_base_path}-720-457f0928.mp4", "type": "video/mp4", "medium": "video", "isDefault": "true", @@ -323,7 +323,7 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None: { "url": ( f"https://mirror.example/feeds/demo/video/" - f"{local_video_path(source_video)}-720.mp4" + f"{local_video_path(source_video)}-720-457f0928.mp4" ), "type": "video/mp4", "medium": "video", diff --git a/tests/test_file_feeds.py b/tests/test_file_feeds.py index 64e43e9..dc90b62 100644 --- a/tests/test_file_feeds.py +++ b/tests/test_file_feeds.py @@ -6,7 +6,13 @@ from scrapy.settings import Settings from repub import entrypoint as entrypoint_module from repub import settings as repub_settings from repub.spiders.rss_spider import RssFeedSpider -from repub.utils import FileType, local_audio_path, local_image_path, local_video_path +from repub.utils import ( + FileType, + local_audio_path, + local_image_path, + local_video_path, + published_media_path, +) def test_entrypoint_supports_file_feed_urls(tmp_path: Path, monkeypatch) -> None: @@ -67,15 +73,33 @@ def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None: ) == f"audio/{local_audio_path('https://example.com/media/podcast.mp3')}-vbr7.mp3" ) - assert ( - spider.rewrite_file_url( - FileType.VIDEO, - "https://example.com/media/clip.mp4", - ) - == f"video/{local_video_path('https://example.com/media/clip.mp4')}-720.mp4" + assert spider.rewrite_file_url( + FileType.VIDEO, + "https://example.com/media/clip.mp4", + ) == ( + "video/" + f"{local_video_path('https://example.com/media/clip.mp4')}" + "-720-457f0928.mp4" ) +def test_published_video_path_changes_when_profile_args_change() -> None: + source_url = "https://example.com/media/clip.mp4" + base_profile = repub_settings.REPUBLISHER_VIDEO[0] + + assert published_media_path( + FileType.AUDIO, source_url, repub_settings.REPUBLISHER_AUDIO[0] + ) == (f"{local_audio_path(source_url)}-vbr7.mp3") + assert published_media_path(FileType.VIDEO, source_url, base_profile) == ( + f"{local_video_path(source_url)}-720-457f0928.mp4" + ) + + changed_profile = {**base_profile, "max_height": 1080} + assert published_media_path( + FileType.VIDEO, source_url, changed_profile + ) != published_media_path(FileType.VIDEO, source_url, base_profile) + + def test_rss_spider_keeps_items_with_empty_content_encoded() -> None: feed_text = """ diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index 9aa9a25..926ab27 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -204,6 +204,111 @@ def test_transcode_video_prints_ffmpeg_output_on_error( assert ("video-stdout", False) in printed +def test_video_transcode_params_scales_to_max_height() -> None: + params = media.video_transcode_params( + { + "format": {"format_name": "mp4"}, + "streams": [ + { + "codec_type": "video", + "codec_name": "mpeg4", + "bit_rate": "2000000", + "duration_ts": "1", + "width": 1920, + "height": 1080, + }, + { + "codec_type": "audio", + "codec_name": "aac", + "bit_rate": "128000", + "duration_ts": "1", + }, + ], + }, + { + "name": "720", + "container": "mp4", + "vcodec": "h264", + "acodec": "mp3", + "audio_max_bitrate": 96000, + "ffmpeg_audio_params": {"acodec": "libmp3lame"}, + "ffmpeg_video_params": {"vcodec": "h264", "strict": "-2"}, + "max_height": 720, + "mimetype": "video/mp4", + "extension": "mp4", + }, + ) + + assert params == { + "extension": "mp4", + "vf": "scale=-2:720", + "vcodec": "h264", + "strict": "-2", + "acodec": "libmp3lame", + } + + +def test_video_transcode_params_scales_to_max_height_for_multipass() -> None: + params = media.video_transcode_params( + { + "format": {"format_name": "mp4"}, + "streams": [ + { + "codec_type": "video", + "codec_name": "mpeg4", + "bit_rate": "2000000", + "duration_ts": "1", + "width": 1920, + "height": 1080, + }, + { + "codec_type": "audio", + "codec_name": "mp3", + "bit_rate": "128000", + "duration_ts": "1", + }, + ], + }, + cast( + media.VideoSettings, + { + "name": "720", + "container": "webm", + "vcodec": "libvpx-vp9", + "acodec": "opus", + "audio_max_bitrate": 96000, + "ffmpeg_audio_params": {"c:a": "libopus", "b:a": "96k"}, + "ffmpeg_video_params": {}, + "max_height": 720, + "mimetype": "video/webm", + "extension": "webm", + "passes": [ + {"c:v": "libvpx-vp9", "pass": "1", "f": "null"}, + {"c:v": "libvpx-vp9", "pass": "2", "c:a": "libopus"}, + ], + }, + ), + ) + + assert params == { + "extension": "webm", + "passes": [ + { + "c:v": "libvpx-vp9", + "pass": "1", + "f": "null", + "vf": "scale=-2:720", + }, + { + "c:v": "libvpx-vp9", + "pass": "2", + "c:a": "libopus", + "vf": "scale=-2:720", + }, + ], + } + + def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variants( monkeypatch, tmp_path: Path ) -> None: @@ -410,6 +515,7 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant return str(output_path) monkeypatch.setattr(pipeline, "transcode", fake_transcode) + transcoded_suffix = "-720-457f0928.mp4" monkeypatch.setattr( media, "probe_media", @@ -418,12 +524,12 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant "duration": "60.0", "size": ( "12345" - if _.endswith(".mp4") and not _.endswith("-720.mp4") + if _.endswith(".mp4") and not _.endswith(transcoded_suffix) else "9876" ), "bit_rate": ( "456789" - if _.endswith(".mp4") and not _.endswith("-720.mp4") + if _.endswith(".mp4") and not _.endswith(transcoded_suffix) else "123456" ), "format_name": "mp4", @@ -435,23 +541,23 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant "codec_name": "h264", "bit_rate": ( "456789" - if _.endswith(".mp4") and not _.endswith("-720.mp4") + if _.endswith(".mp4") and not _.endswith(transcoded_suffix) else "123456" ), "duration_ts": "60000", "width": ( 640 - if _.endswith(".mp4") and not _.endswith("-720.mp4") + if _.endswith(".mp4") and not _.endswith(transcoded_suffix) else 1280 ), "height": ( 360 - if _.endswith(".mp4") and not _.endswith("-720.mp4") + if _.endswith(".mp4") and not _.endswith(transcoded_suffix) else 720 ), "avg_frame_rate": ( "24/1" - if _.endswith(".mp4") and not _.endswith("-720.mp4") + if _.endswith(".mp4") and not _.endswith(transcoded_suffix) else "30/1" ), }, @@ -487,18 +593,20 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant assert isinstance(result["checksum"], str) assert result == { "url": source_url, - "path": f"{video_base_path}-720.mp4", + "path": f"{video_base_path}-720-457f0928.mp4", "published_url": ( - f"https://mirror.example/feeds/nasa/video/{video_base_path}-720.mp4" + "https://mirror.example/feeds/nasa/video/" + f"{video_base_path}-720-457f0928.mp4" ), "checksum": result["checksum"], "status": "downloaded", "variants": [ { "url": ( - f"https://mirror.example/feeds/nasa/video/{video_base_path}-720.mp4" + "https://mirror.example/feeds/nasa/video/" + f"{video_base_path}-720-457f0928.mp4" ), - "path": f"{video_base_path}-720.mp4", + "path": f"{video_base_path}-720-457f0928.mp4", "type": "video/mp4", "medium": "video", "isDefault": "true", @@ -526,7 +634,7 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant } assert persisted == [ (video_base_path, "video/mp4"), - (f"{video_base_path}-720.mp4", "video/mp4"), + (f"{video_base_path}-720-457f0928.mp4", "video/mp4"), ]