From 23d03cd9d5a5194427300f6309c594434215b69e Mon Sep 17 00:00:00 2001 From: Abel Luck Date: Tue, 31 Mar 2026 15:14:17 +0200 Subject: [PATCH] Hash audio profile paths --- repub/utils.py | 2 +- tests/test_feed_validation.py | 18 ++++++------ tests/test_file_feeds.py | 27 +++++++++++------- tests/test_pipelines.py | 53 +++++++++++++++++++++-------------- 4 files changed, 59 insertions(+), 41 deletions(-) diff --git a/repub/utils.py b/repub/utils.py index 9ed61d5..b8379a1 100644 --- a/repub/utils.py +++ b/repub/utils.py @@ -66,7 +66,7 @@ def published_media_path( file_type: FileType, source_url: str, profile: Mapping[str, Any] ) -> str: if file_type == FileType.AUDIO: - return variant_media_path(local_audio_path(source_url), profile) + return variant_media_path(local_audio_path(source_url), profile, hashed=True) if file_type == FileType.VIDEO: return variant_media_path(local_video_path(source_url), profile, hashed=True) raise ValueError(f"Unsupported file type for published media path: {file_type}") diff --git a/tests/test_feed_validation.py b/tests/test_feed_validation.py index fd51474..22589b4 100644 --- a/tests/test_feed_validation.py +++ b/tests/test_feed_validation.py @@ -76,10 +76,10 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None: item.audios = [ { "url": source_audio, - "path": f"{audio_base_path}-vbr7.mp3", + "path": f"{audio_base_path}-vbr7-3b2b0f13.mp3", "published_url": _published_url( "https://mirror.example", - f"audio/{audio_base_path}-vbr7.mp3", + f"audio/{audio_base_path}-vbr7-3b2b0f13.mp3", ), "checksum": "audio-default", "status": "downloaded", @@ -87,9 +87,9 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None: { "url": _published_url( "https://mirror.example", - f"audio/{audio_base_path}-vbr7.mp3", + f"audio/{audio_base_path}-vbr7-3b2b0f13.mp3", ), - "path": f"{audio_base_path}-vbr7.mp3", + "path": f"{audio_base_path}-vbr7-3b2b0f13.mp3", "type": "audio/mp3", "medium": "audio", "isDefault": "true", @@ -102,9 +102,9 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None: { "url": _published_url( "https://mirror.example", - f"audio/{audio_base_path}-vbr3.aac", + f"audio/{audio_base_path}-vbr3-4a2a58d5.aac", ), - "path": f"{audio_base_path}-vbr3.aac", + "path": f"{audio_base_path}-vbr3-4a2a58d5.aac", "type": "audio/aac", "medium": "audio", "isDefault": "false", @@ -259,7 +259,7 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None: assert enclosure.attrib == { "url": ( f"https://mirror.example/feeds/demo/audio/" - f"{local_audio_path(source_audio)}-vbr7.mp3" + f"{local_audio_path(source_audio)}-vbr7-3b2b0f13.mp3" ), "length": "4567", "type": "audio/mp3", @@ -277,7 +277,7 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None: { "url": ( f"https://mirror.example/feeds/demo/audio/" - f"{local_audio_path(source_audio)}-vbr7.mp3" + f"{local_audio_path(source_audio)}-vbr7-3b2b0f13.mp3" ), "type": "audio/mp3", "medium": "audio", @@ -291,7 +291,7 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None: { "url": ( f"https://mirror.example/feeds/demo/audio/" - f"{local_audio_path(source_audio)}-vbr3.aac" + f"{local_audio_path(source_audio)}-vbr3-4a2a58d5.aac" ), "type": "audio/aac", "medium": "audio", diff --git a/tests/test_file_feeds.py b/tests/test_file_feeds.py index dc90b62..66246f3 100644 --- a/tests/test_file_feeds.py +++ b/tests/test_file_feeds.py @@ -66,12 +66,13 @@ def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None: spider.rewrite_image_url("https://example.com/media/photo.jpg") == f"images/{local_image_path('https://example.com/media/photo.jpg')}" ) - assert ( - spider.rewrite_file_url( - FileType.AUDIO, - "https://example.com/media/podcast.mp3", - ) - == f"audio/{local_audio_path('https://example.com/media/podcast.mp3')}-vbr7.mp3" + assert spider.rewrite_file_url( + FileType.AUDIO, + "https://example.com/media/podcast.mp3", + ) == ( + "audio/" + f"{local_audio_path('https://example.com/media/podcast.mp3')}" + "-vbr7-3b2b0f13.mp3" ) assert spider.rewrite_file_url( FileType.VIDEO, @@ -83,17 +84,23 @@ def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None: ) -def test_published_video_path_changes_when_profile_args_change() -> None: +def test_published_media_path_changes_when_profile_args_change() -> None: source_url = "https://example.com/media/clip.mp4" + audio_profile = repub_settings.REPUBLISHER_AUDIO[0] base_profile = repub_settings.REPUBLISHER_VIDEO[0] - assert published_media_path( - FileType.AUDIO, source_url, repub_settings.REPUBLISHER_AUDIO[0] - ) == (f"{local_audio_path(source_url)}-vbr7.mp3") + assert published_media_path(FileType.AUDIO, source_url, audio_profile) == ( + f"{local_audio_path(source_url)}-vbr7-3b2b0f13.mp3" + ) assert published_media_path(FileType.VIDEO, source_url, base_profile) == ( f"{local_video_path(source_url)}-720-457f0928.mp4" ) + changed_audio_profile = {**audio_profile, "max_bitrate": 128000} + assert published_media_path( + FileType.AUDIO, source_url, changed_audio_profile + ) != published_media_path(FileType.AUDIO, source_url, audio_profile) + changed_profile = {**base_profile, "max_height": 1080} assert published_media_path( FileType.VIDEO, source_url, changed_profile diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py index 926ab27..e82672b 100644 --- a/tests/test_pipelines.py +++ b/tests/test_pipelines.py @@ -338,7 +338,7 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant return str(output_path) def fake_probe_media(file_path: str): - if file_path.endswith(".mp3-vbr7.mp3"): + if file_path.endswith(".mp3-vbr7-3b2b0f13.mp3"): return { "format": { "duration": "61.2", @@ -423,18 +423,20 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant assert isinstance(result["checksum"], str) assert result == { "url": source_url, - "path": f"{audio_base_path}-vbr7.mp3", + "path": f"{audio_base_path}-vbr7-3b2b0f13.mp3", "published_url": ( - f"https://mirror.example/feeds/nasa/audio/{audio_base_path}-vbr7.mp3" + "https://mirror.example/feeds/nasa/audio/" + f"{audio_base_path}-vbr7-3b2b0f13.mp3" ), "checksum": result["checksum"], "status": "downloaded", "variants": [ { "url": ( - f"https://mirror.example/feeds/nasa/audio/{audio_base_path}-vbr7.mp3" + "https://mirror.example/feeds/nasa/audio/" + f"{audio_base_path}-vbr7-3b2b0f13.mp3" ), - "path": f"{audio_base_path}-vbr7.mp3", + "path": f"{audio_base_path}-vbr7-3b2b0f13.mp3", "type": "audio/mp3", "medium": "audio", "isDefault": "true", @@ -446,9 +448,10 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant }, { "url": ( - f"https://mirror.example/feeds/nasa/audio/{audio_base_path}-vbr3.aac" + "https://mirror.example/feeds/nasa/audio/" + f"{audio_base_path}-vbr3-4a2a58d5.aac" ), - "path": f"{audio_base_path}-vbr3.aac", + "path": f"{audio_base_path}-vbr3-4a2a58d5.aac", "type": "audio/aac", "medium": "audio", "isDefault": "false", @@ -474,8 +477,8 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant } assert persisted == [ (audio_base_path, "audio/mpeg"), - (f"{audio_base_path}-vbr7.mp3", "audio/mp3"), - (f"{audio_base_path}-vbr3.aac", "audio/aac"), + (f"{audio_base_path}-vbr7-3b2b0f13.mp3", "audio/mp3"), + (f"{audio_base_path}-vbr3-4a2a58d5.aac", "audio/aac"), ] completed_item = pipeline.item_completed( @@ -647,8 +650,8 @@ def test_audio_pipeline_media_to_download_checks_canonical_path( source_url = "https://example.com/podcast.mp3" audio_base_path = local_audio_path(source_url) original_path = store_dir(pipeline) / audio_base_path - canonical_path = store_dir(pipeline) / f"{audio_base_path}-vbr7.mp3" - secondary_path = store_dir(pipeline) / f"{audio_base_path}-vbr3.aac" + canonical_path = store_dir(pipeline) / f"{audio_base_path}-vbr7-3b2b0f13.mp3" + secondary_path = store_dir(pipeline) / f"{audio_base_path}-vbr3-4a2a58d5.aac" original_path.parent.mkdir(parents=True, exist_ok=True) original_path.write_bytes(b"original") canonical_path.parent.mkdir(parents=True, exist_ok=True) @@ -680,19 +683,27 @@ def test_audio_pipeline_media_to_download_checks_canonical_path( lambda file_path: { "format": { "duration": "61.2", - "size": "4567" if file_path.endswith("vbr7.mp3") else "3456", - "bit_rate": "96000" if file_path.endswith("vbr7.mp3") else "88000", - "format_name": "mp3" if file_path.endswith("vbr7.mp3") else "aac", + "size": ("4567" if file_path.endswith("vbr7-3b2b0f13.mp3") else "3456"), + "bit_rate": ( + "96000" if file_path.endswith("vbr7-3b2b0f13.mp3") else "88000" + ), + "format_name": ( + "mp3" if file_path.endswith("vbr7-3b2b0f13.mp3") else "aac" + ), "format_long_name": "Audio", }, "streams": [ { "codec_type": "audio", - "codec_name": "mp3" if file_path.endswith("vbr7.mp3") else "aac", - "bit_rate": "96000" if file_path.endswith("vbr7.mp3") else "88000", + "codec_name": ( + "mp3" if file_path.endswith("vbr7-3b2b0f13.mp3") else "aac" + ), + "bit_rate": ( + "96000" if file_path.endswith("vbr7-3b2b0f13.mp3") else "88000" + ), "duration_ts": "61200", "sample_rate": ( - "44100" if file_path.endswith("vbr7.mp3") else "48000" + "44100" if file_path.endswith("vbr7-3b2b0f13.mp3") else "48000" ), "channels": 2, } @@ -706,12 +717,12 @@ def test_audio_pipeline_media_to_download_checks_canonical_path( item=item, ) assert result is not None - assert result["path"] == f"{audio_base_path}-vbr7.mp3" + assert result["path"] == f"{audio_base_path}-vbr7-3b2b0f13.mp3" assert result["status"] == "uptodate" assert [variant.get("path") for variant in result["variants"]] == [ - f"{audio_base_path}-vbr7.mp3", - f"{audio_base_path}-vbr3.aac", + f"{audio_base_path}-vbr7-3b2b0f13.mp3", + f"{audio_base_path}-vbr3-4a2a58d5.aac", audio_base_path, ] assert f"{audio_base_path}.mp3" not in stat_paths - assert stat_paths[0] == f"{audio_base_path}-vbr7.mp3" + assert stat_paths[0] == f"{audio_base_path}-vbr7-3b2b0f13.mp3"