Hash audio profile paths

This commit is contained in:
Abel Luck 2026-03-31 15:14:17 +02:00
parent 2ad0536bb0
commit 23d03cd9d5
4 changed files with 59 additions and 41 deletions

View file

@ -66,7 +66,7 @@ def published_media_path(
file_type: FileType, source_url: str, profile: Mapping[str, Any] file_type: FileType, source_url: str, profile: Mapping[str, Any]
) -> str: ) -> str:
if file_type == FileType.AUDIO: if file_type == FileType.AUDIO:
return variant_media_path(local_audio_path(source_url), profile) return variant_media_path(local_audio_path(source_url), profile, hashed=True)
if file_type == FileType.VIDEO: if file_type == FileType.VIDEO:
return variant_media_path(local_video_path(source_url), profile, hashed=True) return variant_media_path(local_video_path(source_url), profile, hashed=True)
raise ValueError(f"Unsupported file type for published media path: {file_type}") raise ValueError(f"Unsupported file type for published media path: {file_type}")

View file

@ -76,10 +76,10 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
item.audios = [ item.audios = [
{ {
"url": source_audio, "url": source_audio,
"path": f"{audio_base_path}-vbr7.mp3", "path": f"{audio_base_path}-vbr7-3b2b0f13.mp3",
"published_url": _published_url( "published_url": _published_url(
"https://mirror.example", "https://mirror.example",
f"audio/{audio_base_path}-vbr7.mp3", f"audio/{audio_base_path}-vbr7-3b2b0f13.mp3",
), ),
"checksum": "audio-default", "checksum": "audio-default",
"status": "downloaded", "status": "downloaded",
@ -87,9 +87,9 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
{ {
"url": _published_url( "url": _published_url(
"https://mirror.example", "https://mirror.example",
f"audio/{audio_base_path}-vbr7.mp3", f"audio/{audio_base_path}-vbr7-3b2b0f13.mp3",
), ),
"path": f"{audio_base_path}-vbr7.mp3", "path": f"{audio_base_path}-vbr7-3b2b0f13.mp3",
"type": "audio/mp3", "type": "audio/mp3",
"medium": "audio", "medium": "audio",
"isDefault": "true", "isDefault": "true",
@ -102,9 +102,9 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
{ {
"url": _published_url( "url": _published_url(
"https://mirror.example", "https://mirror.example",
f"audio/{audio_base_path}-vbr3.aac", f"audio/{audio_base_path}-vbr3-4a2a58d5.aac",
), ),
"path": f"{audio_base_path}-vbr3.aac", "path": f"{audio_base_path}-vbr3-4a2a58d5.aac",
"type": "audio/aac", "type": "audio/aac",
"medium": "audio", "medium": "audio",
"isDefault": "false", "isDefault": "false",
@ -259,7 +259,7 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
assert enclosure.attrib == { assert enclosure.attrib == {
"url": ( "url": (
f"https://mirror.example/feeds/demo/audio/" f"https://mirror.example/feeds/demo/audio/"
f"{local_audio_path(source_audio)}-vbr7.mp3" f"{local_audio_path(source_audio)}-vbr7-3b2b0f13.mp3"
), ),
"length": "4567", "length": "4567",
"type": "audio/mp3", "type": "audio/mp3",
@ -277,7 +277,7 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
{ {
"url": ( "url": (
f"https://mirror.example/feeds/demo/audio/" f"https://mirror.example/feeds/demo/audio/"
f"{local_audio_path(source_audio)}-vbr7.mp3" f"{local_audio_path(source_audio)}-vbr7-3b2b0f13.mp3"
), ),
"type": "audio/mp3", "type": "audio/mp3",
"medium": "audio", "medium": "audio",
@ -291,7 +291,7 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
{ {
"url": ( "url": (
f"https://mirror.example/feeds/demo/audio/" f"https://mirror.example/feeds/demo/audio/"
f"{local_audio_path(source_audio)}-vbr3.aac" f"{local_audio_path(source_audio)}-vbr3-4a2a58d5.aac"
), ),
"type": "audio/aac", "type": "audio/aac",
"medium": "audio", "medium": "audio",

View file

@ -66,12 +66,13 @@ def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None:
spider.rewrite_image_url("https://example.com/media/photo.jpg") spider.rewrite_image_url("https://example.com/media/photo.jpg")
== f"images/{local_image_path('https://example.com/media/photo.jpg')}" == f"images/{local_image_path('https://example.com/media/photo.jpg')}"
) )
assert ( assert spider.rewrite_file_url(
spider.rewrite_file_url( FileType.AUDIO,
FileType.AUDIO, "https://example.com/media/podcast.mp3",
"https://example.com/media/podcast.mp3", ) == (
) "audio/"
== f"audio/{local_audio_path('https://example.com/media/podcast.mp3')}-vbr7.mp3" f"{local_audio_path('https://example.com/media/podcast.mp3')}"
"-vbr7-3b2b0f13.mp3"
) )
assert spider.rewrite_file_url( assert spider.rewrite_file_url(
FileType.VIDEO, FileType.VIDEO,
@ -83,17 +84,23 @@ def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None:
) )
def test_published_video_path_changes_when_profile_args_change() -> None: def test_published_media_path_changes_when_profile_args_change() -> None:
source_url = "https://example.com/media/clip.mp4" source_url = "https://example.com/media/clip.mp4"
audio_profile = repub_settings.REPUBLISHER_AUDIO[0]
base_profile = repub_settings.REPUBLISHER_VIDEO[0] base_profile = repub_settings.REPUBLISHER_VIDEO[0]
assert published_media_path( assert published_media_path(FileType.AUDIO, source_url, audio_profile) == (
FileType.AUDIO, source_url, repub_settings.REPUBLISHER_AUDIO[0] f"{local_audio_path(source_url)}-vbr7-3b2b0f13.mp3"
) == (f"{local_audio_path(source_url)}-vbr7.mp3") )
assert published_media_path(FileType.VIDEO, source_url, base_profile) == ( assert published_media_path(FileType.VIDEO, source_url, base_profile) == (
f"{local_video_path(source_url)}-720-457f0928.mp4" f"{local_video_path(source_url)}-720-457f0928.mp4"
) )
changed_audio_profile = {**audio_profile, "max_bitrate": 128000}
assert published_media_path(
FileType.AUDIO, source_url, changed_audio_profile
) != published_media_path(FileType.AUDIO, source_url, audio_profile)
changed_profile = {**base_profile, "max_height": 1080} changed_profile = {**base_profile, "max_height": 1080}
assert published_media_path( assert published_media_path(
FileType.VIDEO, source_url, changed_profile FileType.VIDEO, source_url, changed_profile

View file

@ -338,7 +338,7 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
return str(output_path) return str(output_path)
def fake_probe_media(file_path: str): def fake_probe_media(file_path: str):
if file_path.endswith(".mp3-vbr7.mp3"): if file_path.endswith(".mp3-vbr7-3b2b0f13.mp3"):
return { return {
"format": { "format": {
"duration": "61.2", "duration": "61.2",
@ -423,18 +423,20 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
assert isinstance(result["checksum"], str) assert isinstance(result["checksum"], str)
assert result == { assert result == {
"url": source_url, "url": source_url,
"path": f"{audio_base_path}-vbr7.mp3", "path": f"{audio_base_path}-vbr7-3b2b0f13.mp3",
"published_url": ( "published_url": (
f"https://mirror.example/feeds/nasa/audio/{audio_base_path}-vbr7.mp3" "https://mirror.example/feeds/nasa/audio/"
f"{audio_base_path}-vbr7-3b2b0f13.mp3"
), ),
"checksum": result["checksum"], "checksum": result["checksum"],
"status": "downloaded", "status": "downloaded",
"variants": [ "variants": [
{ {
"url": ( "url": (
f"https://mirror.example/feeds/nasa/audio/{audio_base_path}-vbr7.mp3" "https://mirror.example/feeds/nasa/audio/"
f"{audio_base_path}-vbr7-3b2b0f13.mp3"
), ),
"path": f"{audio_base_path}-vbr7.mp3", "path": f"{audio_base_path}-vbr7-3b2b0f13.mp3",
"type": "audio/mp3", "type": "audio/mp3",
"medium": "audio", "medium": "audio",
"isDefault": "true", "isDefault": "true",
@ -446,9 +448,10 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
}, },
{ {
"url": ( "url": (
f"https://mirror.example/feeds/nasa/audio/{audio_base_path}-vbr3.aac" "https://mirror.example/feeds/nasa/audio/"
f"{audio_base_path}-vbr3-4a2a58d5.aac"
), ),
"path": f"{audio_base_path}-vbr3.aac", "path": f"{audio_base_path}-vbr3-4a2a58d5.aac",
"type": "audio/aac", "type": "audio/aac",
"medium": "audio", "medium": "audio",
"isDefault": "false", "isDefault": "false",
@ -474,8 +477,8 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
} }
assert persisted == [ assert persisted == [
(audio_base_path, "audio/mpeg"), (audio_base_path, "audio/mpeg"),
(f"{audio_base_path}-vbr7.mp3", "audio/mp3"), (f"{audio_base_path}-vbr7-3b2b0f13.mp3", "audio/mp3"),
(f"{audio_base_path}-vbr3.aac", "audio/aac"), (f"{audio_base_path}-vbr3-4a2a58d5.aac", "audio/aac"),
] ]
completed_item = pipeline.item_completed( completed_item = pipeline.item_completed(
@ -647,8 +650,8 @@ def test_audio_pipeline_media_to_download_checks_canonical_path(
source_url = "https://example.com/podcast.mp3" source_url = "https://example.com/podcast.mp3"
audio_base_path = local_audio_path(source_url) audio_base_path = local_audio_path(source_url)
original_path = store_dir(pipeline) / audio_base_path original_path = store_dir(pipeline) / audio_base_path
canonical_path = store_dir(pipeline) / f"{audio_base_path}-vbr7.mp3" canonical_path = store_dir(pipeline) / f"{audio_base_path}-vbr7-3b2b0f13.mp3"
secondary_path = store_dir(pipeline) / f"{audio_base_path}-vbr3.aac" secondary_path = store_dir(pipeline) / f"{audio_base_path}-vbr3-4a2a58d5.aac"
original_path.parent.mkdir(parents=True, exist_ok=True) original_path.parent.mkdir(parents=True, exist_ok=True)
original_path.write_bytes(b"original") original_path.write_bytes(b"original")
canonical_path.parent.mkdir(parents=True, exist_ok=True) canonical_path.parent.mkdir(parents=True, exist_ok=True)
@ -680,19 +683,27 @@ def test_audio_pipeline_media_to_download_checks_canonical_path(
lambda file_path: { lambda file_path: {
"format": { "format": {
"duration": "61.2", "duration": "61.2",
"size": "4567" if file_path.endswith("vbr7.mp3") else "3456", "size": ("4567" if file_path.endswith("vbr7-3b2b0f13.mp3") else "3456"),
"bit_rate": "96000" if file_path.endswith("vbr7.mp3") else "88000", "bit_rate": (
"format_name": "mp3" if file_path.endswith("vbr7.mp3") else "aac", "96000" if file_path.endswith("vbr7-3b2b0f13.mp3") else "88000"
),
"format_name": (
"mp3" if file_path.endswith("vbr7-3b2b0f13.mp3") else "aac"
),
"format_long_name": "Audio", "format_long_name": "Audio",
}, },
"streams": [ "streams": [
{ {
"codec_type": "audio", "codec_type": "audio",
"codec_name": "mp3" if file_path.endswith("vbr7.mp3") else "aac", "codec_name": (
"bit_rate": "96000" if file_path.endswith("vbr7.mp3") else "88000", "mp3" if file_path.endswith("vbr7-3b2b0f13.mp3") else "aac"
),
"bit_rate": (
"96000" if file_path.endswith("vbr7-3b2b0f13.mp3") else "88000"
),
"duration_ts": "61200", "duration_ts": "61200",
"sample_rate": ( "sample_rate": (
"44100" if file_path.endswith("vbr7.mp3") else "48000" "44100" if file_path.endswith("vbr7-3b2b0f13.mp3") else "48000"
), ),
"channels": 2, "channels": 2,
} }
@ -706,12 +717,12 @@ def test_audio_pipeline_media_to_download_checks_canonical_path(
item=item, item=item,
) )
assert result is not None assert result is not None
assert result["path"] == f"{audio_base_path}-vbr7.mp3" assert result["path"] == f"{audio_base_path}-vbr7-3b2b0f13.mp3"
assert result["status"] == "uptodate" assert result["status"] == "uptodate"
assert [variant.get("path") for variant in result["variants"]] == [ assert [variant.get("path") for variant in result["variants"]] == [
f"{audio_base_path}-vbr7.mp3", f"{audio_base_path}-vbr7-3b2b0f13.mp3",
f"{audio_base_path}-vbr3.aac", f"{audio_base_path}-vbr3-4a2a58d5.aac",
audio_base_path, audio_base_path,
] ]
assert f"{audio_base_path}.mp3" not in stat_paths assert f"{audio_base_path}.mp3" not in stat_paths
assert stat_paths[0] == f"{audio_base_path}-vbr7.mp3" assert stat_paths[0] == f"{audio_base_path}-vbr7-3b2b0f13.mp3"