Hash audio profile paths

This commit is contained in:
Abel Luck 2026-03-31 15:14:17 +02:00
parent 2ad0536bb0
commit 23d03cd9d5
4 changed files with 59 additions and 41 deletions

View file

@@ -66,7 +66,7 @@ def published_media_path(
file_type: FileType, source_url: str, profile: Mapping[str, Any]
) -> str:
if file_type == FileType.AUDIO:
return variant_media_path(local_audio_path(source_url), profile)
return variant_media_path(local_audio_path(source_url), profile, hashed=True)
if file_type == FileType.VIDEO:
return variant_media_path(local_video_path(source_url), profile, hashed=True)
raise ValueError(f"Unsupported file type for published media path: {file_type}")

View file

@@ -76,10 +76,10 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
item.audios = [
{
"url": source_audio,
"path": f"{audio_base_path}-vbr7.mp3",
"path": f"{audio_base_path}-vbr7-3b2b0f13.mp3",
"published_url": _published_url(
"https://mirror.example",
f"audio/{audio_base_path}-vbr7.mp3",
f"audio/{audio_base_path}-vbr7-3b2b0f13.mp3",
),
"checksum": "audio-default",
"status": "downloaded",
@@ -87,9 +87,9 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
{
"url": _published_url(
"https://mirror.example",
f"audio/{audio_base_path}-vbr7.mp3",
f"audio/{audio_base_path}-vbr7-3b2b0f13.mp3",
),
"path": f"{audio_base_path}-vbr7.mp3",
"path": f"{audio_base_path}-vbr7-3b2b0f13.mp3",
"type": "audio/mp3",
"medium": "audio",
"isDefault": "true",
@@ -102,9 +102,9 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
{
"url": _published_url(
"https://mirror.example",
f"audio/{audio_base_path}-vbr3.aac",
f"audio/{audio_base_path}-vbr3-4a2a58d5.aac",
),
"path": f"{audio_base_path}-vbr3.aac",
"path": f"{audio_base_path}-vbr3-4a2a58d5.aac",
"type": "audio/aac",
"medium": "audio",
"isDefault": "false",
@@ -259,7 +259,7 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
assert enclosure.attrib == {
"url": (
f"https://mirror.example/feeds/demo/audio/"
f"{local_audio_path(source_audio)}-vbr7.mp3"
f"{local_audio_path(source_audio)}-vbr7-3b2b0f13.mp3"
),
"length": "4567",
"type": "audio/mp3",
@@ -277,7 +277,7 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
{
"url": (
f"https://mirror.example/feeds/demo/audio/"
f"{local_audio_path(source_audio)}-vbr7.mp3"
f"{local_audio_path(source_audio)}-vbr7-3b2b0f13.mp3"
),
"type": "audio/mp3",
"medium": "audio",
@@ -291,7 +291,7 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
{
"url": (
f"https://mirror.example/feeds/demo/audio/"
f"{local_audio_path(source_audio)}-vbr3.aac"
f"{local_audio_path(source_audio)}-vbr3-4a2a58d5.aac"
),
"type": "audio/aac",
"medium": "audio",

View file

@@ -66,12 +66,13 @@ def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None:
spider.rewrite_image_url("https://example.com/media/photo.jpg")
== f"images/{local_image_path('https://example.com/media/photo.jpg')}"
)
assert (
spider.rewrite_file_url(
assert spider.rewrite_file_url(
FileType.AUDIO,
"https://example.com/media/podcast.mp3",
)
== f"audio/{local_audio_path('https://example.com/media/podcast.mp3')}-vbr7.mp3"
) == (
"audio/"
f"{local_audio_path('https://example.com/media/podcast.mp3')}"
"-vbr7-3b2b0f13.mp3"
)
assert spider.rewrite_file_url(
FileType.VIDEO,
@@ -83,17 +84,23 @@ def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None:
)
def test_published_video_path_changes_when_profile_args_change() -> None:
def test_published_media_path_changes_when_profile_args_change() -> None:
source_url = "https://example.com/media/clip.mp4"
audio_profile = repub_settings.REPUBLISHER_AUDIO[0]
base_profile = repub_settings.REPUBLISHER_VIDEO[0]
assert published_media_path(
FileType.AUDIO, source_url, repub_settings.REPUBLISHER_AUDIO[0]
) == (f"{local_audio_path(source_url)}-vbr7.mp3")
assert published_media_path(FileType.AUDIO, source_url, audio_profile) == (
f"{local_audio_path(source_url)}-vbr7-3b2b0f13.mp3"
)
assert published_media_path(FileType.VIDEO, source_url, base_profile) == (
f"{local_video_path(source_url)}-720-457f0928.mp4"
)
changed_audio_profile = {**audio_profile, "max_bitrate": 128000}
assert published_media_path(
FileType.AUDIO, source_url, changed_audio_profile
) != published_media_path(FileType.AUDIO, source_url, audio_profile)
changed_profile = {**base_profile, "max_height": 1080}
assert published_media_path(
FileType.VIDEO, source_url, changed_profile

View file

@@ -338,7 +338,7 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
return str(output_path)
def fake_probe_media(file_path: str):
if file_path.endswith(".mp3-vbr7.mp3"):
if file_path.endswith(".mp3-vbr7-3b2b0f13.mp3"):
return {
"format": {
"duration": "61.2",
@@ -423,18 +423,20 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
assert isinstance(result["checksum"], str)
assert result == {
"url": source_url,
"path": f"{audio_base_path}-vbr7.mp3",
"path": f"{audio_base_path}-vbr7-3b2b0f13.mp3",
"published_url": (
f"https://mirror.example/feeds/nasa/audio/{audio_base_path}-vbr7.mp3"
"https://mirror.example/feeds/nasa/audio/"
f"{audio_base_path}-vbr7-3b2b0f13.mp3"
),
"checksum": result["checksum"],
"status": "downloaded",
"variants": [
{
"url": (
f"https://mirror.example/feeds/nasa/audio/{audio_base_path}-vbr7.mp3"
"https://mirror.example/feeds/nasa/audio/"
f"{audio_base_path}-vbr7-3b2b0f13.mp3"
),
"path": f"{audio_base_path}-vbr7.mp3",
"path": f"{audio_base_path}-vbr7-3b2b0f13.mp3",
"type": "audio/mp3",
"medium": "audio",
"isDefault": "true",
@@ -446,9 +448,10 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
},
{
"url": (
f"https://mirror.example/feeds/nasa/audio/{audio_base_path}-vbr3.aac"
"https://mirror.example/feeds/nasa/audio/"
f"{audio_base_path}-vbr3-4a2a58d5.aac"
),
"path": f"{audio_base_path}-vbr3.aac",
"path": f"{audio_base_path}-vbr3-4a2a58d5.aac",
"type": "audio/aac",
"medium": "audio",
"isDefault": "false",
@@ -474,8 +477,8 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
}
assert persisted == [
(audio_base_path, "audio/mpeg"),
(f"{audio_base_path}-vbr7.mp3", "audio/mp3"),
(f"{audio_base_path}-vbr3.aac", "audio/aac"),
(f"{audio_base_path}-vbr7-3b2b0f13.mp3", "audio/mp3"),
(f"{audio_base_path}-vbr3-4a2a58d5.aac", "audio/aac"),
]
completed_item = pipeline.item_completed(
@@ -647,8 +650,8 @@ def test_audio_pipeline_media_to_download_checks_canonical_path(
source_url = "https://example.com/podcast.mp3"
audio_base_path = local_audio_path(source_url)
original_path = store_dir(pipeline) / audio_base_path
canonical_path = store_dir(pipeline) / f"{audio_base_path}-vbr7.mp3"
secondary_path = store_dir(pipeline) / f"{audio_base_path}-vbr3.aac"
canonical_path = store_dir(pipeline) / f"{audio_base_path}-vbr7-3b2b0f13.mp3"
secondary_path = store_dir(pipeline) / f"{audio_base_path}-vbr3-4a2a58d5.aac"
original_path.parent.mkdir(parents=True, exist_ok=True)
original_path.write_bytes(b"original")
canonical_path.parent.mkdir(parents=True, exist_ok=True)
@@ -680,19 +683,27 @@ def test_audio_pipeline_media_to_download_checks_canonical_path(
lambda file_path: {
"format": {
"duration": "61.2",
"size": "4567" if file_path.endswith("vbr7.mp3") else "3456",
"bit_rate": "96000" if file_path.endswith("vbr7.mp3") else "88000",
"format_name": "mp3" if file_path.endswith("vbr7.mp3") else "aac",
"size": ("4567" if file_path.endswith("vbr7-3b2b0f13.mp3") else "3456"),
"bit_rate": (
"96000" if file_path.endswith("vbr7-3b2b0f13.mp3") else "88000"
),
"format_name": (
"mp3" if file_path.endswith("vbr7-3b2b0f13.mp3") else "aac"
),
"format_long_name": "Audio",
},
"streams": [
{
"codec_type": "audio",
"codec_name": "mp3" if file_path.endswith("vbr7.mp3") else "aac",
"bit_rate": "96000" if file_path.endswith("vbr7.mp3") else "88000",
"codec_name": (
"mp3" if file_path.endswith("vbr7-3b2b0f13.mp3") else "aac"
),
"bit_rate": (
"96000" if file_path.endswith("vbr7-3b2b0f13.mp3") else "88000"
),
"duration_ts": "61200",
"sample_rate": (
"44100" if file_path.endswith("vbr7.mp3") else "48000"
"44100" if file_path.endswith("vbr7-3b2b0f13.mp3") else "48000"
),
"channels": 2,
}
@@ -706,12 +717,12 @@ def test_audio_pipeline_media_to_download_checks_canonical_path(
item=item,
)
assert result is not None
assert result["path"] == f"{audio_base_path}-vbr7.mp3"
assert result["path"] == f"{audio_base_path}-vbr7-3b2b0f13.mp3"
assert result["status"] == "uptodate"
assert [variant.get("path") for variant in result["variants"]] == [
f"{audio_base_path}-vbr7.mp3",
f"{audio_base_path}-vbr3.aac",
f"{audio_base_path}-vbr7-3b2b0f13.mp3",
f"{audio_base_path}-vbr3-4a2a58d5.aac",
audio_base_path,
]
assert f"{audio_base_path}.mp3" not in stat_paths
assert stat_paths[0] == f"{audio_base_path}-vbr7.mp3"
assert stat_paths[0] == f"{audio_base_path}-vbr7-3b2b0f13.mp3"