Fix ffprobe handling for WebM and format families

This commit is contained in:
Abel Luck 2026-04-01 17:13:19 +02:00
parent 0504013c5a
commit 05ac6ce20d
4 changed files with 500 additions and 169 deletions

View file

@ -14,7 +14,13 @@ from repub.exporters import RssExporter
from repub.items import ElementItem
from repub.rss import nsmap
from repub.spiders.rss_spider import RssFeedSpider
from repub.utils import local_audio_path, local_image_path, local_video_path
from repub.utils import (
FileType,
local_audio_path,
local_image_path,
local_video_path,
published_media_path,
)
RSS_DATE_PATTERN = re.compile(
r"^[A-Z][a-z]{2}, \d{2} [A-Z][a-z]{2} \d{4} \d{2}:\d{2}:\d{2} [+-]\d{4}$"
@ -69,17 +75,32 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
source_video = "https://source.example/media/video.mp4"
channel_image = "https://source.example/media/channel.png"
item_image = "https://source.example/media/cover.jpg"
audio_base_path = local_audio_path(source_audio)
audio_default_path = published_media_path(
FileType.AUDIO, source_audio, repub_settings.REPUBLISHER_AUDIO[0]
)
audio_m4a_path = published_media_path(
FileType.AUDIO, source_audio, repub_settings.REPUBLISHER_AUDIO[1]
)
audio_webm_path = published_media_path(
FileType.AUDIO, source_audio, repub_settings.REPUBLISHER_AUDIO[2]
)
video_base_path = local_video_path(source_video)
video_main_path = published_media_path(
FileType.VIDEO, source_video, repub_settings.REPUBLISHER_VIDEO[0]
)
video_fallback_path = published_media_path(
FileType.VIDEO, source_video, repub_settings.REPUBLISHER_VIDEO[1]
)
def prepare_item(item: ElementItem) -> None:
audio_base_path = local_audio_path(source_audio)
video_base_path = local_video_path(source_video)
item.audios = [
{
"url": source_audio,
"path": f"{audio_base_path}-vbr7-3b2b0f13.mp3",
"path": audio_default_path,
"published_url": _published_url(
"https://mirror.example",
f"audio/{audio_base_path}-vbr7-3b2b0f13.mp3",
f"audio/{audio_default_path}",
),
"checksum": "audio-default",
"status": "downloaded",
@ -87,32 +108,47 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
{
"url": _published_url(
"https://mirror.example",
f"audio/{audio_base_path}-vbr7-3b2b0f13.mp3",
f"audio/{audio_default_path}",
),
"path": f"{audio_base_path}-vbr7-3b2b0f13.mp3",
"type": "audio/mp3",
"path": audio_default_path,
"type": "audio/mpeg",
"medium": "audio",
"isDefault": "true",
"fileSize": "4567",
"bitrate": "96000",
"bitrate": "37209",
"duration": "61.2",
"samplingrate": "44100",
"channels": "2",
"samplingrate": "48000",
"channels": "1",
},
{
"url": _published_url(
"https://mirror.example",
f"audio/{audio_base_path}-vbr3-4a2a58d5.aac",
f"audio/{audio_m4a_path}",
),
"path": f"{audio_base_path}-vbr3-4a2a58d5.aac",
"type": "audio/aac",
"path": audio_m4a_path,
"type": "audio/mp4",
"medium": "audio",
"isDefault": "false",
"fileSize": "3456",
"bitrate": "88000",
"bitrate": "20746",
"duration": "61.2",
"samplingrate": "48000",
"channels": "2",
"channels": "1",
},
{
"url": _published_url(
"https://mirror.example",
f"audio/{audio_webm_path}",
),
"path": audio_webm_path,
"type": "audio/webm",
"medium": "audio",
"isDefault": "false",
"fileSize": "2345",
"bitrate": "48000",
"duration": "61.2",
"samplingrate": "48000",
"channels": "1",
},
{
"url": _published_url(
@ -135,10 +171,10 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
item.videos = [
{
"url": source_video,
"path": f"{video_base_path}-720-457f0928.mp4",
"path": video_main_path,
"published_url": _published_url(
"https://mirror.example",
f"video/{video_base_path}-720-457f0928.mp4",
f"video/{video_main_path}",
),
"checksum": "video-default",
"status": "downloaded",
@ -146,9 +182,9 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
{
"url": _published_url(
"https://mirror.example",
f"video/{video_base_path}-720-457f0928.mp4",
f"video/{video_main_path}",
),
"path": f"{video_base_path}-720-457f0928.mp4",
"path": video_main_path,
"type": "video/mp4",
"medium": "video",
"isDefault": "true",
@ -159,6 +195,22 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
"height": "720",
"framerate": "30/1",
},
{
"url": _published_url(
"https://mirror.example",
f"video/{video_fallback_path}",
),
"path": video_fallback_path,
"type": "video/webm",
"medium": "video",
"isDefault": "false",
"fileSize": "6789",
"bitrate": "64000",
"duration": "60.0",
"width": "1280",
"height": "720",
"framerate": "25/1",
},
{
"url": _published_url(
"https://mirror.example",
@ -257,12 +309,9 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
enclosure = root.find("./channel/item/enclosure")
assert enclosure is not None
assert enclosure.attrib == {
"url": (
f"https://mirror.example/feeds/demo/audio/"
f"{local_audio_path(source_audio)}-vbr7-3b2b0f13.mp3"
),
"url": (f"https://mirror.example/feeds/demo/audio/" f"{audio_default_path}"),
"length": "4567",
"type": "audio/mp3",
"type": "audio/mpeg",
}
assert len(enclosure) == 0
@ -276,32 +325,39 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
assert [variant.attrib for variant in audio_variants] == [
{
"url": (
f"https://mirror.example/feeds/demo/audio/"
f"{local_audio_path(source_audio)}-vbr7-3b2b0f13.mp3"
f"https://mirror.example/feeds/demo/audio/" f"{audio_default_path}"
),
"type": "audio/mp3",
"type": "audio/mpeg",
"medium": "audio",
"isDefault": "true",
"bitrate": "96000",
"samplingrate": "44100",
"channels": "2",
"bitrate": "37209",
"samplingrate": "48000",
"channels": "1",
"duration": "61.2",
"fileSize": "4567",
},
{
"url": (
f"https://mirror.example/feeds/demo/audio/"
f"{local_audio_path(source_audio)}-vbr3-4a2a58d5.aac"
),
"type": "audio/aac",
"url": (f"https://mirror.example/feeds/demo/audio/" f"{audio_m4a_path}"),
"type": "audio/mp4",
"medium": "audio",
"isDefault": "false",
"bitrate": "88000",
"bitrate": "20746",
"samplingrate": "48000",
"channels": "2",
"channels": "1",
"duration": "61.2",
"fileSize": "3456",
},
{
"url": (f"https://mirror.example/feeds/demo/audio/" f"{audio_webm_path}"),
"type": "audio/webm",
"medium": "audio",
"isDefault": "false",
"bitrate": "48000",
"samplingrate": "48000",
"channels": "1",
"duration": "61.2",
"fileSize": "2345",
},
{
"url": (
f"https://mirror.example/feeds/demo/audio/"
@ -321,10 +377,7 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
video_variants = video_group.findall("media:content", namespaces=nsmap)
assert [variant.attrib for variant in video_variants] == [
{
"url": (
f"https://mirror.example/feeds/demo/video/"
f"{local_video_path(source_video)}-720-457f0928.mp4"
),
"url": (f"https://mirror.example/feeds/demo/video/" f"{video_main_path}"),
"type": "video/mp4",
"medium": "video",
"isDefault": "true",
@ -337,6 +390,22 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
"lang": "en",
"fileSize": "9876",
},
{
"url": (
f"https://mirror.example/feeds/demo/video/" f"{video_fallback_path}"
),
"type": "video/webm",
"medium": "video",
"isDefault": "false",
"expression": "full",
"bitrate": "64000",
"framerate": "25/1",
"duration": "60.0",
"height": "720",
"width": "1280",
"lang": "en",
"fileSize": "6789",
},
{
"url": (
f"https://mirror.example/feeds/demo/video/"