From 0504013c5a9cf63bb797664dce166e4c361cfc5d Mon Sep 17 00:00:00 2001
From: Abel Luck <abel@guardianproject.info>
Date: Wed, 1 Apr 2026 16:58:06 +0200
Subject: [PATCH 1/4] update audio and video transcoding profiles for
 compatibility

---
 repub/settings.py | 114 ++++++++++++++++++++++++++--------------------
 1 file changed, 65 insertions(+), 49 deletions(-)

diff --git a/repub/settings.py b/repub/settings.py
index d39b635..252c974 100644
--- a/repub/settings.py
+++ b/repub/settings.py
@@ -102,79 +102,95 @@ MEDIA_ALLOW_REDIRECTS = True
 
 REPUBLISHER_AUDIO = [
     {
-        "name": "vbr7",
+        "name": "mp3_vbr7_voice",
         "format": "mp3",
-        "max_bitrate": 96000,
-        "mimetype": "audio/mp3",
+        "max_bitrate": 64000,
+        "mimetype": "audio/mpeg",
         "extension": "mp3",
         "ffmpeg_audio_params": {
             "acodec": "libmp3lame",
-            # https://trac.ffmpeg.org/wiki/Encode/MP3#VBREncoding
             "qscale:a": "7",
+            "ac": "1",
+            "ar": "48000",
         },
     },
     {
-        "name": "vbr3",
-        "format": "aac",
-        "max_bitrate": 96000,
-        "mimetype": "audio/aac",
-        "extension": "aac",
+        "name": "m4a_aac_vbr2_voice",
+        "format": "m4a",
+        "max_bitrate": 64000,
+        "mimetype": "audio/mp4",
+        "extension": "m4a",
         "ffmpeg_audio_params": {
             "acodec": "libfdk_aac",
-            # https://trac.ffmpeg.org/wiki/Encode/MP3#VBREncoding
-            "vbr": "3",
+            "vbr": "2",
+            "ac": "1",
+            "ar": "48000",
+        },
+    },
+    {
+        "name": "webm_opus_voice_48k",
+        "format": "webm",
+        "max_bitrate": 48000,
+        "mimetype": "audio/webm",
+        "extension": "webm",
+        "ffmpeg_audio_params": {
+            "acodec": "libopus",
+            "b:a": "48k",
+            "ac": "1",
+            "ar": "48000",
+            "application": "voip",
         },
     },
 ]
 REPUBLISHER_VIDEO = [
+    # broadly compatible
     {
-        "name": "720",
+        "name": "main",
         "container": "mp4",
         "vcodec": "h264",
-        "acodec": "mp3",
+        "acodec": "aac",
         "audio_max_bitrate": 96000,
         "ffmpeg_audio_params": {
-            "acodec": "libmp3lame",
-            # https://trac.ffmpeg.org/wiki/Encode/MP3#VBREncoding
-            "qscale:a": "7",
+            "acodec": "aac",
+            "b:a": "96k",
+            "ac": "2",
+            "ar": "48000",
+        },
+        "ffmpeg_video_params": {
+            "vcodec": "libx264",
+            "pix_fmt": "yuv420p",
+            "profile:v": "main",
+            "level": "4.0",
+            "preset": "medium",
+            "crf": "22",
+            "movflags": "+faststart",
         },
-        "ffmpeg_video_params": {"vcodec": "h264", "strict": "-2"},
         "max_height": 720,
         "mimetype": "video/mp4",
         "extension": "mp4",
     },
-    # {
-    #    "passes": [
-    #        {
-    #            "c:v": "libvpx-vp9",
-    #            "b:v": "0",
-    #            "crf": "30",
-    #            "pass": "1",
-    #            "deadline": "good",
-    #            "row-mt": "1",
-    #            "f": "null",
-    #        },
-    #        {
-    #            "c:v": "libvpx-vp9",
-    #            "b:v": "0",
-    #            "crf": "30",
-    #            "pass": "2",
-    #            "deadline": "good",
-    #            "row-mt": "1",
-    #            "c:a": "libopus",
-    #            "b:a": "96k",
-    #            "ac": "2",
-    #        },
-    #    ],
-    #    "name": "720",
-    #    "container": "webm",
-    #    "vcodec": "libvpx-vp9",
-    #    "acodec": "opus",
-    #    "audio_max_bitrate": 96000,
-    #    "max_height": 720,
-    #    "mimetype": "video/webm",
-    #    "extension": "webm",
-    # },
+    # linux fallback without patent encumbrance
+    {
+        "name": "fallback",
+        "container": "webm",
+        "vcodec": "vp9",
+        "acodec": "opus",
+        "audio_max_bitrate": 96000,
+        "ffmpeg_audio_params": {
+            "acodec": "libopus",
+            "b:a": "96k",
+            "ac": "2",
+            "ar": "48000",
+        },
+        "ffmpeg_video_params": {
+            "vcodec": "libvpx-vp9",
+            "crf": "33",
+            "b:v": "0",
+        },
+        "max_height": 720,
+        "mimetype": "video/webm",
+        "extension": "webm",
+    },
 ]
 
 REPUBLISHER_FFMPEG_ENCODERS = ["libmp3lame", "libfdk_aac", "libvpx-vp9", "libopus"]

From 05ac6ce20d9972a05769bb71a3fdb591ffb780a0 Mon Sep 17 00:00:00 2001
From: Abel Luck <abel@guardianproject.info>
Date: Wed, 1 Apr 2026 17:13:19 +0200
Subject: [PATCH 2/4] Fix ffprobe handling for WebM and format families

---
 repub/media.py                |  45 ++--
 tests/test_feed_validation.py | 153 ++++++++----
 tests/test_file_feeds.py      |  18 +-
 tests/test_pipelines.py       | 453 ++++++++++++++++++++++++++--------
 4 files changed, 500 insertions(+), 169 deletions(-)

diff --git a/repub/media.py b/repub/media.py
index fe8074b..aec3a4d 100644
--- a/repub/media.py
+++ b/repub/media.py
@@ -77,7 +77,7 @@ def probe_media(file_path) -> Dict[str, Any]:
 def bitrate(info) -> float:
     try:
         return int(info["format"]["bit_rate"])
-    except KeyError | ValueError:
+    except (KeyError, ValueError):
         logger.error("extracting bitrate from ffprobe failed")
         return math.inf
 
@@ -85,16 +85,34 @@ def bitrate(info) -> float:
 def format_name(info) -> Optional[str]:
     try:
         return info["format"]["format_name"]
-    except KeyError | ValueError:
+    except (KeyError, ValueError):
         logger.error("extracting format from ffprobe failed")
         return None
 
 
+def _stream_duration_sort_key(stream: Dict[str, Any]) -> tuple[int, float]:
+    duration_ts = _int_value(stream.get("duration_ts"))
+    if duration_ts is not None:
+        return 1, float(duration_ts)
+    try:
+        duration = float(str(stream.get("duration", "")))
+    except (TypeError, ValueError):
+        duration = 0.0
+    return 0, duration
+
+
+def _matches_format(probe: Dict[str, Any], expected: str) -> bool:
+    current = format_name(probe)
+    if current is None:
+        return False
+    return expected in current.split(",")
+
+
 def primary_video_stream(probe):
     video_streams = [
         stream for stream in probe["streams"] if stream["codec_type"] == "video"
     ]
-    video_streams = sorted(video_streams, key=lambda x: x["duration_ts"], reverse=True)
+    video_streams = sorted(video_streams, key=_stream_duration_sort_key, reverse=True)
     if not video_streams:
         return None
     if len(video_streams) > 1:
@@ -108,7 +126,7 @@ def primary_audio_stream(probe):
     audio_streams = [
         stream for stream in probe["streams"] if stream["codec_type"] == "audio"
     ]
-    audio_streams = sorted(audio_streams, key=lambda x: x["duration_ts"], reverse=True)
+    audio_streams = sorted(audio_streams, key=_stream_duration_sort_key, reverse=True)
     if not audio_streams:
         return None
     if len(audio_streams) > 1:
@@ -126,7 +144,7 @@ def get_resolution(probe) -> Tuple[Optional[float], Optional[float]]:
         width = int(video_stream["width"])
         height = int(video_stream["height"])
         return width, height
-    except KeyError | ValueError:
+    except (KeyError, ValueError):
         logger.error("extracting resolution from ffprobe failed")
         return None, None
 
@@ -137,7 +155,7 @@ def get_vcodec_name(probe) -> Optional[str]:
         if not video_stream:
             return None
         return video_stream["codec_name"]
-    except KeyError | ValueError:
+    except (KeyError, ValueError):
         logger.error("extracting video codec_name from ffprobe failed")
         return None
 
@@ -147,8 +165,11 @@ def get_acodec_info(probe) -> Tuple[Optional[str], Optional[int]]:
         audio_stream = primary_audio_stream(probe)
         if not audio_stream:
             return None, None
-        return audio_stream["codec_name"], int(audio_stream["bit_rate"])
-    except KeyError | ValueError:
+        audio_bitrate = _int_value(
+            audio_stream.get("bit_rate") or probe["format"].get("bit_rate")
+        )
+        return audio_stream["codec_name"], audio_bitrate
+    except (KeyError, ValueError):
         logger.error("extracting audio codec_name from ffprobe failed")
         return None, None
 
@@ -218,7 +239,7 @@ def audio_transcode_params(
         is_br = True
     else:
         is_br = False
-    if format_name(probe_result) == fmt:
+    if _matches_format(probe_result, fmt):
         is_fmt = True
     else:
         is_fmt = False
@@ -289,11 +310,7 @@ def video_transcode_params(
         # TODO: turn this into an exception and catch it for reporting
         return None
 
-    current_container_many = format_name(probe_result)
-    is_container = False
-    if current_container_many is not None:
-        if target_container in current_container_many.split(","):
-            is_container = True
+    is_container = _matches_format(probe_result, target_container)
 
     is_vcodec = vcodec == target_vcodec
     is_acodec = acodec == target_acodec
diff --git a/tests/test_feed_validation.py b/tests/test_feed_validation.py
index 22589b4..290a90a 100644
--- a/tests/test_feed_validation.py
+++ b/tests/test_feed_validation.py
@@ -14,7 +14,13 @@ from repub.exporters import RssExporter
 from repub.items import ElementItem
 from repub.rss import nsmap
 from repub.spiders.rss_spider import RssFeedSpider
-from repub.utils import local_audio_path, local_image_path, local_video_path
+from repub.utils import (
+    FileType,
+    local_audio_path,
+    local_image_path,
+    local_video_path,
+    published_media_path,
+)
 
 RSS_DATE_PATTERN = re.compile(
     r"^[A-Z][a-z]{2}, \d{2} [A-Z][a-z]{2} \d{4} \d{2}:\d{2}:\d{2} [+-]\d{4}$"
@@ -69,17 +75,32 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
     source_video = "https://source.example/media/video.mp4"
     channel_image = "https://source.example/media/channel.png"
     item_image = "https://source.example/media/cover.jpg"
+    audio_base_path = local_audio_path(source_audio)
+    audio_default_path = published_media_path(
+        FileType.AUDIO, source_audio, repub_settings.REPUBLISHER_AUDIO[0]
+    )
+    audio_m4a_path = published_media_path(
+        FileType.AUDIO, source_audio, repub_settings.REPUBLISHER_AUDIO[1]
+    )
+    audio_webm_path = published_media_path(
+        FileType.AUDIO, source_audio, repub_settings.REPUBLISHER_AUDIO[2]
+    )
+    video_base_path = local_video_path(source_video)
+    video_main_path = published_media_path(
+        FileType.VIDEO, source_video, repub_settings.REPUBLISHER_VIDEO[0]
+    )
+    video_fallback_path = published_media_path(
+        FileType.VIDEO, source_video, repub_settings.REPUBLISHER_VIDEO[1]
+    )
 
     def prepare_item(item: ElementItem) -> None:
-        audio_base_path = local_audio_path(source_audio)
-        video_base_path = local_video_path(source_video)
         item.audios = [
             {
                 "url": source_audio,
-                "path": f"{audio_base_path}-vbr7-3b2b0f13.mp3",
+                "path": audio_default_path,
                 "published_url": _published_url(
                     "https://mirror.example",
-                    f"audio/{audio_base_path}-vbr7-3b2b0f13.mp3",
+                    f"audio/{audio_default_path}",
                 ),
                 "checksum": "audio-default",
                 "status": "downloaded",
@@ -87,32 +108,47 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
                     {
                         "url": _published_url(
                             "https://mirror.example",
-                            f"audio/{audio_base_path}-vbr7-3b2b0f13.mp3",
+                            f"audio/{audio_default_path}",
                         ),
-                        "path": f"{audio_base_path}-vbr7-3b2b0f13.mp3",
-                        "type": "audio/mp3",
+                        "path": audio_default_path,
+                        "type": "audio/mpeg",
                         "medium": "audio",
                         "isDefault": "true",
                         "fileSize": "4567",
-                        "bitrate": "96000",
+                        "bitrate": "37209",
                         "duration": "61.2",
-                        "samplingrate": "44100",
-                        "channels": "2",
+                        "samplingrate": "48000",
+                        "channels": "1",
                     },
                     {
                         "url": _published_url(
                             "https://mirror.example",
-                            f"audio/{audio_base_path}-vbr3-4a2a58d5.aac",
+                            f"audio/{audio_m4a_path}",
                         ),
-                        "path": f"{audio_base_path}-vbr3-4a2a58d5.aac",
-                        "type": "audio/aac",
+                        "path": audio_m4a_path,
+                        "type": "audio/mp4",
                         "medium": "audio",
                         "isDefault": "false",
                         "fileSize": "3456",
-                        "bitrate": "88000",
+                        "bitrate": "20746",
                         "duration": "61.2",
                         "samplingrate": "48000",
-                        "channels": "2",
+                        "channels": "1",
+                    },
+                    {
+                        "url": _published_url(
+                            "https://mirror.example",
+                            f"audio/{audio_webm_path}",
+                        ),
+                        "path": audio_webm_path,
+                        "type": "audio/webm",
+                        "medium": "audio",
+                        "isDefault": "false",
+                        "fileSize": "2345",
+                        "bitrate": "48000",
+                        "duration": "61.2",
+                        "samplingrate": "48000",
+                        "channels": "1",
                     },
                     {
                         "url": _published_url(
@@ -135,10 +171,10 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
         item.videos = [
             {
                 "url": source_video,
-                "path": f"{video_base_path}-720-457f0928.mp4",
+                "path": video_main_path,
                 "published_url": _published_url(
                     "https://mirror.example",
-                    f"video/{video_base_path}-720-457f0928.mp4",
+                    f"video/{video_main_path}",
                 ),
                 "checksum": "video-default",
                 "status": "downloaded",
@@ -146,9 +182,9 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
                     {
                         "url": _published_url(
                             "https://mirror.example",
-                            f"video/{video_base_path}-720-457f0928.mp4",
+                            f"video/{video_main_path}",
                         ),
-                        "path": f"{video_base_path}-720-457f0928.mp4",
+                        "path": video_main_path,
                         "type": "video/mp4",
                         "medium": "video",
                         "isDefault": "true",
@@ -159,6 +195,22 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
                         "height": "720",
                         "framerate": "30/1",
                     },
+                    {
+                        "url": _published_url(
+                            "https://mirror.example",
+                            f"video/{video_fallback_path}",
+                        ),
+                        "path": video_fallback_path,
+                        "type": "video/webm",
+                        "medium": "video",
+                        "isDefault": "false",
+                        "fileSize": "6789",
+                        "bitrate": "64000",
+                        "duration": "60.0",
+                        "width": "1280",
+                        "height": "720",
+                        "framerate": "25/1",
+                    },
                     {
                         "url": _published_url(
                             "https://mirror.example",
@@ -257,12 +309,9 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
     enclosure = root.find("./channel/item/enclosure")
     assert enclosure is not None
     assert enclosure.attrib == {
-        "url": (
-            f"https://mirror.example/feeds/demo/audio/"
-            f"{local_audio_path(source_audio)}-vbr7-3b2b0f13.mp3"
-        ),
+        "url": (f"https://mirror.example/feeds/demo/audio/" f"{audio_default_path}"),
         "length": "4567",
-        "type": "audio/mp3",
+        "type": "audio/mpeg",
     }
     assert len(enclosure) == 0
 
@@ -276,32 +325,39 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
     assert [variant.attrib for variant in audio_variants] == [
         {
             "url": (
-                f"https://mirror.example/feeds/demo/audio/"
-                f"{local_audio_path(source_audio)}-vbr7-3b2b0f13.mp3"
+                f"https://mirror.example/feeds/demo/audio/" f"{audio_default_path}"
             ),
-            "type": "audio/mp3",
+            "type": "audio/mpeg",
             "medium": "audio",
             "isDefault": "true",
-            "bitrate": "96000",
-            "samplingrate": "44100",
-            "channels": "2",
+            "bitrate": "37209",
+            "samplingrate": "48000",
+            "channels": "1",
             "duration": "61.2",
             "fileSize": "4567",
         },
         {
-            "url": (
-                f"https://mirror.example/feeds/demo/audio/"
-                f"{local_audio_path(source_audio)}-vbr3-4a2a58d5.aac"
-            ),
-            "type": "audio/aac",
+            "url": (f"https://mirror.example/feeds/demo/audio/" f"{audio_m4a_path}"),
+            "type": "audio/mp4",
             "medium": "audio",
             "isDefault": "false",
-            "bitrate": "88000",
+            "bitrate": "20746",
             "samplingrate": "48000",
-            "channels": "2",
+            "channels": "1",
             "duration": "61.2",
             "fileSize": "3456",
         },
+        {
+            "url": (f"https://mirror.example/feeds/demo/audio/" f"{audio_webm_path}"),
+            "type": "audio/webm",
+            "medium": "audio",
+            "isDefault": "false",
+            "bitrate": "48000",
+            "samplingrate": "48000",
+            "channels": "1",
+            "duration": "61.2",
+            "fileSize": "2345",
+        },
         {
             "url": (
                 f"https://mirror.example/feeds/demo/audio/"
@@ -321,10 +377,7 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
     video_variants = video_group.findall("media:content", namespaces=nsmap)
     assert [variant.attrib for variant in video_variants] == [
         {
-            "url": (
-                f"https://mirror.example/feeds/demo/video/"
-                f"{local_video_path(source_video)}-720-457f0928.mp4"
-            ),
+            "url": (f"https://mirror.example/feeds/demo/video/" f"{video_main_path}"),
             "type": "video/mp4",
             "medium": "video",
             "isDefault": "true",
@@ -337,6 +390,22 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
             "lang": "en",
             "fileSize": "9876",
         },
+        {
+            "url": (
+                f"https://mirror.example/feeds/demo/video/" f"{video_fallback_path}"
+            ),
+            "type": "video/webm",
+            "medium": "video",
+            "isDefault": "false",
+            "expression": "full",
+            "bitrate": "64000",
+            "framerate": "25/1",
+            "duration": "60.0",
+            "height": "720",
+            "width": "1280",
+            "lang": "en",
+            "fileSize": "6789",
+        },
         {
             "url": (
                 f"https://mirror.example/feeds/demo/video/"
diff --git a/tests/test_file_feeds.py b/tests/test_file_feeds.py
index 66246f3..ff43b6a 100644
--- a/tests/test_file_feeds.py
+++ b/tests/test_file_feeds.py
@@ -71,16 +71,22 @@ def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None:
         "https://example.com/media/podcast.mp3",
     ) == (
         "audio/"
-        f"{local_audio_path('https://example.com/media/podcast.mp3')}"
-        "-vbr7-3b2b0f13.mp3"
+        + published_media_path(
+            FileType.AUDIO,
+            "https://example.com/media/podcast.mp3",
+            repub_settings.REPUBLISHER_AUDIO[0],
+        )
     )
     assert spider.rewrite_file_url(
         FileType.VIDEO,
         "https://example.com/media/clip.mp4",
     ) == (
         "video/"
-        f"{local_video_path('https://example.com/media/clip.mp4')}"
-        "-720-457f0928.mp4"
+        + published_media_path(
+            FileType.VIDEO,
+            "https://example.com/media/clip.mp4",
+            repub_settings.REPUBLISHER_VIDEO[0],
+        )
     )
 
 
@@ -90,10 +96,10 @@ def test_published_media_path_changes_when_profile_args_change() -> None:
     base_profile = repub_settings.REPUBLISHER_VIDEO[0]
 
     assert published_media_path(FileType.AUDIO, source_url, audio_profile) == (
-        f"{local_audio_path(source_url)}-vbr7-3b2b0f13.mp3"
+        f"{local_audio_path(source_url)}-mp3_vbr7_voice-1cc131cf.mp3"
     )
     assert published_media_path(FileType.VIDEO, source_url, base_profile) == (
-        f"{local_video_path(source_url)}-720-457f0928.mp4"
+        f"{local_video_path(source_url)}-main-4fb03ba0.mp4"
     )
 
     changed_audio_profile = {**audio_profile, "max_bitrate": 128000}
diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
index e82672b..523f9bd 100644
--- a/tests/test_pipelines.py
+++ b/tests/test_pipelines.py
@@ -8,6 +8,7 @@ from scrapy.crawler import Crawler
 from scrapy.http import Request, Response
 
 from repub import media
+from repub import settings as repub_settings
 from repub.config import (
     FeedConfig,
     RepublisherConfig,
@@ -16,7 +17,12 @@ from repub.config import (
 )
 from repub.items import ElementItem
 from repub.pipelines import AudioPipeline, FilePipeline, VideoPipeline
-from repub.utils import local_audio_path, local_video_path
+from repub.utils import (
+    FileType,
+    local_audio_path,
+    local_video_path,
+    published_media_path,
+)
 
 
 def build_test_crawler(tmp_path: Path) -> SimpleNamespace:
@@ -309,6 +315,103 @@ def test_video_transcode_params_scales_to_max_height_for_multipass() -> None:
     }
 
 
+def test_audio_transcode_params_accepts_m4a_format_family() -> None:
+    params = media.audio_transcode_params(
+        {
+            "format": {
+                "bit_rate": "20000",
+                "format_name": "mov,mp4,m4a,3gp,3g2,mj2",
+            },
+            "streams": [
+                {
+                    "codec_type": "audio",
+                    "codec_name": "aac",
+                    "bit_rate": "20000",
+                    "duration_ts": "1",
+                }
+            ],
+        },
+        cast(
+            media.AudioSettings,
+            {
+                "name": "m4a",
+                "format": "m4a",
+                "max_bitrate": 64000,
+                "mimetype": "audio/mp4",
+                "extension": "m4a",
+                "ffmpeg_audio_params": {
+                    "acodec": "libfdk_aac",
+                    "vbr": "2",
+                },
+            },
+        ),
+    )
+
+    assert params is None
+
+
+def test_audio_meta_handles_webm_without_duration_ts() -> None:
+    assert media.audio_meta(
+        {
+            "format": {
+                "duration": "1.0",
+                "size": "100",
+                "bit_rate": "48000",
+                "format_name": "matroska,webm",
+            },
+            "streams": [
+                {
+                    "codec_type": "audio",
+                    "codec_name": "opus",
+                    "sample_rate": "48000",
+                    "channels": 1,
+                }
+            ],
+        }
+    ) == {
+        "duration": "1.0",
+        "fileSize": "100",
+        "bitrate": 48000,
+        "samplingrate": 48000,
+        "channels": 1,
+    }
+
+
+def test_video_meta_handles_webm_without_duration_ts() -> None:
+    assert media.video_meta(
+        {
+            "format": {
+                "duration": "1.0",
+                "size": "200",
+                "bit_rate": "64000",
+                "format_name": "matroska,webm",
+            },
+            "streams": [
+                {
+                    "codec_type": "video",
+                    "codec_name": "vp9",
+                    "width": 640,
+                    "height": 360,
+                    "avg_frame_rate": "25/1",
+                },
+                {
+                    "codec_type": "audio",
+                    "codec_name": "opus",
+                    "sample_rate": "48000",
+                    "channels": 1,
+                },
+            ],
+        }
+    ) == {
+        "duration": "1.0",
+        "fileSize": "200",
+        "width": 640,
+        "height": 360,
+        "bitrate": 64000,
+        "framerate": "25/1",
+    }
+
+
 def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variants(
     monkeypatch, tmp_path: Path
 ) -> None:
@@ -337,13 +440,24 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
         output_path.write_bytes(settings["name"].encode("utf-8"))
         return str(output_path)
 
+    audio_default_path = published_media_path(
+        FileType.AUDIO, source_url, repub_settings.REPUBLISHER_AUDIO[0]
+    )
+    audio_m4a_path = published_media_path(
+        FileType.AUDIO, source_url, repub_settings.REPUBLISHER_AUDIO[1]
+    )
+    audio_webm_path = published_media_path(
+        FileType.AUDIO, source_url, repub_settings.REPUBLISHER_AUDIO[2]
+    )
+
     def fake_probe_media(file_path: str):
-        if file_path.endswith(".mp3-vbr7-3b2b0f13.mp3"):
+        file_name = Path(file_path).name
+        if file_path.endswith(audio_default_path) or file_name == "mp3_vbr7_voice.mp3":
             return {
                 "format": {
                     "duration": "61.2",
                     "size": "4567",
-                    "bit_rate": "96000",
+                    "bit_rate": "37209",
                     "format_name": "mp3",
                     "format_long_name": "MP3",
                 },
@@ -351,48 +465,69 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
                     {
                         "codec_type": "audio",
                         "codec_name": "mp3",
-                        "bit_rate": "96000",
+                        "bit_rate": "37209",
                         "duration_ts": "61200",
-                        "sample_rate": "44100",
-                        "channels": 2,
+                        "sample_rate": "48000",
+                        "channels": 1,
                     }
                 ],
             }
-        if file_path.endswith(".mp3"):
+        if file_path.endswith(audio_m4a_path) or file_name == "m4a_aac_vbr2_voice.m4a":
             return {
                 "format": {
                     "duration": "61.2",
-                    "size": "5678",
-                    "bit_rate": "128000",
-                    "format_name": "mp3",
-                    "format_long_name": "MP3",
+                    "size": "3456",
+                    "bit_rate": "20746",
+                    "format_name": "mov,mp4,m4a,3gp,3g2,mj2",
+                    "format_long_name": "AAC",
                 },
                 "streams": [
                     {
                         "codec_type": "audio",
-                        "codec_name": "mp3",
-                        "bit_rate": "128000",
+                        "codec_name": "aac",
+                        "bit_rate": "20746",
                         "duration_ts": "61200",
-                        "sample_rate": "44100",
-                        "channels": 2,
+                        "sample_rate": "48000",
+                        "channels": 1,
+                    }
+                ],
+            }
+        if (
+            file_path.endswith(audio_webm_path)
+            or file_name == "webm_opus_voice_48k.webm"
+        ):
+            return {
+                "format": {
+                    "duration": "61.2",
+                    "size": "2345",
+                    "bit_rate": "48000",
+                    "format_name": "matroska,webm",
+                    "format_long_name": "WebM",
+                },
+                "streams": [
+                    {
+                        "codec_type": "audio",
+                        "codec_name": "opus",
+                        "sample_rate": "48000",
+                        "channels": 1,
                     }
                 ],
             }
         return {
             "format": {
                 "duration": "61.2",
-                "size": "3456",
-                "bit_rate": "88000",
-                "format_name": "aac",
-                "format_long_name": "AAC",
+                "size": "5678",
+                "bit_rate": "128000",
+                "format_name": "mp3",
+                "format_long_name": "MP3",
             },
             "streams": [
                 {
                     "codec_type": "audio",
-                    "codec_name": "aac",
-                    "bit_rate": "88000",
+                    "codec_name": "mp3",
+                    "bit_rate": "128000",
                     "duration_ts": "61200",
-                    "sample_rate": "48000",
+                    "sample_rate": "44100",
                     "channels": 2,
                 }
             ],
@@ -423,43 +558,48 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
     assert isinstance(result["checksum"], str)
     assert result == {
         "url": source_url,
-        "path": f"{audio_base_path}-vbr7-3b2b0f13.mp3",
+        "path": audio_default_path,
         "published_url": (
-            "https://mirror.example/feeds/nasa/audio/"
-            f"{audio_base_path}-vbr7-3b2b0f13.mp3"
+            f"https://mirror.example/feeds/nasa/audio/{audio_default_path}"
         ),
         "checksum": result["checksum"],
         "status": "downloaded",
         "variants": [
             {
-                "url": (
-                    "https://mirror.example/feeds/nasa/audio/"
-                    f"{audio_base_path}-vbr7-3b2b0f13.mp3"
-                ),
-                "path": f"{audio_base_path}-vbr7-3b2b0f13.mp3",
-                "type": "audio/mp3",
+                "url": f"https://mirror.example/feeds/nasa/audio/{audio_default_path}",
+                "path": audio_default_path,
+                "type": "audio/mpeg",
                 "medium": "audio",
                 "isDefault": "true",
                 "fileSize": "4567",
-                "bitrate": 96000,
+                "bitrate": 37209,
                 "duration": "61.2",
-                "samplingrate": 44100,
-                "channels": 2,
+                "samplingrate": 48000,
+                "channels": 1,
             },
             {
-                "url": (
-                    "https://mirror.example/feeds/nasa/audio/"
-                    f"{audio_base_path}-vbr3-4a2a58d5.aac"
-                ),
-                "path": f"{audio_base_path}-vbr3-4a2a58d5.aac",
-                "type": "audio/aac",
+                "url": f"https://mirror.example/feeds/nasa/audio/{audio_m4a_path}",
+                "path": audio_m4a_path,
+                "type": "audio/mp4",
                 "medium": "audio",
                 "isDefault": "false",
                 "fileSize": "3456",
-                "bitrate": 88000,
+                "bitrate": 20746,
                 "duration": "61.2",
                 "samplingrate": 48000,
-                "channels": 2,
+                "channels": 1,
+            },
+            {
+                "url": f"https://mirror.example/feeds/nasa/audio/{audio_webm_path}",
+                "path": audio_webm_path,
+                "type": "audio/webm",
+                "medium": "audio",
+                "isDefault": "false",
+                "fileSize": "2345",
+                "bitrate": 48000,
+                "duration": "61.2",
+                "samplingrate": 48000,
+                "channels": 1,
             },
             {
                 "url": f"https://mirror.example/feeds/nasa/audio/{audio_base_path}",
@@ -477,8 +617,9 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
     }
     assert persisted == [
         (audio_base_path, "audio/mpeg"),
-        (f"{audio_base_path}-vbr7-3b2b0f13.mp3", "audio/mp3"),
-        (f"{audio_base_path}-vbr3-4a2a58d5.aac", "audio/aac"),
+        (audio_default_path, "audio/mpeg"),
+        (audio_m4a_path, "audio/mp4"),
+        (audio_webm_path, "audio/webm"),
     ]
 
     completed_item = pipeline.item_completed(
@@ -518,23 +659,70 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
         return str(output_path)
 
     monkeypatch.setattr(pipeline, "transcode", fake_transcode)
-    transcoded_suffix = "-720-457f0928.mp4"
-    monkeypatch.setattr(
-        media,
-        "probe_media",
-        lambda _: {
+    video_main_path = published_media_path(
+        FileType.VIDEO, source_url, repub_settings.REPUBLISHER_VIDEO[0]
+    )
+    video_fallback_path = published_media_path(
+        FileType.VIDEO, source_url, repub_settings.REPUBLISHER_VIDEO[1]
+    )
+
+    def fake_probe_media(file_path: str):
+        file_name = Path(file_path).name
+        if file_path.endswith(video_main_path) or file_name == "main.mp4":
+            return {
+                "format": {
+                    "duration": "60.0",
+                    "size": "9876",
+                    "bit_rate": "123456",
+                    "format_name": "mp4",
+                    "format_long_name": "MP4",
+                },
+                "streams": [
+                    {
+                        "codec_type": "video",
+                        "codec_name": "h264",
+                        "bit_rate": "123456",
+                        "duration_ts": "60000",
+                        "width": 1280,
+                        "height": 720,
+                        "avg_frame_rate": "30/1",
+                    },
+                    {
+                        "codec_type": "audio",
+                        "codec_name": "aac",
+                        "bit_rate": "96000",
+                        "duration_ts": "60000",
+                    },
+                ],
+            }
+        if file_path.endswith(video_fallback_path) or file_name == "fallback.webm":
+            return {
+                "format": {
+                    "duration": "60.0",
+                    "size": "6789",
+                    "bit_rate": "64000",
+                    "format_name": "matroska,webm",
+                    "format_long_name": "WebM",
+                },
+                "streams": [
+                    {
+                        "codec_type": "video",
+                        "codec_name": "vp9",
+                        "width": 1280,
+                        "height": 720,
+                        "avg_frame_rate": "25/1",
+                    },
+                    {
+                        "codec_type": "audio",
+                        "codec_name": "opus",
+                    },
+                ],
+            }
+        return {
             "format": {
                 "duration": "60.0",
-                "size": (
-                    "12345"
-                    if _.endswith(".mp4") and not _.endswith(transcoded_suffix)
-                    else "9876"
-                ),
-                "bit_rate": (
-                    "456789"
-                    if _.endswith(".mp4") and not _.endswith(transcoded_suffix)
-                    else "123456"
-                ),
+                "size": "12345",
+                "bit_rate": "456789",
                 "format_name": "mp4",
                 "format_long_name": "MP4",
             },
@@ -542,27 +730,11 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
                 {
                     "codec_type": "video",
                     "codec_name": "h264",
-                    "bit_rate": (
-                        "456789"
-                        if _.endswith(".mp4") and not _.endswith(transcoded_suffix)
-                        else "123456"
-                    ),
+                    "bit_rate": "456789",
                     "duration_ts": "60000",
-                    "width": (
-                        640
-                        if _.endswith(".mp4") and not _.endswith(transcoded_suffix)
-                        else 1280
-                    ),
-                    "height": (
-                        360
-                        if _.endswith(".mp4") and not _.endswith(transcoded_suffix)
-                        else 720
-                    ),
-                    "avg_frame_rate": (
-                        "24/1"
-                        if _.endswith(".mp4") and not _.endswith(transcoded_suffix)
-                        else "30/1"
-                    ),
+                    "width": 640,
+                    "height": 360,
+                    "avg_frame_rate": "24/1",
                 },
                 {
                     "codec_type": "audio",
@@ -571,8 +743,9 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
                     "duration_ts": "60000",
                 },
             ],
-        },
-    )
+        }
+
+    monkeypatch.setattr(media, "probe_media", fake_probe_media)
 
     def fake_persist_file(path, buf, info, meta=None, headers=None):
         del info, meta
@@ -596,20 +769,14 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
     assert isinstance(result["checksum"], str)
     assert result == {
         "url": source_url,
-        "path": f"{video_base_path}-720-457f0928.mp4",
-        "published_url": (
-            "https://mirror.example/feeds/nasa/video/"
-            f"{video_base_path}-720-457f0928.mp4"
-        ),
+        "path": video_main_path,
+        "published_url": (f"https://mirror.example/feeds/nasa/video/{video_main_path}"),
         "checksum": result["checksum"],
         "status": "downloaded",
         "variants": [
             {
-                "url": (
-                    "https://mirror.example/feeds/nasa/video/"
-                    f"{video_base_path}-720-457f0928.mp4"
-                ),
-                "path": f"{video_base_path}-720-457f0928.mp4",
+                "url": f"https://mirror.example/feeds/nasa/video/{video_main_path}",
+                "path": video_main_path,
                 "type": "video/mp4",
                 "medium": "video",
                 "isDefault": "true",
@@ -620,6 +787,19 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
                 "height": 720,
                 "framerate": "30/1",
             },
+            {
+                "url": f"https://mirror.example/feeds/nasa/video/{video_fallback_path}",
+                "path": video_fallback_path,
+                "type": "video/webm",
+                "medium": "video",
+                "isDefault": "false",
+                "fileSize": "6789",
+                "bitrate": 64000,
+                "duration": "60.0",
+                "width": 1280,
+                "height": 720,
+                "framerate": "25/1",
+            },
             {
                 "url": f"https://mirror.example/feeds/nasa/video/{video_base_path}",
                 "path": video_base_path,
@@ -637,7 +817,8 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
     }
     assert persisted == [
         (video_base_path, "video/mp4"),
-        (f"{video_base_path}-720-457f0928.mp4", "video/mp4"),
+        (video_main_path, "video/mp4"),
+        (video_fallback_path, "video/webm"),
     ]
 
 
@@ -650,13 +831,24 @@ def test_audio_pipeline_media_to_download_checks_canonical_path(
     source_url = "https://example.com/podcast.mp3"
     audio_base_path = local_audio_path(source_url)
     original_path = store_dir(pipeline) / audio_base_path
-    canonical_path = store_dir(pipeline) / f"{audio_base_path}-vbr7-3b2b0f13.mp3"
-    secondary_path = store_dir(pipeline) / f"{audio_base_path}-vbr3-4a2a58d5.aac"
+    audio_default_path = published_media_path(
+        FileType.AUDIO, source_url, repub_settings.REPUBLISHER_AUDIO[0]
+    )
+    audio_m4a_path = published_media_path(
+        FileType.AUDIO, source_url, repub_settings.REPUBLISHER_AUDIO[1]
+    )
+    audio_webm_path = published_media_path(
+        FileType.AUDIO, source_url, repub_settings.REPUBLISHER_AUDIO[2]
+    )
+    canonical_path = store_dir(pipeline) / audio_default_path
+    m4a_path = store_dir(pipeline) / audio_m4a_path
+    webm_path = store_dir(pipeline) / audio_webm_path
     original_path.parent.mkdir(parents=True, exist_ok=True)
     original_path.write_bytes(b"original")
     canonical_path.parent.mkdir(parents=True, exist_ok=True)
     canonical_path.write_bytes(b"default")
-    secondary_path.write_bytes(b"alt")
+    m4a_path.write_bytes(b"alt-aac")
+    webm_path.write_bytes(b"alt-webm")
     stat_paths: list[str] = []
     original_stat_file = pipeline.store.stat_file
     item = ElementItem(
@@ -683,12 +875,38 @@ def test_audio_pipeline_media_to_download_checks_canonical_path(
         lambda file_path: {
             "format": {
                 "duration": "61.2",
-                "size": ("4567" if file_path.endswith("vbr7-3b2b0f13.mp3") else "3456"),
+                "size": (
+                    "4567"
+                    if file_path.endswith(audio_default_path)
+                    else (
+                        "3456"
+                        if file_path.endswith(audio_m4a_path)
+                        else "2345" if file_path.endswith(audio_webm_path) else "5678"
+                    )
+                ),
                 "bit_rate": (
-                    "96000" if file_path.endswith("vbr7-3b2b0f13.mp3") else "88000"
+                    "37209"
+                    if file_path.endswith(audio_default_path)
+                    else (
+                        "20746"
+                        if file_path.endswith(audio_m4a_path)
+                        else (
+                            "48000" if file_path.endswith(audio_webm_path) else "128000"
+                        )
+                    )
                 ),
                 "format_name": (
-                    "mp3" if file_path.endswith("vbr7-3b2b0f13.mp3") else "aac"
+                    "mp3"
+                    if file_path.endswith(audio_default_path)
+                    else (
+                        "mov,mp4,m4a,3gp,3g2,mj2"
+                        if file_path.endswith(audio_m4a_path)
+                        else (
+                            "matroska,webm"
+                            if file_path.endswith(audio_webm_path)
+                            else "mp3"
+                        )
+                    )
                 ),
                 "format_long_name": "Audio",
             },
@@ -696,16 +914,36 @@ def test_audio_pipeline_media_to_download_checks_canonical_path(
                 {
                     "codec_type": "audio",
                     "codec_name": (
-                        "mp3" if file_path.endswith("vbr7-3b2b0f13.mp3") else "aac"
+                        "mp3"
+                        if file_path.endswith(audio_default_path)
+                        else (
+                            "aac"
+                            if file_path.endswith(audio_m4a_path)
+                            else (
+                                "opus" if file_path.endswith(audio_webm_path) else "mp3"
+                            )
+                        )
                     ),
                     "bit_rate": (
-                        "96000" if file_path.endswith("vbr7-3b2b0f13.mp3") else "88000"
+                        "37209"
+                        if file_path.endswith(audio_default_path)
+                        else (
+                            "20746"
+                            if file_path.endswith(audio_m4a_path)
+                            else (
+                                None
+                                if file_path.endswith(audio_webm_path)
+                                else "128000"
+                            )
+                        )
+                    ),
+                    "duration_ts": (
+                        None if file_path.endswith(audio_webm_path) else "61200"
                     ),
-                    "duration_ts": "61200",
                     "sample_rate": (
-                        "44100" if file_path.endswith("vbr7-3b2b0f13.mp3") else "48000"
+                        "44100" if file_path == str(original_path) else "48000"
                     ),
-                    "channels": 2,
+                    "channels": 1 if file_path != str(original_path) else 2,
                 }
             ],
         },
@@ -717,12 +955,13 @@ def test_audio_pipeline_media_to_download_checks_canonical_path(
         item=item,
     )
     assert result is not None
-    assert result["path"] == f"{audio_base_path}-vbr7-3b2b0f13.mp3"
+    assert result["path"] == audio_default_path
     assert result["status"] == "uptodate"
     assert [variant.get("path") for variant in result["variants"]] == [
-        f"{audio_base_path}-vbr7-3b2b0f13.mp3",
-        f"{audio_base_path}-vbr3-4a2a58d5.aac",
+        audio_default_path,
+        audio_m4a_path,
+        audio_webm_path,
         audio_base_path,
     ]
     assert f"{audio_base_path}.mp3" not in stat_paths
-    assert stat_paths[0] == f"{audio_base_path}-vbr7-3b2b0f13.mp3"
+    assert stat_paths[0] == audio_default_path

From cebf037753d4240d793627f8d3d465c7bfbebe23 Mon Sep 17 00:00:00 2001
From: Abel Luck <abel@guardianproject.info>
Date: Wed, 1 Apr 2026 17:27:20 +0200
Subject: [PATCH 3/4] Prefer content over description for item bodies

---
 repub/spiders/rss_spider.py   | 12 ++++++-
 tests/test_feed_validation.py | 62 +++++++++++++++++++++++++++++++----
 2 files changed, 67 insertions(+), 7 deletions(-)

diff --git a/repub/spiders/rss_spider.py b/repub/spiders/rss_spider.py
index 80be20e..fa27317 100644
--- a/repub/spiders/rss_spider.py
+++ b/repub/spiders/rss_spider.py
@@ -281,6 +281,14 @@ class RssFeedSpider(BaseRssFeedSpider):
         file_urls = []
         audio_urls = []
         video_urls = []
+        source_description_html = (
+            sanitize_html(entry.get("summary", "")) if "summary_detail" in entry else ""
+        )
+        has_content_html = any(
+            c.type == "text/html" and ((getattr(c, "value", "") or "").strip() != "")
+            for c in entry.get("content", [])
+        )
+        description_html = source_description_html if has_content_html else ""
 
         def add_url(file_type, url):
             if file_type == FileType.IMAGE:
@@ -295,7 +303,7 @@ class RssFeedSpider(BaseRssFeedSpider):
         item = E.item(
             E.title(entry.get("title")),
             E.link(entry.get("link")),
-            E.description(sanitize_html(entry.get("description", ""))),
+            E.description(description_html),
             E.guid(
                 entry.get("id"),
                 {"isPermaLink": "true" if entry.guidislink else "false"},
@@ -341,6 +349,8 @@ class RssFeedSpider(BaseRssFeedSpider):
                     image_urls.extend(urls[FileType.IMAGE])
                     video_urls.extend(urls[FileType.VIDEO])
                     audio_urls.extend(urls[FileType.AUDIO])
+        if not has_content_html and source_description_html.strip() != "":
+            item.append(CONTENT.encoded(CDATA(source_description_html)))
 
         if isinstance(entry.get("media_content"), list):
             for media in (
diff --git a/tests/test_feed_validation.py b/tests/test_feed_validation.py
index 290a90a..9e1f80b 100644
--- a/tests/test_feed_validation.py
+++ b/tests/test_feed_validation.py
@@ -437,10 +437,60 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
     assert "<" not in itunes_summary
     assert ">" not in itunes_summary
 
-    assert "contenteditable=" not in xml
-    assert "mode=" not in xml
-    assert "querystring=" not in xml
-    assert (
-        f"https://mirror.example/feeds/demo/images/{local_image_path(source_image)}"
-        in xml
+
+def test_item_body_uses_description_only_when_content_is_also_present() -> None:
+    xml, root = _serialize_feed(
+        feed_url="https://mirror.example",
+        feed_text="""<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0"
+     xmlns:content="http://purl.org/rss/1.0/modules/content/">
+  <channel>
+    <title>Demo Feed</title>
+    <link>https://source.example/feed</link>
+    <description>Demo description</description>
+    <item>
+      <title>Description Only</title>
+      <link>https://source.example/description-only</link>
+      <description><![CDATA[<p mode="summary">Description body</p>]]></description>
+      <guid isPermaLink="false">entry-description-only</guid>
+      <pubDate>Tue, 31 Mar 2026 10:31:50 +0000</pubDate>
+    </item>
+    <item>
+      <title>Content Only</title>
+      <link>https://source.example/content-only</link>
+      <guid isPermaLink="false">entry-content-only</guid>
+      <pubDate>Tue, 31 Mar 2026 10:31:50 +0000</pubDate>
+      <content:encoded><![CDATA[<div mode="body">Content body</div>]]></content:encoded>
+    </item>
+    <item>
+      <title>Both Present</title>
+      <link>https://source.example/both-present</link>
+      <description><![CDATA[<p mode="summary">Summary body</p>]]></description>
+      <guid isPermaLink="false">entry-both-present</guid>
+      <pubDate>Tue, 31 Mar 2026 10:31:50 +0000</pubDate>
+      <content:encoded><![CDATA[<div mode="body">Full body</div>]]></content:encoded>
+    </item>
+  </channel>
+</rss>
+""",
+    )
+
+    items = root.findall("./channel/item")
+    assert len(items) == 3
+
+    description_only, content_only, both_present = items
+
+    assert description_only.findtext("description") in (None, "")
+    assert description_only.findtext("content:encoded", namespaces=nsmap) == (
+        "<p>Description body</p>"
+    )
+
+    assert content_only.findtext("description") in (None, "")
+    assert content_only.findtext("content:encoded", namespaces=nsmap) == (
+        "<div>Content body</div>"
+    )
+
+    assert both_present.findtext("description") == "<p>Summary body</p>"
+    assert both_present.findtext("content:encoded", namespaces=nsmap) == (
+        "<div>Full body</div>"
     )

From 180677efa71b97f0a9d9cd2d4ea0e4bcdac79a98 Mon Sep 17 00:00:00 2001
From: Abel Luck <abel@guardianproject.info>
Date: Wed, 1 Apr 2026 17:29:27 +0200
Subject: [PATCH 4/4] Update pygea dependency

---
 uv.lock | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/uv.lock b/uv.lock
index 50e2792..857e52d 100644
--- a/uv.lock
+++ b/uv.lock
@@ -936,7 +936,7 @@ wheels = [
 [[package]]
 name = "pygea"
 version = "0.1.0"
-source = { git = "https://guardianproject.dev/anynews/pygea.git#bff04afbf612612108d9651f355d066c4b5f6a64" }
+source = { git = "https://guardianproject.dev/anynews/pygea.git#c58bac3abddc019c74a1de45835688461f39f2d0" }
 dependencies = [
     { name = "beautifulsoup4" },
     { name = "feedgen" },