diff --git a/repub/media.py b/repub/media.py
index aec3a4d..fe8074b 100644
--- a/repub/media.py
+++ b/repub/media.py
@@ -77,7 +77,7 @@ def probe_media(file_path) -> Dict[str, Any]:
def bitrate(info) -> float:
try:
return int(info["format"]["bit_rate"])
- except (KeyError, ValueError):
+ except (KeyError, ValueError):
logger.error("extracting bitrate from ffprobe failed")
return math.inf
@@ -85,34 +85,16 @@ def bitrate(info) -> float:
def format_name(info) -> Optional[str]:
try:
return info["format"]["format_name"]
- except (KeyError, ValueError):
+ except (KeyError, ValueError):
logger.error("extracting format from ffprobe failed")
return None
-def _stream_duration_sort_key(stream: Dict[str, Any]) -> tuple[int, float]:
- duration_ts = _int_value(stream.get("duration_ts"))
- if duration_ts is not None:
- return 1, float(duration_ts)
- try:
- duration = float(str(stream.get("duration", "")))
- except (TypeError, ValueError):
- duration = 0.0
- return 0, duration
-
-
-def _matches_format(probe: Dict[str, Any], expected: str) -> bool:
- current = format_name(probe)
- if current is None:
- return False
- return expected in current.split(",")
-
-
def primary_video_stream(probe):
video_streams = [
stream for stream in probe["streams"] if stream["codec_type"] == "video"
]
- video_streams = sorted(video_streams, key=_stream_duration_sort_key, reverse=True)
+ video_streams = sorted(video_streams, key=lambda x: x.get("duration_ts", 0), reverse=True)
if not video_streams:
return None
if len(video_streams) > 1:
@@ -126,7 +108,7 @@ def primary_audio_stream(probe):
audio_streams = [
stream for stream in probe["streams"] if stream["codec_type"] == "audio"
]
- audio_streams = sorted(audio_streams, key=_stream_duration_sort_key, reverse=True)
+ audio_streams = sorted(audio_streams, key=lambda x: x.get("duration_ts", 0), reverse=True)
if not audio_streams:
return None
if len(audio_streams) > 1:
@@ -144,7 +126,7 @@ def get_resolution(probe) -> Tuple[Optional[float], Optional[float]]:
width = int(video_stream["width"])
height = int(video_stream["height"])
return width, height
- except (KeyError, ValueError):
+ except (KeyError, ValueError):
logger.error("extracting resolution from ffprobe failed")
return None, None
@@ -155,7 +137,7 @@ def get_vcodec_name(probe) -> Optional[str]:
if not video_stream:
return None
return video_stream["codec_name"]
- except (KeyError, ValueError):
+ except (KeyError, ValueError):
logger.error("extracting video codec_name from ffprobe failed")
return None
@@ -165,11 +147,8 @@ def get_acodec_info(probe) -> Tuple[Optional[str], Optional[int]]:
audio_stream = primary_audio_stream(probe)
if not audio_stream:
return None, None
- audio_bitrate = _int_value(
- audio_stream.get("bit_rate") or probe["format"].get("bit_rate")
- )
- return audio_stream["codec_name"], audio_bitrate
- except (KeyError, ValueError):
+ return audio_stream["codec_name"], int(audio_stream["bit_rate"])
+ except (KeyError, ValueError):
logger.error("extracting audio codec_name from ffprobe failed")
return None, None
@@ -239,7 +218,7 @@ def audio_transcode_params(
is_br = True
else:
is_br = False
- if _matches_format(probe_result, fmt):
+ if fmt in (format_name(probe_result) or "").split(","):
is_fmt = True
else:
is_fmt = False
@@ -310,7 +289,11 @@ def video_transcode_params(
# TODO: turn this into an exception and catch it for reporting
return None
- is_container = _matches_format(probe_result, target_container)
+ current_container_many = format_name(probe_result)
+ is_container = False
+ if current_container_many is not None:
+ if target_container in current_container_many.split(","):
+ is_container = True
is_vcodec = vcodec == target_vcodec
is_acodec = acodec == target_acodec
diff --git a/repub/settings.py b/repub/settings.py
index 252c974..d39b635 100644
--- a/repub/settings.py
+++ b/repub/settings.py
@@ -102,95 +102,79 @@ MEDIA_ALLOW_REDIRECTS = True
REPUBLISHER_AUDIO = [
{
- "name": "mp3_vbr7_voice",
+ "name": "vbr7",
"format": "mp3",
- "max_bitrate": 64000,
- "mimetype": "audio/mpeg",
+ "max_bitrate": 96000,
+ "mimetype": "audio/mp3",
"extension": "mp3",
"ffmpeg_audio_params": {
"acodec": "libmp3lame",
+ # https://trac.ffmpeg.org/wiki/Encode/MP3#VBREncoding
"qscale:a": "7",
- "ac": "1",
- "ar": "48000",
},
},
{
- "name": "m4a_aac_vbr2_voice",
- "format": "m4a",
- "max_bitrate": 64000,
- "mimetype": "audio/mp4",
- "extension": "m4a",
+ "name": "vbr3",
+ "format": "aac",
+ "max_bitrate": 96000,
+ "mimetype": "audio/aac",
+ "extension": "aac",
"ffmpeg_audio_params": {
"acodec": "libfdk_aac",
- "vbr": "2",
- "ac": "1",
- "ar": "48000",
- },
- },
- {
- "name": "webm_opus_voice_48k",
- "format": "webm",
- "max_bitrate": 48000,
- "mimetype": "audio/webm",
- "extension": "webm",
- "ffmpeg_audio_params": {
- "acodec": "libopus",
- "b:a": "48k",
- "ac": "1",
- "ar": "48000",
- "application": "voip",
+ # https://trac.ffmpeg.org/wiki/Encode/AAC#fdk_vbr
+ "vbr": "3",
},
},
]
REPUBLISHER_VIDEO = [
- # broadly compatible
{
- "name": "main",
+ "name": "720",
"container": "mp4",
"vcodec": "h264",
- "acodec": "aac",
+ "acodec": "mp3",
"audio_max_bitrate": 96000,
"ffmpeg_audio_params": {
- "acodec": "aac",
- "b:a": "96k",
- "ac": "2",
- "ar": "48000",
- },
- "ffmpeg_video_params": {
- "vcodec": "libx264",
- "pix_fmt": "yuv420p",
- "profile:v": "main",
- "level": "4.0",
- "preset": "medium",
- "crf": "22",
- "movflags": "+faststart",
+ "acodec": "libmp3lame",
+ # https://trac.ffmpeg.org/wiki/Encode/MP3#VBREncoding
+ "qscale:a": "7",
},
+ "ffmpeg_video_params": {"vcodec": "h264", "strict": "-2"},
"max_height": 720,
"mimetype": "video/mp4",
"extension": "mp4",
},
- # linux fallback without patent encumberance
- {
- "name": "fallback",
- "container": "webm",
- "vcodec": "vp9",
- "acodec": "opus",
- "audio_max_bitrate": 96000,
- "ffmpeg_audio_params": {
- "acodec": "libopus",
- "b:a": "96k",
- "ac": "2",
- "ar": "48000",
- },
- "ffmpeg_video_params": {
- "vcodec": "libvpx-vp9",
- "crf": "33",
- "b:v": "0",
- },
- "max_height": 720,
- "mimetype": "video/webm",
- "extension": "webm",
- },
+ # {
+ # "passes": [
+ # {
+ # "c:v": "libvpx-vp9",
+ # "b:v": "0",
+ # "crf": "30",
+ # "pass": "1",
+ # "deadline": "good",
+ # "row-mt": "1",
+ # "f": "null",
+ # },
+ # {
+ # "c:v": "libvpx-vp9",
+ # "b:v": "0",
+ # "crf": "30",
+ # "pass": "2",
+ # "deadline": "good",
+ # "row-mt": "1",
+ # "c:a": "libopus",
+ # "b:a": "96k",
+ # "ac": "2",
+ # },
+ # ],
+ # "name": "720",
+ # "container": "webm",
+ # "vcodec": "libvpx-vp9",
+ # "acodec": "opus",
+ # "audio_max_bitrate": 96000,
+ # "max_height": 720,
+ # "mimetype": "video/webm",
+ # "extension": "webm",
+ # },
]
REPUBLISHER_FFMPEG_ENCODERS = ["libmp3lame", "libfdk_aac", "libvpx-vp9", "libopus"]
diff --git a/repub/spiders/rss_spider.py b/repub/spiders/rss_spider.py
index fa27317..80be20e 100644
--- a/repub/spiders/rss_spider.py
+++ b/repub/spiders/rss_spider.py
@@ -281,14 +281,6 @@ class RssFeedSpider(BaseRssFeedSpider):
file_urls = []
audio_urls = []
video_urls = []
- source_description_html = (
- sanitize_html(entry.get("summary", "")) if "summary_detail" in entry else ""
- )
- has_content_html = any(
- c.type == "text/html" and ((getattr(c, "value", "") or "").strip() != "")
- for c in entry.get("content", [])
- )
- description_html = source_description_html if has_content_html else ""
def add_url(file_type, url):
if file_type == FileType.IMAGE:
@@ -303,7 +295,7 @@ class RssFeedSpider(BaseRssFeedSpider):
item = E.item(
E.title(entry.get("title")),
E.link(entry.get("link")),
- E.description(description_html),
+ E.description(sanitize_html(entry.get("description", ""))),
E.guid(
entry.get("id"),
{"isPermaLink": "true" if entry.guidislink else "false"},
@@ -349,8 +341,6 @@ class RssFeedSpider(BaseRssFeedSpider):
image_urls.extend(urls[FileType.IMAGE])
video_urls.extend(urls[FileType.VIDEO])
audio_urls.extend(urls[FileType.AUDIO])
- if not has_content_html and source_description_html.strip() != "":
- item.append(CONTENT.encoded(CDATA(source_description_html)))
if isinstance(entry.get("media_content"), list):
for media in (
diff --git a/tests/test_feed_validation.py b/tests/test_feed_validation.py
index 9e1f80b..22589b4 100644
--- a/tests/test_feed_validation.py
+++ b/tests/test_feed_validation.py
@@ -14,13 +14,7 @@ from repub.exporters import RssExporter
from repub.items import ElementItem
from repub.rss import nsmap
from repub.spiders.rss_spider import RssFeedSpider
-from repub.utils import (
- FileType,
- local_audio_path,
- local_image_path,
- local_video_path,
- published_media_path,
-)
+from repub.utils import local_audio_path, local_image_path, local_video_path
RSS_DATE_PATTERN = re.compile(
r"^[A-Z][a-z]{2}, \d{2} [A-Z][a-z]{2} \d{4} \d{2}:\d{2}:\d{2} [+-]\d{4}$"
@@ -75,32 +69,17 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
source_video = "https://source.example/media/video.mp4"
channel_image = "https://source.example/media/channel.png"
item_image = "https://source.example/media/cover.jpg"
- audio_base_path = local_audio_path(source_audio)
- audio_default_path = published_media_path(
- FileType.AUDIO, source_audio, repub_settings.REPUBLISHER_AUDIO[0]
- )
- audio_m4a_path = published_media_path(
- FileType.AUDIO, source_audio, repub_settings.REPUBLISHER_AUDIO[1]
- )
- audio_webm_path = published_media_path(
- FileType.AUDIO, source_audio, repub_settings.REPUBLISHER_AUDIO[2]
- )
- video_base_path = local_video_path(source_video)
- video_main_path = published_media_path(
- FileType.VIDEO, source_video, repub_settings.REPUBLISHER_VIDEO[0]
- )
- video_fallback_path = published_media_path(
- FileType.VIDEO, source_video, repub_settings.REPUBLISHER_VIDEO[1]
- )
def prepare_item(item: ElementItem) -> None:
+ audio_base_path = local_audio_path(source_audio)
+ video_base_path = local_video_path(source_video)
item.audios = [
{
"url": source_audio,
- "path": audio_default_path,
+ "path": f"{audio_base_path}-vbr7-3b2b0f13.mp3",
"published_url": _published_url(
"https://mirror.example",
- f"audio/{audio_default_path}",
+ f"audio/{audio_base_path}-vbr7-3b2b0f13.mp3",
),
"checksum": "audio-default",
"status": "downloaded",
@@ -108,47 +87,32 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
{
"url": _published_url(
"https://mirror.example",
- f"audio/{audio_default_path}",
+ f"audio/{audio_base_path}-vbr7-3b2b0f13.mp3",
),
- "path": audio_default_path,
- "type": "audio/mpeg",
+ "path": f"{audio_base_path}-vbr7-3b2b0f13.mp3",
+ "type": "audio/mp3",
"medium": "audio",
"isDefault": "true",
"fileSize": "4567",
- "bitrate": "37209",
+ "bitrate": "96000",
"duration": "61.2",
- "samplingrate": "48000",
- "channels": "1",
+ "samplingrate": "44100",
+ "channels": "2",
},
{
"url": _published_url(
"https://mirror.example",
- f"audio/{audio_m4a_path}",
+ f"audio/{audio_base_path}-vbr3-4a2a58d5.aac",
),
- "path": audio_m4a_path,
- "type": "audio/mp4",
+ "path": f"{audio_base_path}-vbr3-4a2a58d5.aac",
+ "type": "audio/aac",
"medium": "audio",
"isDefault": "false",
"fileSize": "3456",
- "bitrate": "20746",
+ "bitrate": "88000",
"duration": "61.2",
"samplingrate": "48000",
- "channels": "1",
- },
- {
- "url": _published_url(
- "https://mirror.example",
- f"audio/{audio_webm_path}",
- ),
- "path": audio_webm_path,
- "type": "audio/webm",
- "medium": "audio",
- "isDefault": "false",
- "fileSize": "2345",
- "bitrate": "48000",
- "duration": "61.2",
- "samplingrate": "48000",
- "channels": "1",
+ "channels": "2",
},
{
"url": _published_url(
@@ -171,10 +135,10 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
item.videos = [
{
"url": source_video,
- "path": video_main_path,
+ "path": f"{video_base_path}-720-457f0928.mp4",
"published_url": _published_url(
"https://mirror.example",
- f"video/{video_main_path}",
+ f"video/{video_base_path}-720-457f0928.mp4",
),
"checksum": "video-default",
"status": "downloaded",
@@ -182,9 +146,9 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
{
"url": _published_url(
"https://mirror.example",
- f"video/{video_main_path}",
+ f"video/{video_base_path}-720-457f0928.mp4",
),
- "path": video_main_path,
+ "path": f"{video_base_path}-720-457f0928.mp4",
"type": "video/mp4",
"medium": "video",
"isDefault": "true",
@@ -195,22 +159,6 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
"height": "720",
"framerate": "30/1",
},
- {
- "url": _published_url(
- "https://mirror.example",
- f"video/{video_fallback_path}",
- ),
- "path": video_fallback_path,
- "type": "video/webm",
- "medium": "video",
- "isDefault": "false",
- "fileSize": "6789",
- "bitrate": "64000",
- "duration": "60.0",
- "width": "1280",
- "height": "720",
- "framerate": "25/1",
- },
{
"url": _published_url(
"https://mirror.example",
@@ -309,9 +257,12 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
enclosure = root.find("./channel/item/enclosure")
assert enclosure is not None
assert enclosure.attrib == {
- "url": (f"https://mirror.example/feeds/demo/audio/" f"{audio_default_path}"),
+ "url": (
+ f"https://mirror.example/feeds/demo/audio/"
+ f"{local_audio_path(source_audio)}-vbr7-3b2b0f13.mp3"
+ ),
"length": "4567",
- "type": "audio/mpeg",
+ "type": "audio/mp3",
}
assert len(enclosure) == 0
@@ -325,39 +276,32 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
assert [variant.attrib for variant in audio_variants] == [
{
"url": (
- f"https://mirror.example/feeds/demo/audio/" f"{audio_default_path}"
+ f"https://mirror.example/feeds/demo/audio/"
+ f"{local_audio_path(source_audio)}-vbr7-3b2b0f13.mp3"
),
- "type": "audio/mpeg",
+ "type": "audio/mp3",
"medium": "audio",
"isDefault": "true",
- "bitrate": "37209",
- "samplingrate": "48000",
- "channels": "1",
+ "bitrate": "96000",
+ "samplingrate": "44100",
+ "channels": "2",
"duration": "61.2",
"fileSize": "4567",
},
{
- "url": (f"https://mirror.example/feeds/demo/audio/" f"{audio_m4a_path}"),
- "type": "audio/mp4",
+ "url": (
+ f"https://mirror.example/feeds/demo/audio/"
+ f"{local_audio_path(source_audio)}-vbr3-4a2a58d5.aac"
+ ),
+ "type": "audio/aac",
"medium": "audio",
"isDefault": "false",
- "bitrate": "20746",
+ "bitrate": "88000",
"samplingrate": "48000",
- "channels": "1",
+ "channels": "2",
"duration": "61.2",
"fileSize": "3456",
},
- {
- "url": (f"https://mirror.example/feeds/demo/audio/" f"{audio_webm_path}"),
- "type": "audio/webm",
- "medium": "audio",
- "isDefault": "false",
- "bitrate": "48000",
- "samplingrate": "48000",
- "channels": "1",
- "duration": "61.2",
- "fileSize": "2345",
- },
{
"url": (
f"https://mirror.example/feeds/demo/audio/"
@@ -377,7 +321,10 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
video_variants = video_group.findall("media:content", namespaces=nsmap)
assert [variant.attrib for variant in video_variants] == [
{
- "url": (f"https://mirror.example/feeds/demo/video/" f"{video_main_path}"),
+ "url": (
+ f"https://mirror.example/feeds/demo/video/"
+ f"{local_video_path(source_video)}-720-457f0928.mp4"
+ ),
"type": "video/mp4",
"medium": "video",
"isDefault": "true",
@@ -390,22 +337,6 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
"lang": "en",
"fileSize": "9876",
},
- {
- "url": (
- f"https://mirror.example/feeds/demo/video/" f"{video_fallback_path}"
- ),
- "type": "video/webm",
- "medium": "video",
- "isDefault": "false",
- "expression": "full",
- "bitrate": "64000",
- "framerate": "25/1",
- "duration": "60.0",
- "height": "720",
- "width": "1280",
- "lang": "en",
- "fileSize": "6789",
- },
{
"url": (
f"https://mirror.example/feeds/demo/video/"
@@ -437,60 +368,10 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
assert "<" not in itunes_summary
assert ">" not in itunes_summary
-
-def test_item_body_uses_description_only_when_content_is_also_present() -> None:
- xml, root = _serialize_feed(
- feed_url="https://mirror.example",
- feed_text="""
-
Description body
" - ) - - assert content_only.findtext("description") in (None, "") - assert content_only.findtext("content:encoded", namespaces=nsmap) == ( - "Summary body
" - assert both_present.findtext("content:encoded", namespaces=nsmap) == ( - "