Fix published paths for transcoded media

This commit is contained in:
Abel Luck 2026-03-31 14:14:46 +02:00
parent 3f33994cdc
commit 89d462e280
9 changed files with 956 additions and 114 deletions

View file

@ -3,22 +3,34 @@ from __future__ import annotations
import re
from email.utils import parsedate_to_datetime
from io import BytesIO
from typing import Callable
import lxml.etree as etree
from scrapy.http import TextResponse
from scrapy.settings import Settings
from repub import settings as repub_settings
from repub.exporters import RssExporter
from repub.items import ElementItem
from repub.rss import nsmap
from repub.spiders.rss_spider import RssFeedSpider
from repub.utils import local_audio_path, local_file_path, local_image_path
from repub.utils import local_audio_path, local_image_path, local_video_path
RSS_DATE_PATTERN = re.compile(
r"^[A-Z][a-z]{2}, \d{2} [A-Z][a-z]{2} \d{4} \d{2}:\d{2}:\d{2} [+-]\d{4}$"
)
def _serialize_feed(*, feed_text: str, feed_url: str) -> tuple[str, etree._Element]:
def _published_url(feed_url: str, path: str) -> str:
return f"{feed_url}/feeds/demo/{path}"
def _serialize_feed(
*,
feed_text: str,
feed_url: str,
prepare_item: Callable[[ElementItem], None] | None = None,
) -> tuple[str, etree._Element]:
spider = RssFeedSpider(feed_name="demo", url="https://source.example/feed.rss")
spider.settings = Settings(
values={
@ -26,6 +38,8 @@ def _serialize_feed(*, feed_text: str, feed_url: str) -> tuple[str, etree._Eleme
"REPUBLISHER_FILE_DIR": "files",
"REPUBLISHER_AUDIO_DIR": "audio",
"REPUBLISHER_VIDEO_DIR": "video",
"REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
"REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
"REPUBLISHER_FEED_URL": feed_url,
}
)
@ -39,6 +53,8 @@ def _serialize_feed(*, feed_text: str, feed_url: str) -> tuple[str, etree._Eleme
exporter = RssExporter(output)
exporter.start_exporting()
for item in list(spider._parse(response) or []):
if prepare_item is not None and isinstance(item, ElementItem):
prepare_item(item)
exporter.export_item(item)
exporter.finish_exporting()
@ -53,8 +69,88 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
source_video = "https://source.example/media/video.mp4"
channel_image = "https://source.example/media/channel.png"
item_image = "https://source.example/media/cover.jpg"
def prepare_item(item: ElementItem) -> None:
    """Replace the audio and video records on ``item`` with fixed fixtures.

    ``item.audios`` is set to a single record for ``source_audio`` carrying
    two variants (``-vbr7.mp3`` as the default and ``-vbr3.aac``), and
    ``item.videos`` to a single record for ``source_video`` carrying one
    ``-720.mp4`` variant. In both records the top-level ``path`` and
    ``published_url`` are the same values as the default variant's ``path``
    and ``url``.

    Args:
        item: The feed item to mutate in place.
    """
    mirror_url = "https://mirror.example"
    audio_stem = local_audio_path(source_audio)
    video_stem = local_video_path(source_video)

    # Paths are relative to the per-medium directory; the published URLs
    # prefix them with that directory name ("audio/" or "video/").
    mp3_path = f"{audio_stem}-vbr7.mp3"
    aac_path = f"{audio_stem}-vbr3.aac"
    mp4_path = f"{video_stem}-720.mp4"

    mp3_url = _published_url(mirror_url, f"audio/{mp3_path}")
    aac_url = _published_url(mirror_url, f"audio/{aac_path}")
    mp4_url = _published_url(mirror_url, f"video/{mp4_path}")

    mp3_variant = {
        "url": mp3_url,
        "path": mp3_path,
        "type": "audio/mp3",
        "medium": "audio",
        "isDefault": "true",
        "fileSize": "4567",
        "bitrate": "96000",
        "duration": "61.2",
        "samplingrate": "44100",
        "channels": "2",
    }
    aac_variant = {
        "url": aac_url,
        "path": aac_path,
        "type": "audio/aac",
        "medium": "audio",
        "isDefault": "false",
        "fileSize": "3456",
        "bitrate": "88000",
        "duration": "61.2",
        "samplingrate": "48000",
        "channels": "2",
    }
    mp4_variant = {
        "url": mp4_url,
        "path": mp4_path,
        "type": "video/mp4",
        "medium": "video",
        "isDefault": "true",
        "fileSize": "9876",
        "bitrate": "123456",
        "duration": "60.0",
        "width": "1280",
        "height": "720",
        "framerate": "30/1",
    }

    audio_record = {
        "url": source_audio,
        "path": mp3_path,
        "published_url": mp3_url,
        "checksum": "audio-default",
        "status": "downloaded",
        "variants": [mp3_variant, aac_variant],
    }
    video_record = {
        "url": source_video,
        "path": mp4_path,
        "published_url": mp4_url,
        "checksum": "video-default",
        "status": "downloaded",
        "variants": [mp4_variant],
    }

    item.audios = [audio_record]
    item.videos = [video_record]
xml, root = _serialize_feed(
feed_url="https://mirror.example",
prepare_item=prepare_item,
feed_text=f"""<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
@ -130,25 +226,73 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
enclosure = root.find("./channel/item/enclosure")
assert enclosure is not None
assert enclosure.attrib == {
"url": f"https://mirror.example/feeds/demo/audio/{local_audio_path(source_audio)}",
"length": "123",
"type": "audio/mpeg",
"url": (
f"https://mirror.example/feeds/demo/audio/"
f"{local_audio_path(source_audio)}-vbr7.mp3"
),
"length": "4567",
"type": "audio/mp3",
}
assert len(enclosure) == 0
media_content = root.find("./channel/item/media:content", namespaces=nsmap)
assert media_content is not None
assert media_content.attrib == {
"url": f"https://mirror.example/feeds/demo/video/{local_file_path(source_video)}",
"type": "video/mp4",
"medium": "video",
"expression": "full",
"duration": "60",
"width": "640",
"height": "360",
"lang": "en",
}
assert len(media_content) == 0
assert root.find("./channel/item/media:content", namespaces=nsmap) is None
media_groups = root.findall("./channel/item/media:group", namespaces=nsmap)
assert len(media_groups) == 2
audio_group, video_group = media_groups
audio_variants = audio_group.findall("media:content", namespaces=nsmap)
assert [variant.attrib for variant in audio_variants] == [
{
"url": (
f"https://mirror.example/feeds/demo/audio/"
f"{local_audio_path(source_audio)}-vbr7.mp3"
),
"type": "audio/mp3",
"medium": "audio",
"isDefault": "true",
"bitrate": "96000",
"samplingrate": "44100",
"channels": "2",
"duration": "61.2",
"fileSize": "4567",
},
{
"url": (
f"https://mirror.example/feeds/demo/audio/"
f"{local_audio_path(source_audio)}-vbr3.aac"
),
"type": "audio/aac",
"medium": "audio",
"isDefault": "false",
"bitrate": "88000",
"samplingrate": "48000",
"channels": "2",
"duration": "61.2",
"fileSize": "3456",
},
]
video_variants = video_group.findall("media:content", namespaces=nsmap)
assert [variant.attrib for variant in video_variants] == [
{
"url": (
f"https://mirror.example/feeds/demo/video/"
f"{local_video_path(source_video)}-720.mp4"
),
"type": "video/mp4",
"medium": "video",
"isDefault": "true",
"expression": "full",
"bitrate": "123456",
"framerate": "30/1",
"duration": "60.0",
"height": "720",
"width": "1280",
"lang": "en",
"fileSize": "9876",
}
]
itunes_image = root.find("./channel/item/itunes:image", namespaces=nsmap)
assert itunes_image is not None