Fix published paths for transcoded media
This commit is contained in:
parent
3f33994cdc
commit
89d462e280
9 changed files with 956 additions and 114 deletions
|
|
@ -3,22 +3,34 @@ from __future__ import annotations
|
|||
import re
|
||||
from email.utils import parsedate_to_datetime
|
||||
from io import BytesIO
|
||||
from typing import Callable
|
||||
|
||||
import lxml.etree as etree
|
||||
from scrapy.http import TextResponse
|
||||
from scrapy.settings import Settings
|
||||
|
||||
from repub import settings as repub_settings
|
||||
from repub.exporters import RssExporter
|
||||
from repub.items import ElementItem
|
||||
from repub.rss import nsmap
|
||||
from repub.spiders.rss_spider import RssFeedSpider
|
||||
from repub.utils import local_audio_path, local_file_path, local_image_path
|
||||
from repub.utils import local_audio_path, local_image_path, local_video_path
|
||||
|
||||
RSS_DATE_PATTERN = re.compile(
|
||||
r"^[A-Z][a-z]{2}, \d{2} [A-Z][a-z]{2} \d{4} \d{2}:\d{2}:\d{2} [+-]\d{4}$"
|
||||
)
|
||||
|
||||
|
||||
def _serialize_feed(*, feed_text: str, feed_url: str) -> tuple[str, etree._Element]:
|
||||
def _published_url(feed_url: str, path: str) -> str:
|
||||
return f"{feed_url}/feeds/demo/{path}"
|
||||
|
||||
|
||||
def _serialize_feed(
|
||||
*,
|
||||
feed_text: str,
|
||||
feed_url: str,
|
||||
prepare_item: Callable[[ElementItem], None] | None = None,
|
||||
) -> tuple[str, etree._Element]:
|
||||
spider = RssFeedSpider(feed_name="demo", url="https://source.example/feed.rss")
|
||||
spider.settings = Settings(
|
||||
values={
|
||||
|
|
@ -26,6 +38,8 @@ def _serialize_feed(*, feed_text: str, feed_url: str) -> tuple[str, etree._Eleme
|
|||
"REPUBLISHER_FILE_DIR": "files",
|
||||
"REPUBLISHER_AUDIO_DIR": "audio",
|
||||
"REPUBLISHER_VIDEO_DIR": "video",
|
||||
"REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
|
||||
"REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
|
||||
"REPUBLISHER_FEED_URL": feed_url,
|
||||
}
|
||||
)
|
||||
|
|
@ -39,6 +53,8 @@ def _serialize_feed(*, feed_text: str, feed_url: str) -> tuple[str, etree._Eleme
|
|||
exporter = RssExporter(output)
|
||||
exporter.start_exporting()
|
||||
for item in list(spider._parse(response) or []):
|
||||
if prepare_item is not None and isinstance(item, ElementItem):
|
||||
prepare_item(item)
|
||||
exporter.export_item(item)
|
||||
exporter.finish_exporting()
|
||||
|
||||
|
|
@ -53,8 +69,88 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
|||
source_video = "https://source.example/media/video.mp4"
|
||||
channel_image = "https://source.example/media/channel.png"
|
||||
item_image = "https://source.example/media/cover.jpg"
|
||||
|
||||
def prepare_item(item: ElementItem) -> None:
    """Attach canned transcoding results to *item* before it is exported.

    Sets ``item.audios`` to a single entry for ``source_audio`` carrying two
    variants (a default ``-vbr7.mp3`` and a non-default ``-vbr3.aac``), and
    ``item.videos`` to a single entry for ``source_video`` carrying one
    default ``-720.mp4`` variant. All published URLs are built with
    ``_published_url`` against ``https://mirror.example``.
    """
    mirror = "https://mirror.example"
    audio_stem = local_audio_path(source_audio)
    video_stem = local_video_path(source_video)

    # Local paths of each transcoded rendition.
    mp3_path = f"{audio_stem}-vbr7.mp3"
    aac_path = f"{audio_stem}-vbr3.aac"
    mp4_path = f"{video_stem}-720.mp4"

    # Public URLs of the same renditions under the feed's media directories.
    mp3_url = _published_url(mirror, f"audio/{mp3_path}")
    aac_url = _published_url(mirror, f"audio/{aac_path}")
    mp4_url = _published_url(mirror, f"video/{mp4_path}")

    mp3_variant = {
        "url": mp3_url,
        "path": mp3_path,
        "type": "audio/mp3",
        "medium": "audio",
        "isDefault": "true",
        "fileSize": "4567",
        "bitrate": "96000",
        "duration": "61.2",
        "samplingrate": "44100",
        "channels": "2",
    }
    aac_variant = {
        "url": aac_url,
        "path": aac_path,
        "type": "audio/aac",
        "medium": "audio",
        "isDefault": "false",
        "fileSize": "3456",
        "bitrate": "88000",
        "duration": "61.2",
        "samplingrate": "48000",
        "channels": "2",
    }
    mp4_variant = {
        "url": mp4_url,
        "path": mp4_path,
        "type": "video/mp4",
        "medium": "video",
        "isDefault": "true",
        "fileSize": "9876",
        "bitrate": "123456",
        "duration": "60.0",
        "width": "1280",
        "height": "720",
        "framerate": "30/1",
    }

    # The top-level entry mirrors the default variant's path and URL.
    audio_entry = {
        "url": source_audio,
        "path": mp3_path,
        "published_url": mp3_url,
        "checksum": "audio-default",
        "status": "downloaded",
        "variants": [mp3_variant, aac_variant],
    }
    video_entry = {
        "url": source_video,
        "path": mp4_path,
        "published_url": mp4_url,
        "checksum": "video-default",
        "status": "downloaded",
        "variants": [mp4_variant],
    }

    item.audios = [audio_entry]
    item.videos = [video_entry]
|
||||
|
||||
xml, root = _serialize_feed(
|
||||
feed_url="https://mirror.example",
|
||||
prepare_item=prepare_item,
|
||||
feed_text=f"""<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss version="2.0"
|
||||
xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||
|
|
@ -130,25 +226,73 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
|||
enclosure = root.find("./channel/item/enclosure")
|
||||
assert enclosure is not None
|
||||
assert enclosure.attrib == {
|
||||
"url": f"https://mirror.example/feeds/demo/audio/{local_audio_path(source_audio)}",
|
||||
"length": "123",
|
||||
"type": "audio/mpeg",
|
||||
"url": (
|
||||
f"https://mirror.example/feeds/demo/audio/"
|
||||
f"{local_audio_path(source_audio)}-vbr7.mp3"
|
||||
),
|
||||
"length": "4567",
|
||||
"type": "audio/mp3",
|
||||
}
|
||||
assert len(enclosure) == 0
|
||||
|
||||
media_content = root.find("./channel/item/media:content", namespaces=nsmap)
|
||||
assert media_content is not None
|
||||
assert media_content.attrib == {
|
||||
"url": f"https://mirror.example/feeds/demo/video/{local_file_path(source_video)}",
|
||||
"type": "video/mp4",
|
||||
"medium": "video",
|
||||
"expression": "full",
|
||||
"duration": "60",
|
||||
"width": "640",
|
||||
"height": "360",
|
||||
"lang": "en",
|
||||
}
|
||||
assert len(media_content) == 0
|
||||
assert root.find("./channel/item/media:content", namespaces=nsmap) is None
|
||||
|
||||
media_groups = root.findall("./channel/item/media:group", namespaces=nsmap)
|
||||
assert len(media_groups) == 2
|
||||
|
||||
audio_group, video_group = media_groups
|
||||
audio_variants = audio_group.findall("media:content", namespaces=nsmap)
|
||||
assert [variant.attrib for variant in audio_variants] == [
|
||||
{
|
||||
"url": (
|
||||
f"https://mirror.example/feeds/demo/audio/"
|
||||
f"{local_audio_path(source_audio)}-vbr7.mp3"
|
||||
),
|
||||
"type": "audio/mp3",
|
||||
"medium": "audio",
|
||||
"isDefault": "true",
|
||||
"bitrate": "96000",
|
||||
"samplingrate": "44100",
|
||||
"channels": "2",
|
||||
"duration": "61.2",
|
||||
"fileSize": "4567",
|
||||
},
|
||||
{
|
||||
"url": (
|
||||
f"https://mirror.example/feeds/demo/audio/"
|
||||
f"{local_audio_path(source_audio)}-vbr3.aac"
|
||||
),
|
||||
"type": "audio/aac",
|
||||
"medium": "audio",
|
||||
"isDefault": "false",
|
||||
"bitrate": "88000",
|
||||
"samplingrate": "48000",
|
||||
"channels": "2",
|
||||
"duration": "61.2",
|
||||
"fileSize": "3456",
|
||||
},
|
||||
]
|
||||
|
||||
video_variants = video_group.findall("media:content", namespaces=nsmap)
|
||||
assert [variant.attrib for variant in video_variants] == [
|
||||
{
|
||||
"url": (
|
||||
f"https://mirror.example/feeds/demo/video/"
|
||||
f"{local_video_path(source_video)}-720.mp4"
|
||||
),
|
||||
"type": "video/mp4",
|
||||
"medium": "video",
|
||||
"isDefault": "true",
|
||||
"expression": "full",
|
||||
"bitrate": "123456",
|
||||
"framerate": "30/1",
|
||||
"duration": "60.0",
|
||||
"height": "720",
|
||||
"width": "1280",
|
||||
"lang": "en",
|
||||
"fileSize": "9876",
|
||||
}
|
||||
]
|
||||
|
||||
itunes_image = root.find("./channel/item/itunes:image", namespaces=nsmap)
|
||||
assert itunes_image is not None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue