Include original media in media groups
This commit is contained in:
parent
89d462e280
commit
954608c5f9
3 changed files with 212 additions and 14 deletions
|
|
@ -1,5 +1,6 @@
|
|||
import hashlib
|
||||
import logging
|
||||
import mimetypes
|
||||
import tempfile
|
||||
import time
|
||||
from io import BytesIO
|
||||
|
|
@ -116,6 +117,26 @@ class TranscodePipeline(BaseFilesPipeline):
|
|||
for index, setting in enumerate(settings)
|
||||
]
|
||||
|
||||
def original_path(self, source_url: str) -> str:
    """Return the store path used for the untranscoded source file.

    The path is computed by the ``repub.utils`` helper that matches this
    pipeline's ``media_type``: ``local_audio_path`` for audio and
    ``local_video_path`` for video.

    Args:
        source_url: URL the original media was downloaded from.

    Returns:
        The path produced by the matching ``repub.utils`` helper.

    Raises:
        ValueError: If ``media_type`` is neither audio nor video.
    """
    kind = self.media_type
    file_types = repub.utils.FileType
    if kind == file_types.AUDIO:
        resolver = repub.utils.local_audio_path
    elif kind == file_types.VIDEO:
        resolver = repub.utils.local_video_path
    else:
        raise ValueError(f"Unsupported media type: {self.media_type}")
    return resolver(source_url)
|
||||
|
||||
def original_mimetype(self, source_url: str, response=None) -> str:
    """Determine the MIME type to report for the untranscoded source file.

    Sources are consulted in order, and the first non-empty answer wins:

    1. The ``Content-Type`` header of ``response`` (when a response is
       given), with any parameters such as ``; charset=...`` removed.
    2. ``mimetypes.guess_type`` applied to ``source_url``.
    3. A per-media-type default: ``audio/mpeg`` for audio pipelines and
       ``video/mp4`` for video pipelines.

    Args:
        source_url: URL the original media was downloaded from.
        response: Optional download response; only ``response.headers.get``
            is used.

    Returns:
        The resolved MIME type string.

    Raises:
        KeyError: If no header or URL guess is available and ``media_type``
            is neither audio nor video.
    """
    if response is not None:
        content_type = response.headers.get(b"Content-Type")
        if content_type:
            if isinstance(content_type, bytes):
                # Header bytes are not guaranteed to be valid UTF-8; a
                # malformed parameter must not abort the whole download.
                content_type = content_type.decode("utf-8", errors="replace")
            media_type = content_type.split(";", 1)[0].strip()
            # A header such as "; charset=utf-8" carries no media type;
            # fall through to the URL guess instead of returning "".
            if media_type:
                return media_type
    mimetype = mimetypes.guess_type(source_url)[0]
    if mimetype:
        return mimetype
    return {
        repub.utils.FileType.AUDIO: "audio/mpeg",
        repub.utils.FileType.VIDEO: "video/mp4",
    }[self.media_type]
|
||||
|
||||
def published_url(self, path: str, item=None) -> str:
|
||||
relative_path = f"{self.media_dir()}/{path}"
|
||||
feed_url = str(self.settings.get("REPUBLISHER_FEED_URL", "")).rstrip("/")
|
||||
|
|
@ -130,7 +151,7 @@ class TranscodePipeline(BaseFilesPipeline):
|
|||
self,
|
||||
*,
|
||||
path: str,
|
||||
setting: media.MediaSettings,
|
||||
mimetype: str,
|
||||
probe_result: dict[str, Any],
|
||||
is_default: bool,
|
||||
item=None,
|
||||
|
|
@ -138,7 +159,7 @@ class TranscodePipeline(BaseFilesPipeline):
|
|||
variant: MediaVariant = {
|
||||
"url": self.published_url(path, item),
|
||||
"path": path,
|
||||
"type": setting["mimetype"],
|
||||
"type": mimetype,
|
||||
"medium": self.media_type.value,
|
||||
"isDefault": "true" if is_default else "false",
|
||||
}
|
||||
|
|
@ -158,12 +179,24 @@ class TranscodePipeline(BaseFilesPipeline):
|
|||
variants.append(
|
||||
self.media_variant(
|
||||
path=path,
|
||||
setting=setting,
|
||||
mimetype=setting["mimetype"],
|
||||
probe_result=probe_result,
|
||||
is_default=is_default,
|
||||
item=item,
|
||||
)
|
||||
)
|
||||
original_path = self.original_path(request.url)
|
||||
original_file = self.local_store_path(original_path)
|
||||
if original_file.exists():
|
||||
variants.append(
|
||||
self.media_variant(
|
||||
path=original_path,
|
||||
mimetype=self.original_mimetype(request.url),
|
||||
probe_result=media.probe_media(str(original_file)),
|
||||
is_default=False,
|
||||
item=item,
|
||||
)
|
||||
)
|
||||
return variants
|
||||
|
||||
def make_file_result(
|
||||
|
|
@ -201,6 +234,11 @@ class TranscodePipeline(BaseFilesPipeline):
|
|||
for _, _, path in self.variant_paths(request.url):
|
||||
if not cast(dict[str, Any] | None, self.store.stat_file(path, info)):
|
||||
return None
|
||||
if not cast(
|
||||
dict[str, Any] | None,
|
||||
self.store.stat_file(self.original_path(request.url), info),
|
||||
):
|
||||
return None
|
||||
self.inc_stats("uptodate")
|
||||
return self.make_file_result(
|
||||
request,
|
||||
|
|
@ -218,6 +256,23 @@ class TranscodePipeline(BaseFilesPipeline):
|
|||
tmp_file = f"{tmp_dir}/original"
|
||||
with open(tmp_file, "wb") as f:
|
||||
f.write(response.body)
|
||||
original_path = self.original_path(request.url)
|
||||
if not cast(
|
||||
dict[str, Any] | None,
|
||||
self.store.stat_file(original_path, info),
|
||||
):
|
||||
original_buf = read_asset(tmp_file)
|
||||
self.store.persist_file(
|
||||
original_path,
|
||||
original_buf,
|
||||
info,
|
||||
meta=self.get_media_meta(media.probe_media(tmp_file)),
|
||||
headers={
|
||||
"Content-Type": self.original_mimetype(
|
||||
request.url, response=response
|
||||
)
|
||||
},
|
||||
)
|
||||
for _, setting, final_path in self.variant_paths(request.url):
|
||||
stat = cast(
|
||||
dict[str, Any] | None,
|
||||
|
|
|
|||
|
|
@ -114,6 +114,21 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
|||
"samplingrate": "48000",
|
||||
"channels": "2",
|
||||
},
|
||||
{
|
||||
"url": _published_url(
|
||||
"https://mirror.example",
|
||||
f"audio/{audio_base_path}",
|
||||
),
|
||||
"path": audio_base_path,
|
||||
"type": "audio/mpeg",
|
||||
"medium": "audio",
|
||||
"isDefault": "false",
|
||||
"fileSize": "5678",
|
||||
"bitrate": "128000",
|
||||
"duration": "61.2",
|
||||
"samplingrate": "44100",
|
||||
"channels": "2",
|
||||
},
|
||||
],
|
||||
}
|
||||
]
|
||||
|
|
@ -143,7 +158,23 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
|||
"width": "1280",
|
||||
"height": "720",
|
||||
"framerate": "30/1",
|
||||
}
|
||||
},
|
||||
{
|
||||
"url": _published_url(
|
||||
"https://mirror.example",
|
||||
f"video/{video_base_path}",
|
||||
),
|
||||
"path": video_base_path,
|
||||
"type": "video/mp4",
|
||||
"medium": "video",
|
||||
"isDefault": "false",
|
||||
"fileSize": "12345",
|
||||
"bitrate": "456789",
|
||||
"duration": "60.0",
|
||||
"width": "640",
|
||||
"height": "360",
|
||||
"framerate": "24/1",
|
||||
},
|
||||
],
|
||||
}
|
||||
]
|
||||
|
|
@ -271,6 +302,20 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
|||
"duration": "61.2",
|
||||
"fileSize": "3456",
|
||||
},
|
||||
{
|
||||
"url": (
|
||||
f"https://mirror.example/feeds/demo/audio/"
|
||||
f"{local_audio_path(source_audio)}"
|
||||
),
|
||||
"type": "audio/mpeg",
|
||||
"medium": "audio",
|
||||
"isDefault": "false",
|
||||
"bitrate": "128000",
|
||||
"samplingrate": "44100",
|
||||
"channels": "2",
|
||||
"duration": "61.2",
|
||||
"fileSize": "5678",
|
||||
},
|
||||
]
|
||||
|
||||
video_variants = video_group.findall("media:content", namespaces=nsmap)
|
||||
|
|
@ -291,7 +336,24 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
|||
"width": "1280",
|
||||
"lang": "en",
|
||||
"fileSize": "9876",
|
||||
}
|
||||
},
|
||||
{
|
||||
"url": (
|
||||
f"https://mirror.example/feeds/demo/video/"
|
||||
f"{local_video_path(source_video)}"
|
||||
),
|
||||
"type": "video/mp4",
|
||||
"medium": "video",
|
||||
"isDefault": "false",
|
||||
"expression": "full",
|
||||
"bitrate": "456789",
|
||||
"framerate": "24/1",
|
||||
"duration": "60.0",
|
||||
"height": "360",
|
||||
"width": "640",
|
||||
"lang": "en",
|
||||
"fileSize": "12345",
|
||||
},
|
||||
]
|
||||
|
||||
itunes_image = root.find("./channel/item/itunes:image", namespaces=nsmap)
|
||||
|
|
|
|||
|
|
@ -233,7 +233,7 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
|||
return str(output_path)
|
||||
|
||||
def fake_probe_media(file_path: str):
|
||||
if file_path.endswith("vbr7.mp3"):
|
||||
if file_path.endswith(".mp3-vbr7.mp3"):
|
||||
return {
|
||||
"format": {
|
||||
"duration": "61.2",
|
||||
|
|
@ -253,6 +253,26 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
|||
}
|
||||
],
|
||||
}
|
||||
if file_path.endswith(".mp3"):
|
||||
return {
|
||||
"format": {
|
||||
"duration": "61.2",
|
||||
"size": "5678",
|
||||
"bit_rate": "128000",
|
||||
"format_name": "mp3",
|
||||
"format_long_name": "MP3",
|
||||
},
|
||||
"streams": [
|
||||
{
|
||||
"codec_type": "audio",
|
||||
"codec_name": "mp3",
|
||||
"bit_rate": "128000",
|
||||
"duration_ts": "61200",
|
||||
"sample_rate": "44100",
|
||||
"channels": 2,
|
||||
}
|
||||
],
|
||||
}
|
||||
return {
|
||||
"format": {
|
||||
"duration": "61.2",
|
||||
|
|
@ -333,9 +353,22 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
|||
"samplingrate": 48000,
|
||||
"channels": 2,
|
||||
},
|
||||
{
|
||||
"url": f"https://mirror.example/feeds/nasa/audio/{audio_base_path}",
|
||||
"path": audio_base_path,
|
||||
"type": "audio/mpeg",
|
||||
"medium": "audio",
|
||||
"isDefault": "false",
|
||||
"fileSize": "5678",
|
||||
"bitrate": 128000,
|
||||
"duration": "61.2",
|
||||
"samplingrate": 44100,
|
||||
"channels": 2,
|
||||
},
|
||||
],
|
||||
}
|
||||
assert persisted == [
|
||||
(audio_base_path, "audio/mpeg"),
|
||||
(f"{audio_base_path}-vbr7.mp3", "audio/mp3"),
|
||||
(f"{audio_base_path}-vbr3.aac", "audio/aac"),
|
||||
]
|
||||
|
|
@ -383,8 +416,16 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
|||
lambda _: {
|
||||
"format": {
|
||||
"duration": "60.0",
|
||||
"size": "9876",
|
||||
"bit_rate": "123456",
|
||||
"size": (
|
||||
"12345"
|
||||
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
||||
else "9876"
|
||||
),
|
||||
"bit_rate": (
|
||||
"456789"
|
||||
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
||||
else "123456"
|
||||
),
|
||||
"format_name": "mp4",
|
||||
"format_long_name": "MP4",
|
||||
},
|
||||
|
|
@ -392,11 +433,27 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
|||
{
|
||||
"codec_type": "video",
|
||||
"codec_name": "h264",
|
||||
"bit_rate": "123456",
|
||||
"bit_rate": (
|
||||
"456789"
|
||||
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
||||
else "123456"
|
||||
),
|
||||
"duration_ts": "60000",
|
||||
"width": 1280,
|
||||
"height": 720,
|
||||
"avg_frame_rate": "30/1",
|
||||
"width": (
|
||||
640
|
||||
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
||||
else 1280
|
||||
),
|
||||
"height": (
|
||||
360
|
||||
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
||||
else 720
|
||||
),
|
||||
"avg_frame_rate": (
|
||||
"24/1"
|
||||
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
||||
else "30/1"
|
||||
),
|
||||
},
|
||||
{
|
||||
"codec_type": "audio",
|
||||
|
|
@ -451,10 +508,26 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
|||
"width": 1280,
|
||||
"height": 720,
|
||||
"framerate": "30/1",
|
||||
}
|
||||
},
|
||||
{
|
||||
"url": f"https://mirror.example/feeds/nasa/video/{video_base_path}",
|
||||
"path": video_base_path,
|
||||
"type": "video/mp4",
|
||||
"medium": "video",
|
||||
"isDefault": "false",
|
||||
"fileSize": "12345",
|
||||
"bitrate": 456789,
|
||||
"duration": "60.0",
|
||||
"width": 640,
|
||||
"height": 360,
|
||||
"framerate": "24/1",
|
||||
},
|
||||
],
|
||||
}
|
||||
assert persisted == [(f"{video_base_path}-720.mp4", "video/mp4")]
|
||||
assert persisted == [
|
||||
(video_base_path, "video/mp4"),
|
||||
(f"{video_base_path}-720.mp4", "video/mp4"),
|
||||
]
|
||||
|
||||
|
||||
def test_audio_pipeline_media_to_download_checks_canonical_path(
|
||||
|
|
@ -465,8 +538,11 @@ def test_audio_pipeline_media_to_download_checks_canonical_path(
|
|||
monkeypatch.setattr(pipeline, "inc_stats", lambda status: None)
|
||||
source_url = "https://example.com/podcast.mp3"
|
||||
audio_base_path = local_audio_path(source_url)
|
||||
original_path = store_dir(pipeline) / audio_base_path
|
||||
canonical_path = store_dir(pipeline) / f"{audio_base_path}-vbr7.mp3"
|
||||
secondary_path = store_dir(pipeline) / f"{audio_base_path}-vbr3.aac"
|
||||
original_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
original_path.write_bytes(b"original")
|
||||
canonical_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
canonical_path.write_bytes(b"default")
|
||||
secondary_path.write_bytes(b"alt")
|
||||
|
|
@ -524,5 +600,10 @@ def test_audio_pipeline_media_to_download_checks_canonical_path(
|
|||
assert result is not None
|
||||
assert result["path"] == f"{audio_base_path}-vbr7.mp3"
|
||||
assert result["status"] == "uptodate"
|
||||
assert [variant.get("path") for variant in result["variants"]] == [
|
||||
f"{audio_base_path}-vbr7.mp3",
|
||||
f"{audio_base_path}-vbr3.aac",
|
||||
audio_base_path,
|
||||
]
|
||||
assert f"{audio_base_path}.mp3" not in stat_paths
|
||||
assert stat_paths[0] == f"{audio_base_path}-vbr7.mp3"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue