Include original media in media groups
This commit is contained in:
parent
89d462e280
commit
954608c5f9
3 changed files with 212 additions and 14 deletions
|
|
@ -1,5 +1,6 @@
|
||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
|
import mimetypes
|
||||||
import tempfile
|
import tempfile
|
||||||
import time
|
import time
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
@ -116,6 +117,26 @@ class TranscodePipeline(BaseFilesPipeline):
|
||||||
for index, setting in enumerate(settings)
|
for index, setting in enumerate(settings)
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def original_path(self, source_url: str) -> str:
    """Return the store-relative path of the untranscoded original file.

    The path is derived from ``source_url`` by the helper that matches this
    pipeline's ``media_type`` (audio or video).

    Raises:
        ValueError: if ``self.media_type`` is neither audio nor video.
    """
    kind = self.media_type
    file_types = repub.utils.FileType
    if kind == file_types.AUDIO:
        resolver = repub.utils.local_audio_path
    elif kind == file_types.VIDEO:
        resolver = repub.utils.local_video_path
    else:
        raise ValueError(f"Unsupported media type: {self.media_type}")
    return resolver(source_url)
|
||||||
|
|
||||||
|
def original_mimetype(self, source_url: str, response=None) -> str:
|
||||||
|
if response is not None:
|
||||||
|
content_type = response.headers.get(b"Content-Type")
|
||||||
|
if content_type:
|
||||||
|
return content_type.decode("utf-8").split(";", 1)[0].strip()
|
||||||
|
mimetype = mimetypes.guess_type(source_url)[0]
|
||||||
|
if mimetype:
|
||||||
|
return mimetype
|
||||||
|
return {
|
||||||
|
repub.utils.FileType.AUDIO: "audio/mpeg",
|
||||||
|
repub.utils.FileType.VIDEO: "video/mp4",
|
||||||
|
}[self.media_type]
|
||||||
|
|
||||||
def published_url(self, path: str, item=None) -> str:
|
def published_url(self, path: str, item=None) -> str:
|
||||||
relative_path = f"{self.media_dir()}/{path}"
|
relative_path = f"{self.media_dir()}/{path}"
|
||||||
feed_url = str(self.settings.get("REPUBLISHER_FEED_URL", "")).rstrip("/")
|
feed_url = str(self.settings.get("REPUBLISHER_FEED_URL", "")).rstrip("/")
|
||||||
|
|
@ -130,7 +151,7 @@ class TranscodePipeline(BaseFilesPipeline):
|
||||||
self,
|
self,
|
||||||
*,
|
*,
|
||||||
path: str,
|
path: str,
|
||||||
setting: media.MediaSettings,
|
mimetype: str,
|
||||||
probe_result: dict[str, Any],
|
probe_result: dict[str, Any],
|
||||||
is_default: bool,
|
is_default: bool,
|
||||||
item=None,
|
item=None,
|
||||||
|
|
@ -138,7 +159,7 @@ class TranscodePipeline(BaseFilesPipeline):
|
||||||
variant: MediaVariant = {
|
variant: MediaVariant = {
|
||||||
"url": self.published_url(path, item),
|
"url": self.published_url(path, item),
|
||||||
"path": path,
|
"path": path,
|
||||||
"type": setting["mimetype"],
|
"type": mimetype,
|
||||||
"medium": self.media_type.value,
|
"medium": self.media_type.value,
|
||||||
"isDefault": "true" if is_default else "false",
|
"isDefault": "true" if is_default else "false",
|
||||||
}
|
}
|
||||||
|
|
@ -158,12 +179,24 @@ class TranscodePipeline(BaseFilesPipeline):
|
||||||
variants.append(
|
variants.append(
|
||||||
self.media_variant(
|
self.media_variant(
|
||||||
path=path,
|
path=path,
|
||||||
setting=setting,
|
mimetype=setting["mimetype"],
|
||||||
probe_result=probe_result,
|
probe_result=probe_result,
|
||||||
is_default=is_default,
|
is_default=is_default,
|
||||||
item=item,
|
item=item,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
original_path = self.original_path(request.url)
|
||||||
|
original_file = self.local_store_path(original_path)
|
||||||
|
if original_file.exists():
|
||||||
|
variants.append(
|
||||||
|
self.media_variant(
|
||||||
|
path=original_path,
|
||||||
|
mimetype=self.original_mimetype(request.url),
|
||||||
|
probe_result=media.probe_media(str(original_file)),
|
||||||
|
is_default=False,
|
||||||
|
item=item,
|
||||||
|
)
|
||||||
|
)
|
||||||
return variants
|
return variants
|
||||||
|
|
||||||
def make_file_result(
|
def make_file_result(
|
||||||
|
|
@ -201,6 +234,11 @@ class TranscodePipeline(BaseFilesPipeline):
|
||||||
for _, _, path in self.variant_paths(request.url):
|
for _, _, path in self.variant_paths(request.url):
|
||||||
if not cast(dict[str, Any] | None, self.store.stat_file(path, info)):
|
if not cast(dict[str, Any] | None, self.store.stat_file(path, info)):
|
||||||
return None
|
return None
|
||||||
|
if not cast(
|
||||||
|
dict[str, Any] | None,
|
||||||
|
self.store.stat_file(self.original_path(request.url), info),
|
||||||
|
):
|
||||||
|
return None
|
||||||
self.inc_stats("uptodate")
|
self.inc_stats("uptodate")
|
||||||
return self.make_file_result(
|
return self.make_file_result(
|
||||||
request,
|
request,
|
||||||
|
|
@ -218,6 +256,23 @@ class TranscodePipeline(BaseFilesPipeline):
|
||||||
tmp_file = f"{tmp_dir}/original"
|
tmp_file = f"{tmp_dir}/original"
|
||||||
with open(tmp_file, "wb") as f:
|
with open(tmp_file, "wb") as f:
|
||||||
f.write(response.body)
|
f.write(response.body)
|
||||||
|
original_path = self.original_path(request.url)
|
||||||
|
if not cast(
|
||||||
|
dict[str, Any] | None,
|
||||||
|
self.store.stat_file(original_path, info),
|
||||||
|
):
|
||||||
|
original_buf = read_asset(tmp_file)
|
||||||
|
self.store.persist_file(
|
||||||
|
original_path,
|
||||||
|
original_buf,
|
||||||
|
info,
|
||||||
|
meta=self.get_media_meta(media.probe_media(tmp_file)),
|
||||||
|
headers={
|
||||||
|
"Content-Type": self.original_mimetype(
|
||||||
|
request.url, response=response
|
||||||
|
)
|
||||||
|
},
|
||||||
|
)
|
||||||
for _, setting, final_path in self.variant_paths(request.url):
|
for _, setting, final_path in self.variant_paths(request.url):
|
||||||
stat = cast(
|
stat = cast(
|
||||||
dict[str, Any] | None,
|
dict[str, Any] | None,
|
||||||
|
|
|
||||||
|
|
@ -114,6 +114,21 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
||||||
"samplingrate": "48000",
|
"samplingrate": "48000",
|
||||||
"channels": "2",
|
"channels": "2",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"url": _published_url(
|
||||||
|
"https://mirror.example",
|
||||||
|
f"audio/{audio_base_path}",
|
||||||
|
),
|
||||||
|
"path": audio_base_path,
|
||||||
|
"type": "audio/mpeg",
|
||||||
|
"medium": "audio",
|
||||||
|
"isDefault": "false",
|
||||||
|
"fileSize": "5678",
|
||||||
|
"bitrate": "128000",
|
||||||
|
"duration": "61.2",
|
||||||
|
"samplingrate": "44100",
|
||||||
|
"channels": "2",
|
||||||
|
},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
@ -143,7 +158,23 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
||||||
"width": "1280",
|
"width": "1280",
|
||||||
"height": "720",
|
"height": "720",
|
||||||
"framerate": "30/1",
|
"framerate": "30/1",
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
"url": _published_url(
|
||||||
|
"https://mirror.example",
|
||||||
|
f"video/{video_base_path}",
|
||||||
|
),
|
||||||
|
"path": video_base_path,
|
||||||
|
"type": "video/mp4",
|
||||||
|
"medium": "video",
|
||||||
|
"isDefault": "false",
|
||||||
|
"fileSize": "12345",
|
||||||
|
"bitrate": "456789",
|
||||||
|
"duration": "60.0",
|
||||||
|
"width": "640",
|
||||||
|
"height": "360",
|
||||||
|
"framerate": "24/1",
|
||||||
|
},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
@ -271,6 +302,20 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
||||||
"duration": "61.2",
|
"duration": "61.2",
|
||||||
"fileSize": "3456",
|
"fileSize": "3456",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"url": (
|
||||||
|
f"https://mirror.example/feeds/demo/audio/"
|
||||||
|
f"{local_audio_path(source_audio)}"
|
||||||
|
),
|
||||||
|
"type": "audio/mpeg",
|
||||||
|
"medium": "audio",
|
||||||
|
"isDefault": "false",
|
||||||
|
"bitrate": "128000",
|
||||||
|
"samplingrate": "44100",
|
||||||
|
"channels": "2",
|
||||||
|
"duration": "61.2",
|
||||||
|
"fileSize": "5678",
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
video_variants = video_group.findall("media:content", namespaces=nsmap)
|
video_variants = video_group.findall("media:content", namespaces=nsmap)
|
||||||
|
|
@ -291,7 +336,24 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
||||||
"width": "1280",
|
"width": "1280",
|
||||||
"lang": "en",
|
"lang": "en",
|
||||||
"fileSize": "9876",
|
"fileSize": "9876",
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
"url": (
|
||||||
|
f"https://mirror.example/feeds/demo/video/"
|
||||||
|
f"{local_video_path(source_video)}"
|
||||||
|
),
|
||||||
|
"type": "video/mp4",
|
||||||
|
"medium": "video",
|
||||||
|
"isDefault": "false",
|
||||||
|
"expression": "full",
|
||||||
|
"bitrate": "456789",
|
||||||
|
"framerate": "24/1",
|
||||||
|
"duration": "60.0",
|
||||||
|
"height": "360",
|
||||||
|
"width": "640",
|
||||||
|
"lang": "en",
|
||||||
|
"fileSize": "12345",
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
itunes_image = root.find("./channel/item/itunes:image", namespaces=nsmap)
|
itunes_image = root.find("./channel/item/itunes:image", namespaces=nsmap)
|
||||||
|
|
|
||||||
|
|
@ -233,7 +233,7 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
||||||
return str(output_path)
|
return str(output_path)
|
||||||
|
|
||||||
def fake_probe_media(file_path: str):
|
def fake_probe_media(file_path: str):
|
||||||
if file_path.endswith("vbr7.mp3"):
|
if file_path.endswith(".mp3-vbr7.mp3"):
|
||||||
return {
|
return {
|
||||||
"format": {
|
"format": {
|
||||||
"duration": "61.2",
|
"duration": "61.2",
|
||||||
|
|
@ -253,6 +253,26 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
if file_path.endswith(".mp3"):
|
||||||
|
return {
|
||||||
|
"format": {
|
||||||
|
"duration": "61.2",
|
||||||
|
"size": "5678",
|
||||||
|
"bit_rate": "128000",
|
||||||
|
"format_name": "mp3",
|
||||||
|
"format_long_name": "MP3",
|
||||||
|
},
|
||||||
|
"streams": [
|
||||||
|
{
|
||||||
|
"codec_type": "audio",
|
||||||
|
"codec_name": "mp3",
|
||||||
|
"bit_rate": "128000",
|
||||||
|
"duration_ts": "61200",
|
||||||
|
"sample_rate": "44100",
|
||||||
|
"channels": 2,
|
||||||
|
}
|
||||||
|
],
|
||||||
|
}
|
||||||
return {
|
return {
|
||||||
"format": {
|
"format": {
|
||||||
"duration": "61.2",
|
"duration": "61.2",
|
||||||
|
|
@ -333,9 +353,22 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
||||||
"samplingrate": 48000,
|
"samplingrate": 48000,
|
||||||
"channels": 2,
|
"channels": 2,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"url": f"https://mirror.example/feeds/nasa/audio/{audio_base_path}",
|
||||||
|
"path": audio_base_path,
|
||||||
|
"type": "audio/mpeg",
|
||||||
|
"medium": "audio",
|
||||||
|
"isDefault": "false",
|
||||||
|
"fileSize": "5678",
|
||||||
|
"bitrate": 128000,
|
||||||
|
"duration": "61.2",
|
||||||
|
"samplingrate": 44100,
|
||||||
|
"channels": 2,
|
||||||
|
},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
assert persisted == [
|
assert persisted == [
|
||||||
|
(audio_base_path, "audio/mpeg"),
|
||||||
(f"{audio_base_path}-vbr7.mp3", "audio/mp3"),
|
(f"{audio_base_path}-vbr7.mp3", "audio/mp3"),
|
||||||
(f"{audio_base_path}-vbr3.aac", "audio/aac"),
|
(f"{audio_base_path}-vbr3.aac", "audio/aac"),
|
||||||
]
|
]
|
||||||
|
|
@ -383,8 +416,16 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
||||||
lambda _: {
|
lambda _: {
|
||||||
"format": {
|
"format": {
|
||||||
"duration": "60.0",
|
"duration": "60.0",
|
||||||
"size": "9876",
|
"size": (
|
||||||
"bit_rate": "123456",
|
"12345"
|
||||||
|
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
||||||
|
else "9876"
|
||||||
|
),
|
||||||
|
"bit_rate": (
|
||||||
|
"456789"
|
||||||
|
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
||||||
|
else "123456"
|
||||||
|
),
|
||||||
"format_name": "mp4",
|
"format_name": "mp4",
|
||||||
"format_long_name": "MP4",
|
"format_long_name": "MP4",
|
||||||
},
|
},
|
||||||
|
|
@ -392,11 +433,27 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
||||||
{
|
{
|
||||||
"codec_type": "video",
|
"codec_type": "video",
|
||||||
"codec_name": "h264",
|
"codec_name": "h264",
|
||||||
"bit_rate": "123456",
|
"bit_rate": (
|
||||||
|
"456789"
|
||||||
|
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
||||||
|
else "123456"
|
||||||
|
),
|
||||||
"duration_ts": "60000",
|
"duration_ts": "60000",
|
||||||
"width": 1280,
|
"width": (
|
||||||
"height": 720,
|
640
|
||||||
"avg_frame_rate": "30/1",
|
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
||||||
|
else 1280
|
||||||
|
),
|
||||||
|
"height": (
|
||||||
|
360
|
||||||
|
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
||||||
|
else 720
|
||||||
|
),
|
||||||
|
"avg_frame_rate": (
|
||||||
|
"24/1"
|
||||||
|
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
||||||
|
else "30/1"
|
||||||
|
),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"codec_type": "audio",
|
"codec_type": "audio",
|
||||||
|
|
@ -451,10 +508,26 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
||||||
"width": 1280,
|
"width": 1280,
|
||||||
"height": 720,
|
"height": 720,
|
||||||
"framerate": "30/1",
|
"framerate": "30/1",
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
"url": f"https://mirror.example/feeds/nasa/video/{video_base_path}",
|
||||||
|
"path": video_base_path,
|
||||||
|
"type": "video/mp4",
|
||||||
|
"medium": "video",
|
||||||
|
"isDefault": "false",
|
||||||
|
"fileSize": "12345",
|
||||||
|
"bitrate": 456789,
|
||||||
|
"duration": "60.0",
|
||||||
|
"width": 640,
|
||||||
|
"height": 360,
|
||||||
|
"framerate": "24/1",
|
||||||
|
},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
assert persisted == [(f"{video_base_path}-720.mp4", "video/mp4")]
|
assert persisted == [
|
||||||
|
(video_base_path, "video/mp4"),
|
||||||
|
(f"{video_base_path}-720.mp4", "video/mp4"),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def test_audio_pipeline_media_to_download_checks_canonical_path(
|
def test_audio_pipeline_media_to_download_checks_canonical_path(
|
||||||
|
|
@ -465,8 +538,11 @@ def test_audio_pipeline_media_to_download_checks_canonical_path(
|
||||||
monkeypatch.setattr(pipeline, "inc_stats", lambda status: None)
|
monkeypatch.setattr(pipeline, "inc_stats", lambda status: None)
|
||||||
source_url = "https://example.com/podcast.mp3"
|
source_url = "https://example.com/podcast.mp3"
|
||||||
audio_base_path = local_audio_path(source_url)
|
audio_base_path = local_audio_path(source_url)
|
||||||
|
original_path = store_dir(pipeline) / audio_base_path
|
||||||
canonical_path = store_dir(pipeline) / f"{audio_base_path}-vbr7.mp3"
|
canonical_path = store_dir(pipeline) / f"{audio_base_path}-vbr7.mp3"
|
||||||
secondary_path = store_dir(pipeline) / f"{audio_base_path}-vbr3.aac"
|
secondary_path = store_dir(pipeline) / f"{audio_base_path}-vbr3.aac"
|
||||||
|
original_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
original_path.write_bytes(b"original")
|
||||||
canonical_path.parent.mkdir(parents=True, exist_ok=True)
|
canonical_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
canonical_path.write_bytes(b"default")
|
canonical_path.write_bytes(b"default")
|
||||||
secondary_path.write_bytes(b"alt")
|
secondary_path.write_bytes(b"alt")
|
||||||
|
|
@ -524,5 +600,10 @@ def test_audio_pipeline_media_to_download_checks_canonical_path(
|
||||||
assert result is not None
|
assert result is not None
|
||||||
assert result["path"] == f"{audio_base_path}-vbr7.mp3"
|
assert result["path"] == f"{audio_base_path}-vbr7.mp3"
|
||||||
assert result["status"] == "uptodate"
|
assert result["status"] == "uptodate"
|
||||||
|
assert [variant.get("path") for variant in result["variants"]] == [
|
||||||
|
f"{audio_base_path}-vbr7.mp3",
|
||||||
|
f"{audio_base_path}-vbr3.aac",
|
||||||
|
audio_base_path,
|
||||||
|
]
|
||||||
assert f"{audio_base_path}.mp3" not in stat_paths
|
assert f"{audio_base_path}.mp3" not in stat_paths
|
||||||
assert stat_paths[0] == f"{audio_base_path}-vbr7.mp3"
|
assert stat_paths[0] == f"{audio_base_path}-vbr7.mp3"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue