Hash video profile paths
This commit is contained in:
parent
954608c5f9
commit
2ad0536bb0
5 changed files with 180 additions and 28 deletions
|
|
@ -170,6 +170,10 @@ def _frame_rate(stream: Dict[str, Any]) -> Optional[str]:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _scale_to_max_height(max_height: int) -> str:
|
||||||
|
return f"scale=-2:{max_height}"
|
||||||
|
|
||||||
|
|
||||||
def audio_meta(probe: Dict[str, Any]) -> Optional[AudioMeta]:
|
def audio_meta(probe: Dict[str, Any]) -> Optional[AudioMeta]:
|
||||||
stream = primary_audio_stream(probe)
|
stream = primary_audio_stream(probe)
|
||||||
if not stream:
|
if not stream:
|
||||||
|
|
@ -303,7 +307,7 @@ def video_transcode_params(
|
||||||
params = {"extension": settings["extension"]}
|
params = {"extension": settings["extension"]}
|
||||||
if len(passes) == 0 or is_vcodec:
|
if len(passes) == 0 or is_vcodec:
|
||||||
if not is_good_height:
|
if not is_good_height:
|
||||||
params["vf"] = f"scale={width}:{height}"
|
params["vf"] = _scale_to_max_height(max_height)
|
||||||
if not is_vcodec:
|
if not is_vcodec:
|
||||||
params.update(settings["ffmpeg_video_params"])
|
params.update(settings["ffmpeg_video_params"])
|
||||||
if not is_acodec or not is_audio_bitrate:
|
if not is_acodec or not is_audio_bitrate:
|
||||||
|
|
@ -313,7 +317,7 @@ def video_transcode_params(
|
||||||
for p in passes:
|
for p in passes:
|
||||||
p = copy.deepcopy(p)
|
p = copy.deepcopy(p)
|
||||||
if not is_good_height:
|
if not is_good_height:
|
||||||
p["vf"] = f"scale={width}:{height}"
|
p["vf"] = _scale_to_max_height(max_height)
|
||||||
params["passes"].append(p)
|
params["passes"].append(p)
|
||||||
return params
|
return params
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import json
|
||||||
import mimetypes
|
import mimetypes
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
@ -42,8 +43,23 @@ def local_audio_path(s: str) -> str:
|
||||||
return local_file_path(s)
|
return local_file_path(s)
|
||||||
|
|
||||||
|
|
||||||
def variant_media_path(base_path: str, profile: Mapping[str, Any]) -> str:
|
def profile_settings_hash(profile: Mapping[str, Any]) -> str:
|
||||||
return f"{base_path}-{profile['name']}.{profile['extension']}"
|
settings = {
|
||||||
|
key: value
|
||||||
|
for key, value in profile.items()
|
||||||
|
if key not in {"name", "mimetype", "extension"}
|
||||||
|
}
|
||||||
|
payload = json.dumps(settings, sort_keys=True, separators=(",", ":"))
|
||||||
|
return hashlib.sha1(to_bytes(payload)).hexdigest()[:8] # nosec
|
||||||
|
|
||||||
|
|
||||||
|
def variant_media_path(
|
||||||
|
base_path: str, profile: Mapping[str, Any], *, hashed: bool = False
|
||||||
|
) -> str:
|
||||||
|
profile_name = str(profile["name"])
|
||||||
|
if hashed:
|
||||||
|
profile_name = f"{profile_name}-{profile_settings_hash(profile)}"
|
||||||
|
return f"{base_path}-{profile_name}.{profile['extension']}"
|
||||||
|
|
||||||
|
|
||||||
def published_media_path(
|
def published_media_path(
|
||||||
|
|
@ -52,7 +68,7 @@ def published_media_path(
|
||||||
if file_type == FileType.AUDIO:
|
if file_type == FileType.AUDIO:
|
||||||
return variant_media_path(local_audio_path(source_url), profile)
|
return variant_media_path(local_audio_path(source_url), profile)
|
||||||
if file_type == FileType.VIDEO:
|
if file_type == FileType.VIDEO:
|
||||||
return variant_media_path(local_video_path(source_url), profile)
|
return variant_media_path(local_video_path(source_url), profile, hashed=True)
|
||||||
raise ValueError(f"Unsupported file type for published media path: {file_type}")
|
raise ValueError(f"Unsupported file type for published media path: {file_type}")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -135,10 +135,10 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
||||||
item.videos = [
|
item.videos = [
|
||||||
{
|
{
|
||||||
"url": source_video,
|
"url": source_video,
|
||||||
"path": f"{video_base_path}-720.mp4",
|
"path": f"{video_base_path}-720-457f0928.mp4",
|
||||||
"published_url": _published_url(
|
"published_url": _published_url(
|
||||||
"https://mirror.example",
|
"https://mirror.example",
|
||||||
f"video/{video_base_path}-720.mp4",
|
f"video/{video_base_path}-720-457f0928.mp4",
|
||||||
),
|
),
|
||||||
"checksum": "video-default",
|
"checksum": "video-default",
|
||||||
"status": "downloaded",
|
"status": "downloaded",
|
||||||
|
|
@ -146,9 +146,9 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
||||||
{
|
{
|
||||||
"url": _published_url(
|
"url": _published_url(
|
||||||
"https://mirror.example",
|
"https://mirror.example",
|
||||||
f"video/{video_base_path}-720.mp4",
|
f"video/{video_base_path}-720-457f0928.mp4",
|
||||||
),
|
),
|
||||||
"path": f"{video_base_path}-720.mp4",
|
"path": f"{video_base_path}-720-457f0928.mp4",
|
||||||
"type": "video/mp4",
|
"type": "video/mp4",
|
||||||
"medium": "video",
|
"medium": "video",
|
||||||
"isDefault": "true",
|
"isDefault": "true",
|
||||||
|
|
@ -323,7 +323,7 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
||||||
{
|
{
|
||||||
"url": (
|
"url": (
|
||||||
f"https://mirror.example/feeds/demo/video/"
|
f"https://mirror.example/feeds/demo/video/"
|
||||||
f"{local_video_path(source_video)}-720.mp4"
|
f"{local_video_path(source_video)}-720-457f0928.mp4"
|
||||||
),
|
),
|
||||||
"type": "video/mp4",
|
"type": "video/mp4",
|
||||||
"medium": "video",
|
"medium": "video",
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,13 @@ from scrapy.settings import Settings
|
||||||
from repub import entrypoint as entrypoint_module
|
from repub import entrypoint as entrypoint_module
|
||||||
from repub import settings as repub_settings
|
from repub import settings as repub_settings
|
||||||
from repub.spiders.rss_spider import RssFeedSpider
|
from repub.spiders.rss_spider import RssFeedSpider
|
||||||
from repub.utils import FileType, local_audio_path, local_image_path, local_video_path
|
from repub.utils import (
|
||||||
|
FileType,
|
||||||
|
local_audio_path,
|
||||||
|
local_image_path,
|
||||||
|
local_video_path,
|
||||||
|
published_media_path,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_entrypoint_supports_file_feed_urls(tmp_path: Path, monkeypatch) -> None:
|
def test_entrypoint_supports_file_feed_urls(tmp_path: Path, monkeypatch) -> None:
|
||||||
|
|
@ -67,14 +73,32 @@ def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None:
|
||||||
)
|
)
|
||||||
== f"audio/{local_audio_path('https://example.com/media/podcast.mp3')}-vbr7.mp3"
|
== f"audio/{local_audio_path('https://example.com/media/podcast.mp3')}-vbr7.mp3"
|
||||||
)
|
)
|
||||||
assert (
|
assert spider.rewrite_file_url(
|
||||||
spider.rewrite_file_url(
|
|
||||||
FileType.VIDEO,
|
FileType.VIDEO,
|
||||||
"https://example.com/media/clip.mp4",
|
"https://example.com/media/clip.mp4",
|
||||||
|
) == (
|
||||||
|
"video/"
|
||||||
|
f"{local_video_path('https://example.com/media/clip.mp4')}"
|
||||||
|
"-720-457f0928.mp4"
|
||||||
)
|
)
|
||||||
== f"video/{local_video_path('https://example.com/media/clip.mp4')}-720.mp4"
|
|
||||||
|
|
||||||
|
def test_published_video_path_changes_when_profile_args_change() -> None:
|
||||||
|
source_url = "https://example.com/media/clip.mp4"
|
||||||
|
base_profile = repub_settings.REPUBLISHER_VIDEO[0]
|
||||||
|
|
||||||
|
assert published_media_path(
|
||||||
|
FileType.AUDIO, source_url, repub_settings.REPUBLISHER_AUDIO[0]
|
||||||
|
) == (f"{local_audio_path(source_url)}-vbr7.mp3")
|
||||||
|
assert published_media_path(FileType.VIDEO, source_url, base_profile) == (
|
||||||
|
f"{local_video_path(source_url)}-720-457f0928.mp4"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
changed_profile = {**base_profile, "max_height": 1080}
|
||||||
|
assert published_media_path(
|
||||||
|
FileType.VIDEO, source_url, changed_profile
|
||||||
|
) != published_media_path(FileType.VIDEO, source_url, base_profile)
|
||||||
|
|
||||||
|
|
||||||
def test_rss_spider_keeps_items_with_empty_content_encoded() -> None:
|
def test_rss_spider_keeps_items_with_empty_content_encoded() -> None:
|
||||||
feed_text = """<?xml version="1.0" encoding="UTF-8"?>
|
feed_text = """<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
|
|
||||||
|
|
@ -204,6 +204,111 @@ def test_transcode_video_prints_ffmpeg_output_on_error(
|
||||||
assert ("video-stdout", False) in printed
|
assert ("video-stdout", False) in printed
|
||||||
|
|
||||||
|
|
||||||
|
def test_video_transcode_params_scales_to_max_height() -> None:
|
||||||
|
params = media.video_transcode_params(
|
||||||
|
{
|
||||||
|
"format": {"format_name": "mp4"},
|
||||||
|
"streams": [
|
||||||
|
{
|
||||||
|
"codec_type": "video",
|
||||||
|
"codec_name": "mpeg4",
|
||||||
|
"bit_rate": "2000000",
|
||||||
|
"duration_ts": "1",
|
||||||
|
"width": 1920,
|
||||||
|
"height": 1080,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"codec_type": "audio",
|
||||||
|
"codec_name": "aac",
|
||||||
|
"bit_rate": "128000",
|
||||||
|
"duration_ts": "1",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "720",
|
||||||
|
"container": "mp4",
|
||||||
|
"vcodec": "h264",
|
||||||
|
"acodec": "mp3",
|
||||||
|
"audio_max_bitrate": 96000,
|
||||||
|
"ffmpeg_audio_params": {"acodec": "libmp3lame"},
|
||||||
|
"ffmpeg_video_params": {"vcodec": "h264", "strict": "-2"},
|
||||||
|
"max_height": 720,
|
||||||
|
"mimetype": "video/mp4",
|
||||||
|
"extension": "mp4",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
assert params == {
|
||||||
|
"extension": "mp4",
|
||||||
|
"vf": "scale=-2:720",
|
||||||
|
"vcodec": "h264",
|
||||||
|
"strict": "-2",
|
||||||
|
"acodec": "libmp3lame",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_video_transcode_params_scales_to_max_height_for_multipass() -> None:
|
||||||
|
params = media.video_transcode_params(
|
||||||
|
{
|
||||||
|
"format": {"format_name": "mp4"},
|
||||||
|
"streams": [
|
||||||
|
{
|
||||||
|
"codec_type": "video",
|
||||||
|
"codec_name": "mpeg4",
|
||||||
|
"bit_rate": "2000000",
|
||||||
|
"duration_ts": "1",
|
||||||
|
"width": 1920,
|
||||||
|
"height": 1080,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"codec_type": "audio",
|
||||||
|
"codec_name": "mp3",
|
||||||
|
"bit_rate": "128000",
|
||||||
|
"duration_ts": "1",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
cast(
|
||||||
|
media.VideoSettings,
|
||||||
|
{
|
||||||
|
"name": "720",
|
||||||
|
"container": "webm",
|
||||||
|
"vcodec": "libvpx-vp9",
|
||||||
|
"acodec": "opus",
|
||||||
|
"audio_max_bitrate": 96000,
|
||||||
|
"ffmpeg_audio_params": {"c:a": "libopus", "b:a": "96k"},
|
||||||
|
"ffmpeg_video_params": {},
|
||||||
|
"max_height": 720,
|
||||||
|
"mimetype": "video/webm",
|
||||||
|
"extension": "webm",
|
||||||
|
"passes": [
|
||||||
|
{"c:v": "libvpx-vp9", "pass": "1", "f": "null"},
|
||||||
|
{"c:v": "libvpx-vp9", "pass": "2", "c:a": "libopus"},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
assert params == {
|
||||||
|
"extension": "webm",
|
||||||
|
"passes": [
|
||||||
|
{
|
||||||
|
"c:v": "libvpx-vp9",
|
||||||
|
"pass": "1",
|
||||||
|
"f": "null",
|
||||||
|
"vf": "scale=-2:720",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"c:v": "libvpx-vp9",
|
||||||
|
"pass": "2",
|
||||||
|
"c:a": "libopus",
|
||||||
|
"vf": "scale=-2:720",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variants(
|
def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variants(
|
||||||
monkeypatch, tmp_path: Path
|
monkeypatch, tmp_path: Path
|
||||||
) -> None:
|
) -> None:
|
||||||
|
|
@ -410,6 +515,7 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
||||||
return str(output_path)
|
return str(output_path)
|
||||||
|
|
||||||
monkeypatch.setattr(pipeline, "transcode", fake_transcode)
|
monkeypatch.setattr(pipeline, "transcode", fake_transcode)
|
||||||
|
transcoded_suffix = "-720-457f0928.mp4"
|
||||||
monkeypatch.setattr(
|
monkeypatch.setattr(
|
||||||
media,
|
media,
|
||||||
"probe_media",
|
"probe_media",
|
||||||
|
|
@ -418,12 +524,12 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
||||||
"duration": "60.0",
|
"duration": "60.0",
|
||||||
"size": (
|
"size": (
|
||||||
"12345"
|
"12345"
|
||||||
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
if _.endswith(".mp4") and not _.endswith(transcoded_suffix)
|
||||||
else "9876"
|
else "9876"
|
||||||
),
|
),
|
||||||
"bit_rate": (
|
"bit_rate": (
|
||||||
"456789"
|
"456789"
|
||||||
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
if _.endswith(".mp4") and not _.endswith(transcoded_suffix)
|
||||||
else "123456"
|
else "123456"
|
||||||
),
|
),
|
||||||
"format_name": "mp4",
|
"format_name": "mp4",
|
||||||
|
|
@ -435,23 +541,23 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
||||||
"codec_name": "h264",
|
"codec_name": "h264",
|
||||||
"bit_rate": (
|
"bit_rate": (
|
||||||
"456789"
|
"456789"
|
||||||
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
if _.endswith(".mp4") and not _.endswith(transcoded_suffix)
|
||||||
else "123456"
|
else "123456"
|
||||||
),
|
),
|
||||||
"duration_ts": "60000",
|
"duration_ts": "60000",
|
||||||
"width": (
|
"width": (
|
||||||
640
|
640
|
||||||
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
if _.endswith(".mp4") and not _.endswith(transcoded_suffix)
|
||||||
else 1280
|
else 1280
|
||||||
),
|
),
|
||||||
"height": (
|
"height": (
|
||||||
360
|
360
|
||||||
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
if _.endswith(".mp4") and not _.endswith(transcoded_suffix)
|
||||||
else 720
|
else 720
|
||||||
),
|
),
|
||||||
"avg_frame_rate": (
|
"avg_frame_rate": (
|
||||||
"24/1"
|
"24/1"
|
||||||
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
if _.endswith(".mp4") and not _.endswith(transcoded_suffix)
|
||||||
else "30/1"
|
else "30/1"
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
|
|
@ -487,18 +593,20 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
||||||
assert isinstance(result["checksum"], str)
|
assert isinstance(result["checksum"], str)
|
||||||
assert result == {
|
assert result == {
|
||||||
"url": source_url,
|
"url": source_url,
|
||||||
"path": f"{video_base_path}-720.mp4",
|
"path": f"{video_base_path}-720-457f0928.mp4",
|
||||||
"published_url": (
|
"published_url": (
|
||||||
f"https://mirror.example/feeds/nasa/video/{video_base_path}-720.mp4"
|
"https://mirror.example/feeds/nasa/video/"
|
||||||
|
f"{video_base_path}-720-457f0928.mp4"
|
||||||
),
|
),
|
||||||
"checksum": result["checksum"],
|
"checksum": result["checksum"],
|
||||||
"status": "downloaded",
|
"status": "downloaded",
|
||||||
"variants": [
|
"variants": [
|
||||||
{
|
{
|
||||||
"url": (
|
"url": (
|
||||||
f"https://mirror.example/feeds/nasa/video/{video_base_path}-720.mp4"
|
"https://mirror.example/feeds/nasa/video/"
|
||||||
|
f"{video_base_path}-720-457f0928.mp4"
|
||||||
),
|
),
|
||||||
"path": f"{video_base_path}-720.mp4",
|
"path": f"{video_base_path}-720-457f0928.mp4",
|
||||||
"type": "video/mp4",
|
"type": "video/mp4",
|
||||||
"medium": "video",
|
"medium": "video",
|
||||||
"isDefault": "true",
|
"isDefault": "true",
|
||||||
|
|
@ -526,7 +634,7 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
||||||
}
|
}
|
||||||
assert persisted == [
|
assert persisted == [
|
||||||
(video_base_path, "video/mp4"),
|
(video_base_path, "video/mp4"),
|
||||||
(f"{video_base_path}-720.mp4", "video/mp4"),
|
(f"{video_base_path}-720-457f0928.mp4", "video/mp4"),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue