Hash video profile paths
This commit is contained in:
parent
954608c5f9
commit
2ad0536bb0
5 changed files with 180 additions and 28 deletions
|
|
@ -170,6 +170,10 @@ def _frame_rate(stream: Dict[str, Any]) -> Optional[str]:
|
|||
return None
|
||||
|
||||
|
||||
def _scale_to_max_height(max_height: int) -> str:
|
||||
return f"scale=-2:{max_height}"
|
||||
|
||||
|
||||
def audio_meta(probe: Dict[str, Any]) -> Optional[AudioMeta]:
|
||||
stream = primary_audio_stream(probe)
|
||||
if not stream:
|
||||
|
|
@ -303,7 +307,7 @@ def video_transcode_params(
|
|||
params = {"extension": settings["extension"]}
|
||||
if len(passes) == 0 or is_vcodec:
|
||||
if not is_good_height:
|
||||
params["vf"] = f"scale={width}:{height}"
|
||||
params["vf"] = _scale_to_max_height(max_height)
|
||||
if not is_vcodec:
|
||||
params.update(settings["ffmpeg_video_params"])
|
||||
if not is_acodec or not is_audio_bitrate:
|
||||
|
|
@ -313,7 +317,7 @@ def video_transcode_params(
|
|||
for p in passes:
|
||||
p = copy.deepcopy(p)
|
||||
if not is_good_height:
|
||||
p["vf"] = f"scale={width}:{height}"
|
||||
p["vf"] = _scale_to_max_height(max_height)
|
||||
params["passes"].append(p)
|
||||
return params
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import hashlib
|
||||
import json
|
||||
import mimetypes
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
|
|
@ -42,8 +43,23 @@ def local_audio_path(s: str) -> str:
|
|||
return local_file_path(s)
|
||||
|
||||
|
||||
def variant_media_path(base_path: str, profile: Mapping[str, Any]) -> str:
|
||||
return f"{base_path}-{profile['name']}.{profile['extension']}"
|
||||
def profile_settings_hash(profile: Mapping[str, Any]) -> str:
|
||||
settings = {
|
||||
key: value
|
||||
for key, value in profile.items()
|
||||
if key not in {"name", "mimetype", "extension"}
|
||||
}
|
||||
payload = json.dumps(settings, sort_keys=True, separators=(",", ":"))
|
||||
return hashlib.sha1(to_bytes(payload)).hexdigest()[:8] # nosec
|
||||
|
||||
|
||||
def variant_media_path(
|
||||
base_path: str, profile: Mapping[str, Any], *, hashed: bool = False
|
||||
) -> str:
|
||||
profile_name = str(profile["name"])
|
||||
if hashed:
|
||||
profile_name = f"{profile_name}-{profile_settings_hash(profile)}"
|
||||
return f"{base_path}-{profile_name}.{profile['extension']}"
|
||||
|
||||
|
||||
def published_media_path(
|
||||
|
|
@ -52,7 +68,7 @@ def published_media_path(
|
|||
if file_type == FileType.AUDIO:
|
||||
return variant_media_path(local_audio_path(source_url), profile)
|
||||
if file_type == FileType.VIDEO:
|
||||
return variant_media_path(local_video_path(source_url), profile)
|
||||
return variant_media_path(local_video_path(source_url), profile, hashed=True)
|
||||
raise ValueError(f"Unsupported file type for published media path: {file_type}")
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -135,10 +135,10 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
|||
item.videos = [
|
||||
{
|
||||
"url": source_video,
|
||||
"path": f"{video_base_path}-720.mp4",
|
||||
"path": f"{video_base_path}-720-457f0928.mp4",
|
||||
"published_url": _published_url(
|
||||
"https://mirror.example",
|
||||
f"video/{video_base_path}-720.mp4",
|
||||
f"video/{video_base_path}-720-457f0928.mp4",
|
||||
),
|
||||
"checksum": "video-default",
|
||||
"status": "downloaded",
|
||||
|
|
@ -146,9 +146,9 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
|||
{
|
||||
"url": _published_url(
|
||||
"https://mirror.example",
|
||||
f"video/{video_base_path}-720.mp4",
|
||||
f"video/{video_base_path}-720-457f0928.mp4",
|
||||
),
|
||||
"path": f"{video_base_path}-720.mp4",
|
||||
"path": f"{video_base_path}-720-457f0928.mp4",
|
||||
"type": "video/mp4",
|
||||
"medium": "video",
|
||||
"isDefault": "true",
|
||||
|
|
@ -323,7 +323,7 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
|||
{
|
||||
"url": (
|
||||
f"https://mirror.example/feeds/demo/video/"
|
||||
f"{local_video_path(source_video)}-720.mp4"
|
||||
f"{local_video_path(source_video)}-720-457f0928.mp4"
|
||||
),
|
||||
"type": "video/mp4",
|
||||
"medium": "video",
|
||||
|
|
|
|||
|
|
@ -6,7 +6,13 @@ from scrapy.settings import Settings
|
|||
from repub import entrypoint as entrypoint_module
|
||||
from repub import settings as repub_settings
|
||||
from repub.spiders.rss_spider import RssFeedSpider
|
||||
from repub.utils import FileType, local_audio_path, local_image_path, local_video_path
|
||||
from repub.utils import (
|
||||
FileType,
|
||||
local_audio_path,
|
||||
local_image_path,
|
||||
local_video_path,
|
||||
published_media_path,
|
||||
)
|
||||
|
||||
|
||||
def test_entrypoint_supports_file_feed_urls(tmp_path: Path, monkeypatch) -> None:
|
||||
|
|
@ -67,14 +73,32 @@ def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None:
|
|||
)
|
||||
== f"audio/{local_audio_path('https://example.com/media/podcast.mp3')}-vbr7.mp3"
|
||||
)
|
||||
assert (
|
||||
spider.rewrite_file_url(
|
||||
assert spider.rewrite_file_url(
|
||||
FileType.VIDEO,
|
||||
"https://example.com/media/clip.mp4",
|
||||
) == (
|
||||
"video/"
|
||||
f"{local_video_path('https://example.com/media/clip.mp4')}"
|
||||
"-720-457f0928.mp4"
|
||||
)
|
||||
== f"video/{local_video_path('https://example.com/media/clip.mp4')}-720.mp4"
|
||||
|
||||
|
||||
def test_published_video_path_changes_when_profile_args_change() -> None:
|
||||
source_url = "https://example.com/media/clip.mp4"
|
||||
base_profile = repub_settings.REPUBLISHER_VIDEO[0]
|
||||
|
||||
assert published_media_path(
|
||||
FileType.AUDIO, source_url, repub_settings.REPUBLISHER_AUDIO[0]
|
||||
) == (f"{local_audio_path(source_url)}-vbr7.mp3")
|
||||
assert published_media_path(FileType.VIDEO, source_url, base_profile) == (
|
||||
f"{local_video_path(source_url)}-720-457f0928.mp4"
|
||||
)
|
||||
|
||||
changed_profile = {**base_profile, "max_height": 1080}
|
||||
assert published_media_path(
|
||||
FileType.VIDEO, source_url, changed_profile
|
||||
) != published_media_path(FileType.VIDEO, source_url, base_profile)
|
||||
|
||||
|
||||
def test_rss_spider_keeps_items_with_empty_content_encoded() -> None:
|
||||
feed_text = """<?xml version="1.0" encoding="UTF-8"?>
|
||||
|
|
|
|||
|
|
@ -204,6 +204,111 @@ def test_transcode_video_prints_ffmpeg_output_on_error(
|
|||
assert ("video-stdout", False) in printed
|
||||
|
||||
|
||||
def test_video_transcode_params_scales_to_max_height() -> None:
|
||||
params = media.video_transcode_params(
|
||||
{
|
||||
"format": {"format_name": "mp4"},
|
||||
"streams": [
|
||||
{
|
||||
"codec_type": "video",
|
||||
"codec_name": "mpeg4",
|
||||
"bit_rate": "2000000",
|
||||
"duration_ts": "1",
|
||||
"width": 1920,
|
||||
"height": 1080,
|
||||
},
|
||||
{
|
||||
"codec_type": "audio",
|
||||
"codec_name": "aac",
|
||||
"bit_rate": "128000",
|
||||
"duration_ts": "1",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
"name": "720",
|
||||
"container": "mp4",
|
||||
"vcodec": "h264",
|
||||
"acodec": "mp3",
|
||||
"audio_max_bitrate": 96000,
|
||||
"ffmpeg_audio_params": {"acodec": "libmp3lame"},
|
||||
"ffmpeg_video_params": {"vcodec": "h264", "strict": "-2"},
|
||||
"max_height": 720,
|
||||
"mimetype": "video/mp4",
|
||||
"extension": "mp4",
|
||||
},
|
||||
)
|
||||
|
||||
assert params == {
|
||||
"extension": "mp4",
|
||||
"vf": "scale=-2:720",
|
||||
"vcodec": "h264",
|
||||
"strict": "-2",
|
||||
"acodec": "libmp3lame",
|
||||
}
|
||||
|
||||
|
||||
def test_video_transcode_params_scales_to_max_height_for_multipass() -> None:
|
||||
params = media.video_transcode_params(
|
||||
{
|
||||
"format": {"format_name": "mp4"},
|
||||
"streams": [
|
||||
{
|
||||
"codec_type": "video",
|
||||
"codec_name": "mpeg4",
|
||||
"bit_rate": "2000000",
|
||||
"duration_ts": "1",
|
||||
"width": 1920,
|
||||
"height": 1080,
|
||||
},
|
||||
{
|
||||
"codec_type": "audio",
|
||||
"codec_name": "mp3",
|
||||
"bit_rate": "128000",
|
||||
"duration_ts": "1",
|
||||
},
|
||||
],
|
||||
},
|
||||
cast(
|
||||
media.VideoSettings,
|
||||
{
|
||||
"name": "720",
|
||||
"container": "webm",
|
||||
"vcodec": "libvpx-vp9",
|
||||
"acodec": "opus",
|
||||
"audio_max_bitrate": 96000,
|
||||
"ffmpeg_audio_params": {"c:a": "libopus", "b:a": "96k"},
|
||||
"ffmpeg_video_params": {},
|
||||
"max_height": 720,
|
||||
"mimetype": "video/webm",
|
||||
"extension": "webm",
|
||||
"passes": [
|
||||
{"c:v": "libvpx-vp9", "pass": "1", "f": "null"},
|
||||
{"c:v": "libvpx-vp9", "pass": "2", "c:a": "libopus"},
|
||||
],
|
||||
},
|
||||
),
|
||||
)
|
||||
|
||||
assert params == {
|
||||
"extension": "webm",
|
||||
"passes": [
|
||||
{
|
||||
"c:v": "libvpx-vp9",
|
||||
"pass": "1",
|
||||
"f": "null",
|
||||
"vf": "scale=-2:720",
|
||||
},
|
||||
{
|
||||
"c:v": "libvpx-vp9",
|
||||
"pass": "2",
|
||||
"c:a": "libopus",
|
||||
"vf": "scale=-2:720",
|
||||
},
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variants(
|
||||
monkeypatch, tmp_path: Path
|
||||
) -> None:
|
||||
|
|
@ -410,6 +515,7 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
|||
return str(output_path)
|
||||
|
||||
monkeypatch.setattr(pipeline, "transcode", fake_transcode)
|
||||
transcoded_suffix = "-720-457f0928.mp4"
|
||||
monkeypatch.setattr(
|
||||
media,
|
||||
"probe_media",
|
||||
|
|
@ -418,12 +524,12 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
|||
"duration": "60.0",
|
||||
"size": (
|
||||
"12345"
|
||||
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
||||
if _.endswith(".mp4") and not _.endswith(transcoded_suffix)
|
||||
else "9876"
|
||||
),
|
||||
"bit_rate": (
|
||||
"456789"
|
||||
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
||||
if _.endswith(".mp4") and not _.endswith(transcoded_suffix)
|
||||
else "123456"
|
||||
),
|
||||
"format_name": "mp4",
|
||||
|
|
@ -435,23 +541,23 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
|||
"codec_name": "h264",
|
||||
"bit_rate": (
|
||||
"456789"
|
||||
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
||||
if _.endswith(".mp4") and not _.endswith(transcoded_suffix)
|
||||
else "123456"
|
||||
),
|
||||
"duration_ts": "60000",
|
||||
"width": (
|
||||
640
|
||||
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
||||
if _.endswith(".mp4") and not _.endswith(transcoded_suffix)
|
||||
else 1280
|
||||
),
|
||||
"height": (
|
||||
360
|
||||
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
||||
if _.endswith(".mp4") and not _.endswith(transcoded_suffix)
|
||||
else 720
|
||||
),
|
||||
"avg_frame_rate": (
|
||||
"24/1"
|
||||
if _.endswith(".mp4") and not _.endswith("-720.mp4")
|
||||
if _.endswith(".mp4") and not _.endswith(transcoded_suffix)
|
||||
else "30/1"
|
||||
),
|
||||
},
|
||||
|
|
@ -487,18 +593,20 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
|||
assert isinstance(result["checksum"], str)
|
||||
assert result == {
|
||||
"url": source_url,
|
||||
"path": f"{video_base_path}-720.mp4",
|
||||
"path": f"{video_base_path}-720-457f0928.mp4",
|
||||
"published_url": (
|
||||
f"https://mirror.example/feeds/nasa/video/{video_base_path}-720.mp4"
|
||||
"https://mirror.example/feeds/nasa/video/"
|
||||
f"{video_base_path}-720-457f0928.mp4"
|
||||
),
|
||||
"checksum": result["checksum"],
|
||||
"status": "downloaded",
|
||||
"variants": [
|
||||
{
|
||||
"url": (
|
||||
f"https://mirror.example/feeds/nasa/video/{video_base_path}-720.mp4"
|
||||
"https://mirror.example/feeds/nasa/video/"
|
||||
f"{video_base_path}-720-457f0928.mp4"
|
||||
),
|
||||
"path": f"{video_base_path}-720.mp4",
|
||||
"path": f"{video_base_path}-720-457f0928.mp4",
|
||||
"type": "video/mp4",
|
||||
"medium": "video",
|
||||
"isDefault": "true",
|
||||
|
|
@ -526,7 +634,7 @@ def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
|||
}
|
||||
assert persisted == [
|
||||
(video_base_path, "video/mp4"),
|
||||
(f"{video_base_path}-720.mp4", "video/mp4"),
|
||||
(f"{video_base_path}-720-457f0928.mp4", "video/mp4"),
|
||||
]
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue