Replace image pipeline with profile-driven variants

- add image normalization profiles and thumbnail profiles
- generate source, full-size variant, and thumbnail image artifacts
- rewrite canonical image URLs through the first configured profile
- emit explicit image Media RSS groups with named thumbnails
- preserve legacy image paths when image conversion is disabled
- cover cache-hit source paths, inline image handling, and thumbnail export
This commit is contained in:
Abel Luck 2026-05-27 09:24:22 +02:00
parent 7316d4723f
commit 525393272e
13 changed files with 1299 additions and 124 deletions

View file

@ -8,10 +8,13 @@ from repub import settings as repub_settings
from repub.spiders.rss_spider import RssFeedSpider
from repub.utils import (
FileType,
canonical_published_image_path,
local_audio_path,
local_image_path,
local_video_path,
published_image_path,
published_media_path,
thumbnail_image_path,
)
@ -57,14 +60,17 @@ def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None:
"REPUBLISHER_FILE_DIR": "files",
"REPUBLISHER_AUDIO_DIR": "audio",
"REPUBLISHER_VIDEO_DIR": "video",
"REPUBLISHER_IMAGE": repub_settings.REPUBLISHER_IMAGE,
"REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
"REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
}
)
assert (
spider.rewrite_image_url("https://example.com/media/photo.jpg")
== f"images/{local_image_path('https://example.com/media/photo.jpg')}"
assert spider.rewrite_image_url(
"https://example.com/media/photo.jpg"
) == "images/" + canonical_published_image_path(
"https://example.com/media/photo.jpg",
repub_settings.REPUBLISHER_IMAGE,
)
assert spider.rewrite_file_url(
FileType.AUDIO,
@ -90,6 +96,28 @@ def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None:
)
def test_rss_spider_keeps_legacy_image_paths_when_image_normalization_disabled() -> (
    None
):
    """Check the rewritten image URL when image normalization is turned off.

    With ``REPUBLISHER_IMAGE_NORMALIZE_ENABLED`` set to ``False``, the spider
    is expected to return ``images/`` followed by ``local_image_path(url)``.
    """
    image_url = "https://example.com/media/photo.jpg"

    spider = RssFeedSpider(feed_name="demo", url="https://example.com/feed.rss")
    overrides = {
        "REPUBLISHER_IMAGE_DIR": "images",
        "REPUBLISHER_FILE_DIR": "files",
        "REPUBLISHER_AUDIO_DIR": "audio",
        "REPUBLISHER_VIDEO_DIR": "video",
        # The switch under test: image profiles stay configured but disabled.
        "REPUBLISHER_IMAGE_NORMALIZE_ENABLED": False,
        "REPUBLISHER_IMAGE": repub_settings.REPUBLISHER_IMAGE,
        "REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
        "REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
    }
    spider.settings = Settings(values=overrides)

    rewritten = spider.rewrite_image_url(image_url)
    expected = f"images/{local_image_path(image_url)}"
    assert rewritten == expected
def test_published_media_path_changes_when_profile_args_change() -> None:
source_url = "https://example.com/media/clip.mp4"
audio_profile = repub_settings.REPUBLISHER_AUDIO[0]
@ -113,6 +141,41 @@ def test_published_media_path_changes_when_profile_args_change() -> None:
) != published_media_path(FileType.VIDEO, source_url, base_profile)
def test_published_image_and_thumbnail_paths_change_when_profile_args_change() -> None:
    """Check that image and thumbnail paths depend on the profile arguments.

    Three assertions are made for one source URL:

    * the canonical published path for the full ``REPUBLISHER_IMAGE`` list
      equals the published path for its first profile;
    * overriding ``transform_kwargs["width"]`` in the image profile yields a
      different published path;
    * overriding ``save_kwargs["Q"]`` in the thumbnail profile yields a
      different thumbnail path.
    """
    source_url = "https://example.com/media/photo.png"
    image_profiles = repub_settings.REPUBLISHER_IMAGE
    first_image_profile = image_profiles[0]
    first_thumbnail_profile = repub_settings.REPUBLISHER_IMAGE_THUMBNAILS[0]

    # The canonical path is compared against the first configured profile.
    canonical_path = canonical_published_image_path(source_url, image_profiles)
    assert canonical_path == published_image_path(source_url, first_image_profile)

    # Same image profile, but with a different transform width.
    resized_transform = {**first_image_profile["transform_kwargs"], "width": 2048}
    resized_image_profile = {
        **first_image_profile,
        "transform_kwargs": resized_transform,
    }
    resized_path = published_image_path(source_url, resized_image_profile)
    assert resized_path != published_image_path(source_url, first_image_profile)

    # Same thumbnail profile, but with a different "Q" save argument.
    requality_save = {**first_thumbnail_profile["save_kwargs"], "Q": 60}
    requality_thumbnail_profile = {
        **first_thumbnail_profile,
        "save_kwargs": requality_save,
    }
    requality_path = thumbnail_image_path(source_url, requality_thumbnail_profile)
    assert requality_path != thumbnail_image_path(source_url, first_thumbnail_profile)
def test_rss_spider_keeps_items_with_empty_content_encoded() -> None:
feed_text = """<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" version="2.0">
@ -138,6 +201,7 @@ def test_rss_spider_keeps_items_with_empty_content_encoded() -> None:
"REPUBLISHER_FILE_DIR": "files",
"REPUBLISHER_AUDIO_DIR": "audio",
"REPUBLISHER_VIDEO_DIR": "video",
"REPUBLISHER_IMAGE": repub_settings.REPUBLISHER_IMAGE,
"REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
"REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
}