Replace image pipeline with profile-driven variants

- add image normalization profiles and thumbnail profiles
- generate source, full-size variant, and thumbnail image artifacts
- rewrite canonical image URLs through the first configured profile
- emit explicit image Media RSS groups with named thumbnails
- preserve legacy image paths when image conversion is disabled
- cover cache-hit source paths, inline image handling, and thumbnail export
This commit is contained in:
Abel Luck 2026-05-27 09:24:22 +02:00
parent 7316d4723f
commit 525393272e
13 changed files with 1299 additions and 124 deletions

View file

@@ -224,7 +224,46 @@ def test_build_feed_settings_can_disable_image_and_video_conversion(
convert_video=False,
)
assert "repub.pipelines.ImagePipeline" not in feed_settings["ITEM_PIPELINES"]
assert (
"repub.pipelines.ImageNormalizePipeline" not in feed_settings["ITEM_PIPELINES"]
)
assert (
"repub.pipelines.ImageThumbnailPipeline" not in feed_settings["ITEM_PIPELINES"]
)
assert "repub.pipelines.VideoPipeline" not in feed_settings["ITEM_PIPELINES"]
assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.AudioPipeline"] == 2
assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.FilePipeline"] == 4
assert feed_settings["REPUBLISHER_IMAGE_NORMALIZE_ENABLED"] is False
assert feed_settings["REPUBLISHER_IMAGE_THUMBNAILS_ENABLED"] is False
assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.AudioPipeline"] == 3
assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.FilePipeline"] == 5
def test_build_feed_settings_respects_image_pipeline_feature_flags(
    tmp_path: Path,
) -> None:
    """Check that the thumbnail feature flag removes only the thumbnail pipeline.

    The config overrides ``REPUBLISHER_IMAGE_THUMBNAILS_ENABLED`` to ``False``
    through ``scrapy_settings``. The resulting per-feed settings must still
    register ``ImageNormalizePipeline`` (with order value 1) while leaving
    ``ImageThumbnailPipeline`` out of ``ITEM_PIPELINES`` entirely.
    """
    out_dir = (tmp_path / "mirror").resolve()
    podcast_feed = FeedConfig(
        name="Guardian Project Podcast",
        slug="gp-pod",
        url="https://guardianproject.info/podcast/podcast.xml",
    )
    config = RepublisherConfig(
        config_path=tmp_path / "repub.toml",
        out_dir=out_dir,
        feeds=(podcast_feed,),
        # Only thumbnails are switched off; normalization keeps its default.
        scrapy_settings={"REPUBLISHER_IMAGE_THUMBNAILS_ENABLED": False},
    )

    feed_settings = build_feed_settings(
        build_base_settings(config),
        out_dir=out_dir,
        feed_slug="gp-pod",
    )

    item_pipelines = feed_settings["ITEM_PIPELINES"]
    assert item_pipelines["repub.pipelines.ImageNormalizePipeline"] == 1
    assert "repub.pipelines.ImageThumbnailPipeline" not in item_pipelines