Replace image pipeline with profile-driven variants

- add image normalization profiles and thumbnail profiles
- generate source, full-size variant, and thumbnail image artifacts
- rewrite canonical image URLs through the first configured profile
- emit explicit image Media RSS groups with named thumbnails
- preserve legacy image paths when image conversion is disabled
- cover cache-hit source paths, inline image handling, and thumbnail export
2026-05-27 09:24:22 +02:00 · 2026-05-27 09:24:22 +02:00 · 525393272e
commit 525393272e
parent 7316d4723f
13 changed files with 1299 additions and 124 deletions
--- a/repub/config.py
+++ b/repub/config.py
@ -188,21 +188,31 @@ def build_feed_settings(
    video_dir = base_settings.get("REPUBLISHER_VIDEO_DIR", VIDEO_DIR)
    audio_dir = base_settings.get("REPUBLISHER_AUDIO_DIR", AUDIO_DIR)
    file_dir = base_settings.get("REPUBLISHER_FILE_DIR", FILE_DIR)
+    image_normalize_enabled = convert_images and base_settings.getbool(
+        "REPUBLISHER_IMAGE_NORMALIZE_ENABLED", True
+    )
+    image_thumbnails_enabled = image_normalize_enabled and base_settings.getbool(
+        "REPUBLISHER_IMAGE_THUMBNAILS_ENABLED", True
+    )
    item_pipelines = dict(base_settings.getdict("ITEM_PIPELINES"))
    item_pipelines.pop("repub.pipelines.ImagePipeline", None)
+    item_pipelines.pop("repub.pipelines.ImageNormalizePipeline", None)
+    item_pipelines.pop("repub.pipelines.ImageThumbnailPipeline", None)
    item_pipelines.pop("repub.pipelines.AudioPipeline", None)
    item_pipelines.pop("repub.pipelines.VideoPipeline", None)
    item_pipelines.pop("repub.pipelines.FilePipeline", None)
    item_pipelines.update(
        {
-            "repub.pipelines.AudioPipeline": 2,
-            "repub.pipelines.FilePipeline": 4,
+            "repub.pipelines.AudioPipeline": 3,
+            "repub.pipelines.FilePipeline": 5,
        }
    )
-    if convert_images:
-        item_pipelines["repub.pipelines.ImagePipeline"] = 1
+    if image_normalize_enabled:
+        item_pipelines["repub.pipelines.ImageNormalizePipeline"] = 1
+        if image_thumbnails_enabled:
+            item_pipelines["repub.pipelines.ImageThumbnailPipeline"] = 2
    if convert_video:
-        item_pipelines["repub.pipelines.VideoPipeline"] = 3
+        item_pipelines["repub.pipelines.VideoPipeline"] = 4
    settings = base_settings.copy()
    settings.setdict(
        {
@ -219,6 +229,8 @@ def build_feed_settings(
            "LOG_FILE": str(out_dir / "logs" / f"{feed_slug}.log"),
            "HTTPCACHE_DIR": str(out_dir / "httpcache"),
            "REPUBLISHER_IMAGE_DIR": image_dir,
+            "REPUBLISHER_IMAGE_NORMALIZE_ENABLED": image_normalize_enabled,
+            "REPUBLISHER_IMAGE_THUMBNAILS_ENABLED": image_thumbnails_enabled,
            "REPUBLISHER_VIDEO_DIR": video_dir,
            "REPUBLISHER_AUDIO_DIR": audio_dir,
            "REPUBLISHER_FILE_DIR": file_dir,
--- a/repub/exporters.py
+++ b/repub/exporters.py
@ -9,12 +9,17 @@ from repub.items import (
    ChannelElementItem,
    ElementItem,
    MediaVariant,
+    ThumbnailVariant,
+    TranscodedImageFile,
    TranscodedMediaFile,
 )
 from repub.utils import FileType, determine_file_type

 MEDIA_CONTENT_TAG = QName(rss.nsmap["media"], "content").text
 MEDIA_GROUP_TAG = QName(rss.nsmap["media"], "group").text
+MEDIA_THUMBNAIL_TAG = QName(rss.nsmap["media"], "thumbnail").text
+ANYNEWS_SLOT_ATTR = QName(rss.nsmap["anynews"], "slot").text
+ANYNEWS_TYPE_ATTR = QName(rss.nsmap["anynews"], "type").text


 class RssExporter(BaseItemExporter):
@ -52,7 +57,9 @@ class RssExporter(BaseItemExporter):
            key: str(value) for key, value in attrib.items() if value not in (None, "")
        }

-    def canonical_variant(self, media_file: TranscodedMediaFile) -> MediaVariant | None:
+    def canonical_variant(
+        self, media_file: TranscodedMediaFile | TranscodedImageFile
+    ) -> MediaVariant | None:
        for variant in media_file["variants"]:
            if variant.get("isDefault") == "true":
                return variant
@ -92,6 +99,8 @@ class RssExporter(BaseItemExporter):
    def strip_managed_media_nodes(self, item: ElementItem) -> dict[str, dict[str, str]]:
        fallbacks: dict[str, dict[str, str]] = {}
        managed_types: set[FileType] = set()
+        if self.managed_image_files(item):
+            managed_types.add(FileType.IMAGE)
        if item.audios:
            managed_types.add(FileType.AUDIO)
        if item.videos:
@ -100,6 +109,9 @@ class RssExporter(BaseItemExporter):
            return fallbacks

        for child in list(item.el):
+            if child.tag == MEDIA_THUMBNAIL_TAG and FileType.IMAGE in managed_types:
+                item.el.remove(child)
+                continue
            if child.tag == MEDIA_CONTENT_TAG:
                if self.owned_media_type(child, managed_types) is None:
                    continue
@ -113,25 +125,43 @@ class RssExporter(BaseItemExporter):

            if child.tag != MEDIA_GROUP_TAG:
                continue
+            managed_image_group = False
            for media_content in list(child):
                if media_content.tag != MEDIA_CONTENT_TAG:
                    continue
-                if self.owned_media_type(media_content, managed_types) is None:
+                owned_type = self.owned_media_type(media_content, managed_types)
+                if owned_type is None:
                    continue
+                if owned_type == FileType.IMAGE:
+                    managed_image_group = True
                fallbacks[media_content.get("url", "")] = {
                    key: value
                    for key, value in media_content.attrib.items()
                    if key in {"expression", "lang"}
                }
                child.remove(media_content)
+            if managed_image_group:
+                for media_thumbnail in list(child):
+                    if media_thumbnail.tag == MEDIA_THUMBNAIL_TAG:
+                        child.remove(media_thumbnail)
            if len(child) == 0:
                item.el.remove(child)
        return fallbacks

+    def managed_image_files(self, item: ElementItem) -> list[TranscodedImageFile]:
+        media_image_urls = set(item.media_image_urls)
+        if not media_image_urls:
+            return []
+        return [image for image in item.images if image["url"] in media_image_urls]
+
    def append_media_groups(
        self, item: ElementItem, fallbacks: dict[str, dict[str, str]]
    ):
-        for media_file in [*item.audios, *item.videos]:
+        for media_file in [
+            *self.managed_image_files(item),
+            *item.audios,
+            *item.videos,
+        ]:
            if not media_file["variants"]:
                continue
            fallback_attrib = fallbacks.get(media_file["published_url"], {})
@ -141,7 +171,11 @@ class RssExporter(BaseItemExporter):
                        **self.media_content_attrib(variant, fallback_attrib)
                    )
                    for variant in media_file["variants"]
-                ]
+                ],
+                *[
+                    rss.MEDIA.thumbnail(**self.media_thumbnail_attrib(thumbnail))
+                    for thumbnail in media_file.get("thumbnails", [])
+                ],
            )
            if group is not None:
                item.el.append(group)
@ -170,10 +204,22 @@ class RssExporter(BaseItemExporter):
        )
        return attrib

+    def media_thumbnail_attrib(self, thumbnail: ThumbnailVariant) -> dict[str, str]:
+        attrib = self.compact_attrib(
+            url=thumbnail.get("url"),
+            width=thumbnail.get("width"),
+            height=thumbnail.get("height"),
+        )
+        if thumbnail.get("slot"):
+            attrib[ANYNEWS_SLOT_ATTR] = str(thumbnail["slot"])
+        if thumbnail.get("type"):
+            attrib[ANYNEWS_TYPE_ATTR] = str(thumbnail["type"])
+        return attrib
+
    def apply_transcoded_media(self, item: Any) -> None:
        if not isinstance(item, ElementItem):
            return
-        if not item.audios and not item.videos:
+        if not self.managed_image_files(item) and not item.audios and not item.videos:
            return
        self.rebuild_enclosures(item)
        fallbacks = self.strip_managed_media_nodes(item)
--- a/repub/items.py
+++ b/repub/items.py
@ -1,4 +1,4 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Any, List, TypedDict


@ -8,7 +8,7 @@ class MediaVariant(TypedDict, total=False):
    type: str
    medium: str
    isDefault: str
-    fileSize: str
+    fileSize: int | str
    bitrate: int | float | str
    samplingrate: int | str
    channels: int | str
@ -29,18 +29,39 @@ class TranscodedMediaFile(TypedDict):
    variants: List[MediaVariant]


+class ThumbnailVariant(TypedDict, total=False):
+    url: str
+    path: str
+    width: int | str
+    height: int | str
+    slot: str
+    type: str
+
+
+class TranscodedImageFile(TypedDict):
+    url: str
+    path: str
+    checksum: str | None
+    status: str
+    published_url: str
+    source_path: str
+    variants: List[MediaVariant]
+    thumbnails: List[ThumbnailVariant]
+
+
@dataclass
 class ElementItem:
    feed_name: str
    el: Any
    image_urls: List[str]
-    images: List[Any]
+    images: List[TranscodedImageFile]
    file_urls: List[str]
    files: List[Any]
    audio_urls: List[str]
    audios: List[TranscodedMediaFile]
    video_urls: List[str]
    videos: List[TranscodedMediaFile]
+    media_image_urls: List[str] = field(default_factory=list)


@dataclass
@ -48,4 +69,5 @@ class ChannelElementItem:
    feed_name: str
    el: Any
    image_urls: List[str]
-    images: List[Any]
+    images: List[TranscodedImageFile]
+    media_image_urls: List[str] = field(default_factory=list)
--- a/repub/pipelines.py
+++ b/repub/pipelines.py
@ -16,7 +16,12 @@ from scrapy.pipelines.files import FilesPipeline as BaseFilesPipeline

 import repub.utils
 from repub import media
-from repub.items import MediaVariant, TranscodedMediaFile
+from repub.items import (
+    MediaVariant,
+    ThumbnailVariant,
+    TranscodedImageFile,
+    TranscodedMediaFile,
+)

 logger = logging.getLogger(__name__)

@ -34,34 +39,108 @@ def image_mimetype(response=None, *, url: str | None = None) -> str | None:
    return None


-def convert_image_body_to_jpeg(
-    body: bytes,
-    *,
-    source_mimetype: str | None = None,
-) -> tuple[BytesIO, int, int]:
+def image_loader_name(image: Any) -> str:
+    if image.get_typeof("vips-loader"):
+        return str(image.get("vips-loader"))
+    return ""
+
+
+def image_loader_mimetype(loader: str, fallback: str | None = None) -> str | None:
+    known = {
+        "jpegload": "image/jpeg",
+        "pngload": "image/png",
+        "gifload": "image/gif",
+        "svgload": "image/svg+xml",
+        "tiffload": "image/tiff",
+        "webpload": "image/webp",
+        "heifload": "image/heif",
+        "jxlload": "image/jxl",
+    }
+    for prefix, mimetype in known.items():
+        if loader.startswith(prefix):
+            return mimetype
+    return fallback
+
+
+def load_image_from_buffer(body: bytes) -> Any:
    try:
-        image = cast(
+        return cast(
            Any,
            pyvips.Image.new_from_buffer(body, "", access="sequential"),
-        ).autorot()
+        )
    except pyvips.Error as exc:
        raise ImageException(str(exc)) from exc

-    width = image.width
-    height = image.height
-    loader = ""
-    if image.get_typeof("vips-loader"):
-        loader = str(image.get("vips-loader"))
-    if source_mimetype == "image/jpeg" or loader.startswith("jpegload"):
-        return BytesIO(body), width, height

-    if image.hasalpha():
-        image = image.flatten(background=[255, 255, 255])
+def load_image_from_file(file_path: str | Path) -> Any:
+    try:
+        return cast(
+            Any,
+            pyvips.Image.new_from_file(str(file_path), access="sequential"),
+        )
+    except pyvips.Error as exc:
+        raise ImageException(str(exc)) from exc
+
+
+def render_image_profile(source_path: str | Path, profile: dict[str, Any]) -> BytesIO:
+    transform = str(profile["transform"])
+    transform_kwargs = dict(profile.get("transform_kwargs", {}))
+    width = int(transform_kwargs.pop("width"))
+    if transform == "thumbnail":
+        image = cast(
+            Any,
+            pyvips.Image.thumbnail(str(source_path), width, **transform_kwargs),
+        )
+    elif transform == "thumbnail_buffer":
+        image = cast(
+            Any,
+            pyvips.Image.thumbnail_buffer(
+                Path(source_path).read_bytes(),
+                width,
+                **transform_kwargs,
+            ),
+        )
+    else:
+        raise ImageException(f"Unsupported image transform: {transform}")
+
    image = image.colourspace("srgb")
-    return BytesIO(image.jpegsave_buffer()), width, height
+    if image.hasalpha() and (
+        profile["mimetype"] == "image/jpeg"
+        or "background" in profile.get("save_kwargs", {})
+    ):
+        image = image.flatten(
+            background=profile.get("save_kwargs", {}).get("background", [255, 255, 255])
+        )
+
+    save_name = str(profile["save"])
+    try:
+        image_bytes = getattr(image, save_name)(**dict(profile.get("save_kwargs", {})))
+    except pyvips.Error as exc:
+        raise ImageException(str(exc)) from exc
+    return BytesIO(cast(bytes, image_bytes))


-class ImagePipeline(BaseFilesPipeline):
+def image_buffer_meta(
+    body: bytes,
+    *,
+    fallback_mimetype: str | None = None,
+) -> tuple[int, int, int, str | None]:
+    image = load_image_from_buffer(body)
+    mimetype = image_loader_mimetype(image_loader_name(image), fallback_mimetype)
+    return image.width, image.height, len(body), mimetype
+
+
+def image_variant_meta(
+    file_path: str | Path,
+    *,
+    fallback_mimetype: str | None = None,
+) -> tuple[int, int, int, str | None]:
+    image = load_image_from_file(file_path)
+    mimetype = image_loader_mimetype(image_loader_name(image), fallback_mimetype)
+    return image.width, image.height, Path(file_path).stat().st_size, mimetype
+
+
+class ImageNormalizePipeline(BaseFilesPipeline):
    MEDIA_NAME = "image"
    EXPIRES = 90
    MIN_WIDTH = 0
@ -100,29 +179,312 @@ class ImagePipeline(BaseFilesPipeline):
            self.MIN_HEIGHT,
        )

-    def file_path(self, request, response=None, info=None, *, item=None):
-        return repub.utils.local_image_path(request.url)
+    def get_image_settings(self) -> list[dict[str, Any]]:
+        return list(self.settings["REPUBLISHER_IMAGE"])

-    def file_downloaded(self, response, request, info, *, item=None):
-        path = self.file_path(request, response=response, info=info, item=item)
-        buf, width, height = convert_image_body_to_jpeg(
-            response.body,
-            source_mimetype=image_mimetype(response, url=request.url),
+    def file_path(self, request, response=None, info=None, *, item=None):
+        return repub.utils.canonical_published_image_path(
+            request.url,
+            self.get_image_settings(),
        )
-        if width < self.min_width or height < self.min_height:
+
+    def source_path(self, request, response=None) -> str:
+        return repub.utils.source_image_path(
+            request.url,
+            image_mimetype(response, url=request.url),
+        )
+
+    def resolve_source_path(self, request, response=None) -> str:
+        source_path = self.source_path(request, response)
+        if response is not None:
+            return source_path
+        source_file = self.local_store_path(source_path)
+        if source_file.exists():
+            return source_path
+        source_dir = self.local_store_path(
+            str(self.settings.get("REPUBLISHER_IMAGE_SOURCE_SUBDIR", "source"))
+        )
+        guid = repub.utils.image_guid(request.url)
+        matches = sorted(source_dir.glob(f"{guid}.*"))
+        if matches:
+            return f"{source_dir.name}/{matches[0].name}"
+        return source_path
+
+    def variant_paths(self, source_url: str) -> list[tuple[bool, dict[str, Any], str]]:
+        return [
+            (
+                index == 0,
+                setting,
+                repub.utils.published_image_path(source_url, setting),
+            )
+            for index, setting in enumerate(self.get_image_settings())
+        ]
+
+    def published_url(self, path: str, item=None) -> str:
+        relative_path = f"{self.settings['REPUBLISHER_IMAGE_DIR']}/{path}"
+        feed_url = str(self.settings.get("REPUBLISHER_FEED_URL", "")).rstrip("/")
+        if feed_url == "" or item is None:
+            return relative_path
+        return f"{feed_url}/feeds/{item.feed_name}/{relative_path}"
+
+    def local_store_path(self, path: str) -> Path:
+        return Path(cast(Any, self.store).basedir) / path
+
+    def image_variant(
+        self,
+        *,
+        path: str,
+        mimetype: str,
+        width: int,
+        height: int,
+        file_size: int,
+        is_default: bool,
+        item=None,
+    ) -> MediaVariant:
+        variant: MediaVariant = {
+            "url": self.published_url(path, item),
+            "path": path,
+            "type": mimetype,
+            "medium": repub.utils.FileType.IMAGE.value,
+            "isDefault": "true" if is_default else "false",
+            "fileSize": file_size,
+            "width": width,
+            "height": height,
+        }
+        return variant
+
+    def load_variants_from_disk(self, request, *, item=None) -> list[MediaVariant]:
+        variants: list[MediaVariant] = []
+        for is_default, setting, path in self.variant_paths(request.url):
+            file_path = self.local_store_path(path)
+            if not file_path.exists():
+                continue
+            width, height, file_size, mimetype = image_variant_meta(
+                file_path,
+                fallback_mimetype=setting["mimetype"],
+            )
+            variants.append(
+                self.image_variant(
+                    path=path,
+                    mimetype=mimetype or setting["mimetype"],
+                    width=width,
+                    height=height,
+                    file_size=file_size,
+                    is_default=is_default,
+                    item=item,
+                )
+            )
+        return variants
+
+    def make_file_result(
+        self,
+        request,
+        *,
+        checksum: str | None,
+        status: str,
+        response=None,
+        item=None,
+    ) -> TranscodedImageFile:
+        path = self.file_path(request, item=item)
+        return {
+            "url": request.url,
+            "path": path,
+            "published_url": self.published_url(path, item),
+            "checksum": checksum,
+            "status": status,
+            "source_path": self.resolve_source_path(request, response),
+            "variants": self.load_variants_from_disk(request, item=item),
+            "thumbnails": [],
+        }
+
+    def media_to_download(self, request, info, *, item=None):
+        canonical_path = self.file_path(request, info=info, item=item)
+        canonical_stat = cast(
+            dict[str, Any] | None,
+            self.store.stat_file(canonical_path, info),
+        )
+        if not canonical_stat:
+            return None
+        last_modified = canonical_stat.get("last_modified")
+        if not last_modified:
+            return None
+        age_days = (time.time() - last_modified) / 60 / 60 / 24
+        if age_days > self.expires:
+            return None
+        if not cast(
+            dict[str, Any] | None,
+            self.store.stat_file(self.resolve_source_path(request), info),
+        ):
+            return None
+        for _, _, path in self.variant_paths(request.url):
+            if not cast(dict[str, Any] | None, self.store.stat_file(path, info)):
+                return None
+        self.inc_stats("uptodate")
+        return self.make_file_result(
+            request,
+            checksum=canonical_stat.get("checksum"),
+            status="uptodate",
+            item=item,
+        )
+
+    def persist_variants(self, response, request, info, *, item=None) -> str | None:
+        source_file_path = self.local_store_path(self.source_path(request, response))
+        source_buf = BytesIO(response.body)
+        source_image = load_image_from_buffer(response.body).autorot()
+        if source_image.width < self.min_width or source_image.height < self.min_height:
            raise ImageException(
                "Image too small "
-                f"({width}x{height} < {self.min_width}x{self.min_height})"
+                f"({source_image.width}x{source_image.height} < "
+                f"{self.min_width}x{self.min_height})"
            )
-        checksum = buffer_checksum(buf)
-        self.store.persist_file(
-            path,
-            buf,
-            info,
-            meta={"width": width, "height": height},
-            headers={"Content-Type": "image/jpeg"},
+        if not cast(
+            dict[str, Any] | None,
+            self.store.stat_file(self.source_path(request, response), info),
+        ):
+            self.store.persist_file(
+                self.source_path(request, response),
+                source_buf,
+                info,
+                meta={"width": source_image.width, "height": source_image.height},
+                headers={
+                    "Content-Type": image_loader_mimetype(
+                        image_loader_name(source_image),
+                        image_mimetype(response, url=request.url),
+                    )
+                    or "application/octet-stream"
+                },
+            )
+        canonical_path = self.file_path(
+            request, response=response, info=info, item=item
        )
-        return checksum
+        canonical_checksum = None
+        for _, setting, final_path in self.variant_paths(request.url):
+            stat = cast(dict[str, Any] | None, self.store.stat_file(final_path, info))
+            if stat:
+                if final_path == canonical_path:
+                    canonical_checksum = stat.get("checksum")
+                continue
+            out_buf = render_image_profile(source_file_path, setting)
+            width, height, file_size, _ = image_buffer_meta(
+                out_buf.getvalue(),
+                fallback_mimetype=setting["mimetype"],
+            )
+            checksum = buffer_checksum(out_buf)
+            self.store.persist_file(
+                final_path,
+                out_buf,
+                info,
+                meta={"width": width, "height": height, "fileSize": file_size},
+                headers={"Content-Type": setting["mimetype"]},
+            )
+            if final_path == canonical_path:
+                canonical_checksum = checksum
+        return canonical_checksum
+
+    def media_downloaded(self, response, request, info, *, item=None):
+        if response.status != 200:
+            raise FileException("download-error")
+        if not response.body:
+            raise FileException("empty-content")
+        status = "cached" if "cached" in response.flags else "downloaded"
+        self.inc_stats(status)
+        checksum = self.persist_variants(response, request, info, item=item)
+        return self.make_file_result(
+            request,
+            checksum=checksum,
+            status=status,
+            response=response,
+            item=item,
+        )
+
+
+class ImageThumbnailPipeline:
+    @classmethod
+    def from_crawler(cls, crawler: Crawler):
+        return cls(crawler.settings["IMAGES_STORE"], crawler=crawler)
+
+    def __init__(self, store_uri: Union[str, PathLike], *, crawler: Crawler):
+        self.settings = crawler.settings
+        self.store_dir = Path(store_uri)
+
+    def get_thumbnail_settings(self) -> list[dict[str, Any]]:
+        return list(self.settings["REPUBLISHER_IMAGE_THUMBNAILS"])
+
+    def local_store_path(self, path: str) -> Path:
+        return self.store_dir / path
+
+    def published_url(self, path: str, item=None) -> str:
+        relative_path = f"{self.settings['REPUBLISHER_IMAGE_DIR']}/{path}"
+        feed_url = str(self.settings.get("REPUBLISHER_FEED_URL", "")).rstrip("/")
+        if feed_url == "" or item is None:
+            return relative_path
+        return f"{feed_url}/feeds/{item.feed_name}/{relative_path}"
+
+    def persist_thumbnail(
+        self, source_file: Path, final_path: str, profile: dict[str, Any]
+    ):
+        out_buf = render_image_profile(source_file, profile)
+        target = self.local_store_path(final_path)
+        target.parent.mkdir(parents=True, exist_ok=True)
+        target.write_bytes(out_buf.getvalue())
+
+    def load_thumbnail(
+        self,
+        *,
+        source_url: str,
+        profile: dict[str, Any],
+        item=None,
+    ) -> ThumbnailVariant | None:
+        final_path = repub.utils.thumbnail_image_path(source_url, profile)
+        file_path = self.local_store_path(final_path)
+        if not file_path.exists():
+            return None
+        width, height, _, mimetype = image_variant_meta(
+            file_path,
+            fallback_mimetype=profile["mimetype"],
+        )
+        return {
+            "url": self.published_url(final_path, item),
+            "path": final_path,
+            "slot": str(profile["name"]),
+            "type": mimetype or profile["mimetype"],
+            "width": width,
+            "height": height,
+        }
+
+    def process_item(self, item, spider):
+        del spider
+        if not getattr(item, "images", None):
+            return item
+        for image in item.images:
+            source_path = image.get("source_path")
+            if not source_path:
+                image["thumbnails"] = []
+                continue
+            source_file = self.local_store_path(source_path)
+            thumbnails: list[ThumbnailVariant] = []
+            for profile in self.get_thumbnail_settings():
+                final_path = repub.utils.thumbnail_image_path(image["url"], profile)
+                if not self.local_store_path(final_path).exists():
+                    try:
+                        self.persist_thumbnail(source_file, final_path, profile)
+                    except ImageException as exc:
+                        logger.warning(
+                            "Failed to generate thumbnail for %s: %s", image["url"], exc
+                        )
+                        continue
+                thumbnail = self.load_thumbnail(
+                    source_url=image["url"],
+                    profile=profile,
+                    item=item,
+                )
+                if thumbnail is not None:
+                    thumbnails.append(thumbnail)
+            image["thumbnails"] = thumbnails
+        return item
+
+
+ImagePipeline = ImageNormalizePipeline


 class FilePipeline(BaseFilesPipeline):
--- a/repub/rss.py
+++ b/repub/rss.py
@ -46,6 +46,7 @@ nsmap = {
    "itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd",
    "dc": "http://purl.org/dc/elements/1.1/",
    "atom": "http://www.w3.org/2005/Atom",
+    "anynews": "https://guardianproject.info/rss/anynews/1.0",
 }

 CONTENT = SafeElementMaker(nsmap={None: nsmap["content"]}, namespace=nsmap["content"])
--- a/repub/settings.py
+++ b/repub/settings.py
@ -100,6 +100,116 @@ LOG_LEVEL = "INFO"

 MEDIA_ALLOW_REDIRECTS = True

+REPUBLISHER_IMAGE_NORMALIZE_ENABLED = True
+REPUBLISHER_IMAGE_THUMBNAILS_ENABLED = True
+
+REPUBLISHER_IMAGE_DIR = "images"
+REPUBLISHER_IMAGE_FULL_SUBDIR = "full"
+REPUBLISHER_IMAGE_SOURCE_SUBDIR = "source"
+REPUBLISHER_IMAGE_THUMBNAIL_SUBDIR = "thumbs"
+
+REPUBLISHER_IMAGE = [
+    {
+        "name": "main_webp",
+        "mimetype": "image/webp",
+        "extension": "webp",
+        "transform": "thumbnail",
+        "transform_kwargs": {
+            "width": 1600,
+            "height": 1600,
+            "size": "down",
+            "no_rotate": False,
+            "linear": False,
+            "fail_on": "warning",
+        },
+        "save": "webpsave_buffer",
+        "save_kwargs": {
+            "Q": 82,
+            "preset": "photo",
+            "smart_subsample": True,
+            "effort": 4,
+            "alpha_q": 90,
+            "keep": "none",
+        },
+    },
+    {
+        "name": "fallback_jpeg",
+        "mimetype": "image/jpeg",
+        "extension": "jpg",
+        "transform": "thumbnail",
+        "transform_kwargs": {
+            "width": 1600,
+            "height": 1600,
+            "size": "down",
+            "no_rotate": False,
+            "linear": False,
+            "fail_on": "warning",
+        },
+        "save": "jpegsave_buffer",
+        "save_kwargs": {
+            "Q": 85,
+            "interlace": True,
+            "optimize_coding": True,
+            "trellis_quant": True,
+            "optimize_scans": True,
+            "subsample_mode": "auto",
+            "keep": "none",
+            "background": [255, 255, 255],
+        },
+    },
+]
+
+REPUBLISHER_IMAGE_THUMBNAILS = [
+    {
+        "name": "card_hero",
+        "mimetype": "image/jpeg",
+        "extension": "jpg",
+        "transform": "thumbnail",
+        "transform_kwargs": {
+            "width": 640,
+            "height": 360,
+            "size": "down",
+            "crop": "attention",
+            "no_rotate": False,
+            "linear": False,
+            "fail_on": "warning",
+        },
+        "save": "jpegsave_buffer",
+        "save_kwargs": {
+            "Q": 82,
+            "interlace": True,
+            "optimize_coding": True,
+            "subsample_mode": "auto",
+            "keep": "none",
+            "background": [255, 255, 255],
+        },
+    },
+    {
+        "name": "list_square",
+        "mimetype": "image/jpeg",
+        "extension": "jpg",
+        "transform": "thumbnail",
+        "transform_kwargs": {
+            "width": 160,
+            "height": 160,
+            "size": "down",
+            "crop": "centre",
+            "no_rotate": False,
+            "linear": False,
+            "fail_on": "warning",
+        },
+        "save": "jpegsave_buffer",
+        "save_kwargs": {
+            "Q": 78,
+            "interlace": True,
+            "optimize_coding": True,
+            "subsample_mode": "auto",
+            "keep": "none",
+            "background": [255, 255, 255],
+        },
+    },
+]
+
 REPUBLISHER_AUDIO = [
    {
        "name": "mp3_vbr7_voice",
--- a/repub/spiders/rss_spider.py
+++ b/repub/spiders/rss_spider.py
@ -21,6 +21,7 @@ from repub.rss import (
 )
 from repub.utils import (
    FileType,
+    canonical_published_image_path,
    canonical_published_media_path,
    determine_file_type,
    local_file_path,
@ -54,7 +55,16 @@ class BaseRssFeedSpider(Spider):
        local_path = local_file_path(url)
        if file_type == FileType.IMAGE:
            file_dir = self.settings["REPUBLISHER_IMAGE_DIR"]
-            local_path = local_image_path(url)
+            image_profiles = (
+                self.settings.get("REPUBLISHER_IMAGE") or []
+                if self.settings.getbool("REPUBLISHER_IMAGE_NORMALIZE_ENABLED", True)
+                else []
+            )
+            local_path = (
+                canonical_published_image_path(url, image_profiles)
+                if image_profiles
+                else local_image_path(url)
+            )
        elif file_type == FileType.VIDEO:
            file_dir = self.settings["REPUBLISHER_VIDEO_DIR"]
            local_path = canonical_published_media_path(
@ -278,6 +288,7 @@ class RssFeedSpider(BaseRssFeedSpider):

    def parse_entry(self, response, feed, entry):
        image_urls = []
+        media_image_urls = []
        file_urls = []
        audio_urls = []
        video_urls = []
@ -323,6 +334,7 @@ class RssFeedSpider(BaseRssFeedSpider):
        )
        if entry.get("image"):
            image_urls.append(entry.get("image").href)
+            media_image_urls.append(entry.get("image").href)
        for enc in entry.enclosures:
            url = enc.get("href")
            file_type = determine_file_type(url=url, mimetype=enc.get("type"))
@ -381,6 +393,8 @@ class RssFeedSpider(BaseRssFeedSpider):
                    )
                )
                add_url(file_type, media.get("url"))
+                if file_type == FileType.IMAGE:
+                    media_image_urls.append(media.get("url"))
        return ElementItem(
            feed_name=self.feed_name,
            el=item,
@ -392,6 +406,7 @@ class RssFeedSpider(BaseRssFeedSpider):
            audios=[],
            video_urls=video_urls,
            videos=[],
+            media_image_urls=media_image_urls,
        )

    WEBMASTER_VALUE = "support@guardianproject.info (Guardian Project)"
--- a/repub/static/app.css
+++ b/repub/static/app.css
@ -419,6 +419,9 @@
  .rotate-180 {
    rotate: 180deg;
  }
+  .transform {
+    transform: var(--tw-rotate-x,) var(--tw-rotate-y,) var(--tw-rotate-z,) var(--tw-skew-x,) var(--tw-skew-y,);
+  }
  .animate-pulse {
    animation: var(--animate-pulse);
  }
@ -1221,6 +1224,26 @@
  inherits: false;
  initial-value: 0;
 }
+@property --tw-rotate-x {
+  syntax: "*";
+  inherits: false;
+}
+@property --tw-rotate-y {
+  syntax: "*";
+  inherits: false;
+}
+@property --tw-rotate-z {
+  syntax: "*";
+  inherits: false;
+}
+@property --tw-skew-x {
+  syntax: "*";
+  inherits: false;
+}
+@property --tw-skew-y {
+  syntax: "*";
+  inherits: false;
+}
@property --tw-space-y-reverse {
  syntax: "*";
  inherits: false;
@ -1460,6 +1483,11 @@
      --tw-translate-x: 0;
      --tw-translate-y: 0;
      --tw-translate-z: 0;
+      --tw-rotate-x: initial;
+      --tw-rotate-y: initial;
+      --tw-rotate-z: initial;
+      --tw-skew-x: initial;
+      --tw-skew-y: initial;
      --tw-space-y-reverse: 0;
      --tw-space-x-reverse: 0;
      --tw-divide-y-reverse: 0;
--- a/repub/utils.py
+++ b/repub/utils.py
@ -43,6 +43,50 @@ def local_audio_path(s: str) -> str:
    return local_file_path(s)


+def image_guid(source_url: str) -> str:
+    """Return the hex SHA-1 digest of ``source_url``.
+
+    The digest is the per-image identifier that the image path helpers in
+    this module embed in ``source/``, ``full/`` and ``thumbs/`` paths.
+    """
+    digest = hashlib.sha1(to_bytes(source_url))  # nosec
+    return digest.hexdigest()
+
+
+def image_extension(mimetype_or_extension: str | None, source_url: str = "") -> str:
+    """Return a dotted file extension for an image.
+
+    Args:
+        mimetype_or_extension: A dotted extension (``".png"``), a bare
+            extension (``"png"``) or a MIME type (``"image/png"``, optionally
+            followed by ``;``-separated parameters). May be ``None`` or empty.
+        source_url: URL (or path) of the image; its path suffix is the
+            fallback when the first argument yields no extension.
+
+    Returns:
+        The extension including the leading dot. ``".jpe"`` is normalized to
+        ``".jpg"``, and ``".img"`` is returned when nothing can be derived.
+    """
+    extension = ""
+    if mimetype_or_extension:
+        if mimetype_or_extension.startswith("."):
+            extension = mimetype_or_extension
+        elif "/" in mimetype_or_extension:
+            # Drop parameters such as "; charset=binary" before the lookup.
+            essence = mimetype_or_extension.split(";", 1)[0].strip()
+            extension = mimetypes.guess_extension(essence) or ""
+        else:
+            extension = f".{mimetype_or_extension}"
+    if not extension:
+        # An unknown MIME type must not produce an extension-less path, so
+        # fall back to the URL. Only the URL *path* is inspected so a query
+        # string or fragment never leaks into the extension.
+        from urllib.parse import urlsplit
+
+        extension = Path(urlsplit(source_url).path).suffix
+    if extension == ".jpe":
+        return ".jpg"
+    return extension or ".img"
+
+
+def source_image_path(source_url: str, mimetype_or_extension: str | None = None) -> str:
+    """Return the ``source/``-prefixed path for ``source_url``.
+
+    The file name is the image guid followed by the extension resolved by
+    ``image_extension`` from ``mimetype_or_extension`` and ``source_url``.
+    """
+    guid = image_guid(source_url)
+    suffix = image_extension(mimetype_or_extension, source_url)
+    return "source/" + guid + suffix
+
+
+def published_image_path(source_url: str, profile: Mapping[str, Any]) -> str:
+    """Return the ``full/`` variant path of ``source_url`` for ``profile``.
+
+    Delegates to ``variant_media_path`` with ``hashed=True``, using
+    ``full/<image guid>`` as the base path.
+    """
+    base_path = "full/" + image_guid(source_url)
+    return variant_media_path(base_path, profile, hashed=True)
+
+
+def canonical_published_image_path(
+    source_url: str, profiles: Sequence[Mapping[str, Any]]
+) -> str:
+    """Return the published path of ``source_url`` for the first profile.
+
+    Raises:
+        ValueError: If ``profiles`` is empty.
+    """
+    if profiles:
+        # Only the first configured profile determines the canonical path.
+        return published_image_path(source_url, profiles[0])
+    raise ValueError("Missing image normalization profiles")
+
+
+def thumbnail_image_path(source_url: str, profile: Mapping[str, Any]) -> str:
+    """Return the ``thumbs/`` variant path of ``source_url`` for ``profile``.
+
+    Delegates to ``variant_media_path`` with ``hashed=True``, using
+    ``thumbs/<image guid>`` as the base path.
+    """
+    base_path = "thumbs/" + image_guid(source_url)
+    return variant_media_path(base_path, profile, hashed=True)
+
+
 def profile_settings_hash(profile: Mapping[str, Any]) -> str:
    settings = {
        key: value
@ -65,6 +109,8 @@ def variant_media_path(
 def published_media_path(
    file_type: FileType, source_url: str, profile: Mapping[str, Any]
 ) -> str:
+    if file_type == FileType.IMAGE:
+        return published_image_path(source_url, profile)
    if file_type == FileType.AUDIO:
        return variant_media_path(local_audio_path(source_url), profile, hashed=True)
    if file_type == FileType.VIDEO:
@ -79,6 +125,8 @@ def canonical_published_media_path(
        raise ValueError(f"Missing transcode profiles for {file_type.value}")
    # The first configured profile is the public URL contract. Reordering profiles
    # changes published URLs for already-mirrored media.
+    if file_type == FileType.IMAGE:
+        return canonical_published_image_path(source_url, profiles)
    return published_media_path(file_type, source_url, profiles[0])


--- a/tests/test_config.py
+++ b/tests/test_config.py
@ -224,7 +224,46 @@ def test_build_feed_settings_can_disable_image_and_video_conversion(
        convert_video=False,
    )

-    assert "repub.pipelines.ImagePipeline" not in feed_settings["ITEM_PIPELINES"]
+    assert (
+        "repub.pipelines.ImageNormalizePipeline" not in feed_settings["ITEM_PIPELINES"]
+    )
+    assert (
+        "repub.pipelines.ImageThumbnailPipeline" not in feed_settings["ITEM_PIPELINES"]
+    )
    assert "repub.pipelines.VideoPipeline" not in feed_settings["ITEM_PIPELINES"]
-    assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.AudioPipeline"] == 2
-    assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.FilePipeline"] == 4
+    assert feed_settings["REPUBLISHER_IMAGE_NORMALIZE_ENABLED"] is False
+    assert feed_settings["REPUBLISHER_IMAGE_THUMBNAILS_ENABLED"] is False
+    assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.AudioPipeline"] == 3
+    assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.FilePipeline"] == 5
+
+
+def test_build_feed_settings_respects_image_pipeline_feature_flags(
+    tmp_path: Path,
+) -> None:
+    """Disabling only thumbnails keeps the normalize pipeline at priority 1."""
+    mirror_dir = (tmp_path / "mirror").resolve()
+    podcast_feed = FeedConfig(
+        name="Guardian Project Podcast",
+        slug="gp-pod",
+        url="https://guardianproject.info/podcast/podcast.xml",
+    )
+    config = RepublisherConfig(
+        config_path=tmp_path / "repub.toml",
+        out_dir=mirror_dir,
+        feeds=(podcast_feed,),
+        scrapy_settings={"REPUBLISHER_IMAGE_THUMBNAILS_ENABLED": False},
+    )
+
+    feed_settings = build_feed_settings(
+        build_base_settings(config),
+        out_dir=mirror_dir,
+        feed_slug="gp-pod",
+    )
+    pipelines = feed_settings["ITEM_PIPELINES"]
+
+    assert pipelines["repub.pipelines.ImageNormalizePipeline"] == 1
+    assert "repub.pipelines.ImageThumbnailPipeline" not in pipelines
--- a/tests/test_feed_validation.py
+++ b/tests/test_feed_validation.py
@ -16,10 +16,12 @@ from repub.rss import nsmap
 from repub.spiders.rss_spider import RssFeedSpider
 from repub.utils import (
    FileType,
+    canonical_published_image_path,
    local_audio_path,
-    local_image_path,
    local_video_path,
+    published_image_path,
    published_media_path,
+    thumbnail_image_path,
 )

 RSS_DATE_PATTERN = re.compile(
@ -44,6 +46,7 @@ def _serialize_feed(
            "REPUBLISHER_FILE_DIR": "files",
            "REPUBLISHER_AUDIO_DIR": "audio",
            "REPUBLISHER_VIDEO_DIR": "video",
+            "REPUBLISHER_IMAGE": repub_settings.REPUBLISHER_IMAGE,
            "REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
            "REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
            "REPUBLISHER_FEED_URL": feed_url,
@ -75,6 +78,18 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
    source_video = "https://source.example/media/video.mp4"
    channel_image = "https://source.example/media/channel.png"
    item_image = "https://source.example/media/cover.jpg"
+    image_main_path = published_image_path(
+        source_image,
+        repub_settings.REPUBLISHER_IMAGE[0],
+    )
+    image_fallback_path = published_image_path(
+        source_image,
+        repub_settings.REPUBLISHER_IMAGE[1],
+    )
+    image_thumbnail_path = thumbnail_image_path(
+        source_image,
+        repub_settings.REPUBLISHER_IMAGE_THUMBNAILS[0],
+    )
    audio_base_path = local_audio_path(source_audio)
    audio_default_path = published_media_path(
        FileType.AUDIO, source_audio, repub_settings.REPUBLISHER_AUDIO[0]
@ -94,6 +109,60 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
    )

    def prepare_item(item: ElementItem) -> None:
+        item.images = [
+            {
+                "url": source_image,
+                "path": image_main_path,
+                "published_url": _published_url(
+                    "https://mirror.example",
+                    f"images/{image_main_path}",
+                ),
+                "checksum": "image-default",
+                "status": "downloaded",
+                "source_path": "source/ignored.png",
+                "variants": [
+                    {
+                        "url": _published_url(
+                            "https://mirror.example",
+                            f"images/{image_main_path}",
+                        ),
+                        "path": image_main_path,
+                        "type": "image/webp",
+                        "medium": "image",
+                        "isDefault": "true",
+                        "fileSize": "2345",
+                        "width": "1200",
+                        "height": "675",
+                    },
+                    {
+                        "url": _published_url(
+                            "https://mirror.example",
+                            f"images/{image_fallback_path}",
+                        ),
+                        "path": image_fallback_path,
+                        "type": "image/jpeg",
+                        "medium": "image",
+                        "isDefault": "false",
+                        "fileSize": "3456",
+                        "width": "1200",
+                        "height": "675",
+                    },
+                ],
+                "thumbnails": [
+                    {
+                        "url": _published_url(
+                            "https://mirror.example",
+                            f"images/{image_thumbnail_path}",
+                        ),
+                        "path": image_thumbnail_path,
+                        "slot": "card_hero",
+                        "type": "image/jpeg",
+                        "width": "640",
+                        "height": "360",
+                    }
+                ],
+            }
+        ]
        item.audios = [
            {
                "url": source_audio,
@ -261,6 +330,7 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
      <pubDate>Tue, 31 Mar 2026 10:31:50 +0000</pubDate>
      <enclosure url="{source_audio}" length="123" type="audio/mpeg" />
      <content:encoded><![CDATA[<div mode="body" querystring="x=1"><img src="{source_image}" contenteditable="true"></div>]]></content:encoded>
+      <media:content url="{source_image}" type="image/jpeg" medium="image" expression="full" lang="en" />
      <media:content url="{source_video}" type="video/mp4" medium="video" expression="full" duration="60" width="640" height="360" lang="en" />
      <itunes:summary><![CDATA[{long_summary}]]></itunes:summary>
      <itunes:image href="{item_image}" />
@ -288,7 +358,11 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
    assert last_build_date == item_pub_date
    assert channel.findtext("itunes:explicit", namespaces=nsmap) == "false"
    assert channel.findtext("./image/url") == (
-        f"https://mirror.example/feeds/demo/images/{local_image_path(channel_image)}"
+        "https://mirror.example/feeds/demo/images/"
+        + canonical_published_image_path(
+            channel_image,
+            repub_settings.REPUBLISHER_IMAGE,
+        )
    )

    atom_self = channel.find("atom:link", namespaces=nsmap)
@ -318,9 +392,63 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
    assert root.find("./channel/item/media:content", namespaces=nsmap) is None

    media_groups = root.findall("./channel/item/media:group", namespaces=nsmap)
-    assert len(media_groups) == 2
+    assert len(media_groups) == 3
+
+    image_group = next(
+        group
+        for group in media_groups
+        if group.find("media:thumbnail", namespaces=nsmap) is not None
+    )
+    audio_group = next(
+        group
+        for group in media_groups
+        if group.findall("media:content", namespaces=nsmap)
+        and group.findall("media:content", namespaces=nsmap)[0].get("medium") == "audio"
+    )
+    video_group = next(
+        group
+        for group in media_groups
+        if group.findall("media:content", namespaces=nsmap)
+        and group.findall("media:content", namespaces=nsmap)[0].get("medium") == "video"
+    )
+
+    image_variants = image_group.findall("media:content", namespaces=nsmap)
+    assert [variant.attrib for variant in image_variants] == [
+        {
+            "url": (f"https://mirror.example/feeds/demo/images/" f"{image_main_path}"),
+            "type": "image/webp",
+            "medium": "image",
+            "isDefault": "true",
+            "expression": "full",
+            "lang": "en",
+            "height": "675",
+            "width": "1200",
+            "fileSize": "2345",
+        },
+        {
+            "url": (
+                f"https://mirror.example/feeds/demo/images/" f"{image_fallback_path}"
+            ),
+            "type": "image/jpeg",
+            "medium": "image",
+            "isDefault": "false",
+            "expression": "full",
+            "lang": "en",
+            "height": "675",
+            "width": "1200",
+            "fileSize": "3456",
+        },
+    ]
+    thumbnails = image_group.findall("media:thumbnail", namespaces=nsmap)
+    assert len(thumbnails) == 1
+    assert thumbnails[0].attrib == {
+        "url": (f"https://mirror.example/feeds/demo/images/" f"{image_thumbnail_path}"),
+        "width": "640",
+        "height": "360",
+        f"{{{nsmap['anynews']}}}slot": "card_hero",
+        f"{{{nsmap['anynews']}}}type": "image/jpeg",
+    }

-    audio_group, video_group = media_groups
    audio_variants = audio_group.findall("media:content", namespaces=nsmap)
    assert [variant.attrib for variant in audio_variants] == [
        {
@ -428,7 +556,13 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
    itunes_image = root.find("./channel/item/itunes:image", namespaces=nsmap)
    assert itunes_image is not None
    assert itunes_image.attrib == {
-        "href": f"https://mirror.example/feeds/demo/images/{local_image_path(item_image)}"
+        "href": (
+            "https://mirror.example/feeds/demo/images/"
+            + canonical_published_image_path(
+                item_image,
+                repub_settings.REPUBLISHER_IMAGE,
+            )
+        )
    }

    itunes_summary = root.findtext("./channel/item/itunes:summary", namespaces=nsmap)
@ -494,3 +628,165 @@ def test_item_body_uses_description_only_when_content_is_also_present() -> None:
    assert both_present.findtext("content:encoded", namespaces=nsmap) == (
        "<div>Full body</div>"
    )
+
+
+def test_exporter_does_not_emit_media_rss_for_inline_only_images() -> None:
+    """An image referenced only from inline HTML must not yield a media:group."""
+    source_image = "https://source.example/media/inline.jpg"
+
+    def prepare_item(item: ElementItem) -> None:
+        # The image record and its single variant point at the same artifact,
+        # so resolve the default-profile path and its mirror URL once.
+        image_path = published_image_path(
+            source_image,
+            repub_settings.REPUBLISHER_IMAGE[0],
+        )
+        image_url = _published_url(
+            "https://mirror.example",
+            "images/" + image_path,
+        )
+        default_variant = {
+            "url": image_url,
+            "path": image_path,
+            "type": "image/webp",
+            "medium": "image",
+            "isDefault": "true",
+            "width": "1200",
+            "height": "675",
+            "fileSize": "2345",
+        }
+        item.images = [
+            {
+                "url": source_image,
+                "path": image_path,
+                "published_url": image_url,
+                "checksum": "inline-image",
+                "status": "downloaded",
+                "source_path": "source/inline.jpg",
+                "variants": [default_variant],
+                "thumbnails": [],
+            }
+        ]
+
+    feed_text = f"""<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0"
+     xmlns:content="http://purl.org/rss/1.0/modules/content/">
+  <channel>
+    <title>Demo Feed</title>
+    <link>https://source.example/feed</link>
+    <description>Demo description</description>
+    <item>
+      <title>Inline Image Only</title>
+      <link>https://source.example/inline</link>
+      <guid isPermaLink="false">inline-only</guid>
+      <pubDate>Tue, 31 Mar 2026 10:31:50 +0000</pubDate>
+      <content:encoded><![CDATA[<div><img src="{source_image}"></div>]]></content:encoded>
+    </item>
+  </channel>
+</rss>
+"""
+    _, root = _serialize_feed(
+        feed_url="https://mirror.example",
+        prepare_item=prepare_item,
+        feed_text=feed_text,
+    )
+
+    media_groups = root.findall("./channel/item/media:group", namespaces=nsmap)
+    assert media_groups == []
+
+
+def test_exporter_replaces_standalone_source_media_thumbnails() -> None:
+    """A source item-level media:thumbnail is replaced by the grouped mirror one."""
+    source_image = "https://source.example/media/photo.jpg"
+    image_main_path = published_image_path(
+        source_image,
+        repub_settings.REPUBLISHER_IMAGE[0],
+    )
+    image_thumbnail_path = thumbnail_image_path(
+        source_image,
+        repub_settings.REPUBLISHER_IMAGE_THUMBNAILS[0],
+    )
+
+    def prepare_item(item: ElementItem) -> None:
+        main_url = _published_url(
+            "https://mirror.example",
+            f"images/{image_main_path}",
+        )
+        thumbnail_url = _published_url(
+            "https://mirror.example",
+            f"images/{image_thumbnail_path}",
+        )
+        default_variant = {
+            "url": main_url,
+            "path": image_main_path,
+            "type": "image/webp",
+            "medium": "image",
+            "isDefault": "true",
+            "fileSize": "2345",
+            "width": "1200",
+            "height": "675",
+        }
+        card_hero_thumbnail = {
+            "url": thumbnail_url,
+            "path": image_thumbnail_path,
+            "slot": "card_hero",
+            "type": "image/jpeg",
+            "width": "640",
+            "height": "360",
+        }
+        item.images = [
+            {
+                "url": source_image,
+                "path": image_main_path,
+                "published_url": main_url,
+                "checksum": "image-default",
+                "status": "downloaded",
+                "source_path": "source/ignored.png",
+                "variants": [default_variant],
+                "thumbnails": [card_hero_thumbnail],
+            }
+        ]
+
+    feed_text = f"""<?xml version="1.0" encoding="UTF-8"?>
+<rss version="2.0"
+     xmlns:media="http://search.yahoo.com/mrss/">
+  <channel>
+    <title>Demo Feed</title>
+    <link>https://source.example/feed</link>
+    <description>Demo description</description>
+    <item>
+      <title>Entry One</title>
+      <link>https://source.example/entry-1</link>
+      <guid isPermaLink="false">entry-1</guid>
+      <pubDate>Tue, 31 Mar 2026 10:31:50 +0000</pubDate>
+      <media:content url="{source_image}" type="image/jpeg" medium="image" />
+      <media:thumbnail url="https://source.example/media/source-thumb.jpg" width="10" height="10" />
+    </item>
+  </channel>
+</rss>
+"""
+    _, root = _serialize_feed(
+        feed_url="https://mirror.example",
+        prepare_item=prepare_item,
+        feed_text=feed_text,
+    )
+
+    # No thumbnail may remain directly under the item ...
+    item_level = root.findall("./channel/item/media:thumbnail", namespaces=nsmap)
+    assert item_level == []
+    # ... and exactly one, pointing at the mirror, lives inside the media:group.
+    grouped = root.findall(
+        "./channel/item/media:group/media:thumbnail",
+        namespaces=nsmap,
+    )
+    assert len(grouped) == 1
+    expected_url = f"https://mirror.example/feeds/demo/images/{image_thumbnail_path}"
+    assert grouped[0].get("url") == expected_url
--- a/tests/test_file_feeds.py
+++ b/tests/test_file_feeds.py
@ -8,10 +8,13 @@ from repub import settings as repub_settings
 from repub.spiders.rss_spider import RssFeedSpider
 from repub.utils import (
    FileType,
+    canonical_published_image_path,
    local_audio_path,
    local_image_path,
    local_video_path,
+    published_image_path,
    published_media_path,
+    thumbnail_image_path,
 )


@ -57,14 +60,17 @@ def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None:
            "REPUBLISHER_FILE_DIR": "files",
            "REPUBLISHER_AUDIO_DIR": "audio",
            "REPUBLISHER_VIDEO_DIR": "video",
+            "REPUBLISHER_IMAGE": repub_settings.REPUBLISHER_IMAGE,
            "REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
            "REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
        }
    )

-    assert (
-        spider.rewrite_image_url("https://example.com/media/photo.jpg")
-        == f"images/{local_image_path('https://example.com/media/photo.jpg')}"
+    assert spider.rewrite_image_url(
+        "https://example.com/media/photo.jpg"
+    ) == "images/" + canonical_published_image_path(
+        "https://example.com/media/photo.jpg",
+        repub_settings.REPUBLISHER_IMAGE,
    )
    assert spider.rewrite_file_url(
        FileType.AUDIO,
@ -90,6 +96,28 @@ def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None:
    )


+def test_rss_spider_keeps_legacy_image_paths_when_image_normalization_disabled() -> (
+    None
+):
+    """With normalization disabled, image URLs keep the ``local_image_path`` layout."""
+    photo_url = "https://example.com/media/photo.jpg"
+    settings_values = {
+        "REPUBLISHER_IMAGE_DIR": "images",
+        "REPUBLISHER_FILE_DIR": "files",
+        "REPUBLISHER_AUDIO_DIR": "audio",
+        "REPUBLISHER_VIDEO_DIR": "video",
+        "REPUBLISHER_IMAGE_NORMALIZE_ENABLED": False,
+        "REPUBLISHER_IMAGE": repub_settings.REPUBLISHER_IMAGE,
+        "REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
+        "REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
+    }
+    spider = RssFeedSpider(feed_name="demo", url="https://example.com/feed.rss")
+    spider.settings = Settings(values=settings_values)
+
+    rewritten = spider.rewrite_image_url(photo_url)
+
+    assert rewritten == f"images/{local_image_path(photo_url)}"
+
+
 def test_published_media_path_changes_when_profile_args_change() -> None:
    source_url = "https://example.com/media/clip.mp4"
    audio_profile = repub_settings.REPUBLISHER_AUDIO[0]
@ -113,6 +141,41 @@ def test_published_media_path_changes_when_profile_args_change() -> None:
    ) != published_media_path(FileType.VIDEO, source_url, base_profile)


+def test_published_image_and_thumbnail_paths_change_when_profile_args_change() -> None:
+    """Changing a transform or save kwarg must change the generated path."""
+    source_url = "https://example.com/media/photo.png"
+    image_profile = repub_settings.REPUBLISHER_IMAGE[0]
+    thumbnail_profile = repub_settings.REPUBLISHER_IMAGE_THUMBNAILS[0]
+    baseline_image_path = published_image_path(source_url, image_profile)
+    baseline_thumbnail_path = thumbnail_image_path(source_url, thumbnail_profile)
+
+    # The canonical path is the path of the first configured profile.
+    canonical_path = canonical_published_image_path(
+        source_url,
+        repub_settings.REPUBLISHER_IMAGE,
+    )
+    assert canonical_path == baseline_image_path
+
+    wider_image_profile = dict(
+        image_profile,
+        transform_kwargs=dict(image_profile["transform_kwargs"], width=2048),
+    )
+    assert published_image_path(source_url, wider_image_profile) != baseline_image_path
+
+    lower_quality_thumbnail_profile = dict(
+        thumbnail_profile,
+        save_kwargs=dict(thumbnail_profile["save_kwargs"], Q=60),
+    )
+    assert (
+        thumbnail_image_path(source_url, lower_quality_thumbnail_profile)
+        != baseline_thumbnail_path
+    )
+
+
 def test_rss_spider_keeps_items_with_empty_content_encoded() -> None:
    feed_text = """<?xml version="1.0" encoding="UTF-8"?>
 <rss xmlns:content="http://purl.org/rss/1.0/modules/content/" version="2.0">
@ -138,6 +201,7 @@ def test_rss_spider_keeps_items_with_empty_content_encoded() -> None:
            "REPUBLISHER_FILE_DIR": "files",
            "REPUBLISHER_AUDIO_DIR": "audio",
            "REPUBLISHER_VIDEO_DIR": "video",
+            "REPUBLISHER_IMAGE": repub_settings.REPUBLISHER_IMAGE,
            "REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
            "REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
        }
--- a/tests/test_pipelines.py
+++ b/tests/test_pipelines.py
@ -20,17 +20,20 @@ from repub.items import ElementItem
 from repub.pipelines import (
    AudioPipeline,
    FilePipeline,
-    ImagePipeline,
+    ImageNormalizePipeline,
+    ImageThumbnailPipeline,
    VideoPipeline,
-    convert_image_body_to_jpeg,
    image_mimetype,
 )
 from repub.utils import (
    FileType,
+    canonical_published_image_path,
    local_audio_path,
-    local_image_path,
    local_video_path,
+    published_image_path,
    published_media_path,
+    source_image_path,
+    thumbnail_image_path,
 )


@ -54,8 +57,15 @@ def build_test_crawler(tmp_path: Path) -> SimpleNamespace:
    return SimpleNamespace(settings=settings, request_fingerprinter=object())


+class HashableSpiderInfo:
+    """Test double for spider info with an empty ``spider`` namespace.
+
+    It replaces the bare ``SimpleNamespace`` previously returned by
+    ``spider_info()``; ``SimpleNamespace`` instances are unhashable, while
+    this class pins identity-based hashing.
+    NOTE(review): presumably the image pipelines key state by the info
+    object - confirm against ``repub.pipelines``.
+    """
+
+    def __init__(self) -> None:
+        self.spider = SimpleNamespace()
+
+    __hash__ = object.__hash__
+
+
 def spider_info() -> Any:
-    return SimpleNamespace(spider=SimpleNamespace())
+    return HashableSpiderInfo()


 def store_dir(pipeline: Any) -> Path:
@ -66,13 +76,14 @@ def transparent_png_bytes() -> bytes:
    return cast(Any, pyvips.Image.black(2, 3, bands=4)).pngsave_buffer()


-def jpeg_bytes() -> bytes:
-    return cast(Any, pyvips.Image.black(4, 5, bands=3)).jpegsave_buffer(Q=90)
+def png_bytes(width: int, height: int, *, bands: int = 4) -> bytes:
+    return cast(Any, pyvips.Image.black(width, height, bands=bands)).pngsave_buffer()


@pytest.mark.parametrize(
    ("pipeline_cls", "store_setting"),
    [
+        (ImageNormalizePipeline, "IMAGES_STORE"),
        (AudioPipeline, "AUDIO_STORE"),
        (VideoPipeline, "VIDEO_STORE"),
        (FilePipeline, "FILES_STORE"),
@ -647,39 +658,16 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
    assert completed_item.audios == [result]


-def test_convert_image_body_to_jpeg_flattens_alpha_png() -> None:
-    converted, width, height = convert_image_body_to_jpeg(transparent_png_bytes())
-
-    assert (width, height) == (2, 3)
-    assert converted.getvalue().startswith(b"\xff\xd8\xff")
-
-    image = cast(Any, pyvips.Image.new_from_buffer(converted.getvalue(), ""))
-    assert image.width == 2
-    assert image.height == 3
-    assert image.bands == 3
-    assert min(image.getpoint(0, 0)) >= 240
-
-
-def test_convert_image_body_to_jpeg_passthroughs_jpeg_bytes() -> None:
-    source = jpeg_bytes()
-
-    converted, width, height = convert_image_body_to_jpeg(source)
-
-    assert (width, height) == (4, 5)
-    assert converted.getvalue() == source
-
-
 def test_image_mimetype_does_not_guess_from_url_extension() -> None:
    assert image_mimetype(url="https://example.com/photo.jpg") is None


-def test_image_pipeline_media_downloaded_persists_converted_jpeg_and_sets_images(
+def test_image_normalize_pipeline_media_downloaded_persists_source_and_variants(
    monkeypatch, tmp_path: Path
 ) -> None:
    crawler = build_test_crawler(tmp_path)
-    pipeline = ImagePipeline.from_crawler(cast(Crawler, crawler))
+    pipeline = ImageNormalizePipeline.from_crawler(cast(Crawler, crawler))
    monkeypatch.setattr(pipeline, "inc_stats", lambda status: None)
-    persisted: list[tuple[str, bytes, dict[str, Any] | None, str | None]] = []
    source_url = "https://example.com/photo.png"
    item = ElementItem(
        feed_name="nasa",
@ -693,21 +681,179 @@ def test_image_pipeline_media_downloaded_persists_converted_jpeg_and_sets_images
        video_urls=[],
        videos=[],
    )
-
-    def fake_persist_file(path, buf, info, meta=None, headers=None):
-        del info
-        persisted.append(
-            (
-                path,
-                buf.getvalue(),
-                cast(dict[str, Any] | None, meta),
-                None if headers is None else headers.get("Content-Type"),
-            )
-        )
-
-    monkeypatch.setattr(pipeline.store, "persist_file", fake_persist_file)
+    canonical_path = canonical_published_image_path(
+        source_url,
+        crawler.settings["REPUBLISHER_IMAGE"],
+    )
+    source_path = source_image_path(source_url, "image/png")
+    webp_path = published_image_path(
+        source_url,
+        crawler.settings["REPUBLISHER_IMAGE"][0],
+    )
+    jpeg_path = published_image_path(
+        source_url,
+        crawler.settings["REPUBLISHER_IMAGE"][1],
+    )
+    source_body = transparent_png_bytes()

    result = pipeline.media_downloaded(
+        Response(
+            url=source_url,
+            body=source_body,
+            status=200,
+            headers={"Content-Type": "image/png"},
+        ),
+        Request(source_url),
+        spider_info(),
+        item=item,
+    )
+    webp_file_size = result["variants"][0].get("fileSize")
+    jpeg_file_size = result["variants"][1].get("fileSize")
+
+    assert result == {
+        "url": source_url,
+        "path": canonical_path,
+        "published_url": f"https://mirror.example/feeds/nasa/images/{canonical_path}",
+        "checksum": result["checksum"],
+        "status": "downloaded",
+        "source_path": source_path,
+        "variants": [
+            {
+                "url": f"https://mirror.example/feeds/nasa/images/{webp_path}",
+                "path": webp_path,
+                "type": "image/webp",
+                "medium": "image",
+                "isDefault": "true",
+                "fileSize": webp_file_size,
+                "width": 2,
+                "height": 3,
+            },
+            {
+                "url": f"https://mirror.example/feeds/nasa/images/{jpeg_path}",
+                "path": jpeg_path,
+                "type": "image/jpeg",
+                "medium": "image",
+                "isDefault": "false",
+                "fileSize": jpeg_file_size,
+                "width": 2,
+                "height": 3,
+            },
+        ],
+        "thumbnails": [],
+    }
+    assert isinstance(result["checksum"], str)
+    assert isinstance(webp_file_size, int)
+    assert isinstance(jpeg_file_size, int)
+    assert (store_dir(pipeline) / source_path).read_bytes() == source_body
+    webp_image = cast(
+        Any,
+        pyvips.Image.new_from_file(str(store_dir(pipeline) / webp_path)),
+    )
+    jpeg_image = cast(
+        Any,
+        pyvips.Image.new_from_file(str(store_dir(pipeline) / jpeg_path)),
+    )
+    assert (webp_image.width, webp_image.height) == (2, 3)
+    assert (jpeg_image.width, jpeg_image.height) == (2, 3)
+    assert jpeg_image.bands == 3
+
+    completed_item = pipeline.item_completed([(True, result)], item, spider_info())
+    assert completed_item.images == [result]
+
+
+def test_image_thumbnail_pipeline_generates_named_thumbnails_from_source_image(
+    monkeypatch, tmp_path: Path
+) -> None:
+    """Check that the thumbnail pipeline attaches named thumbnails to an image.
+
+    A 1200x900 PNG is first run through
+    ``ImageNormalizePipeline.media_downloaded``; the result is placed on
+    ``item.images`` and the item is handed to
+    ``ImageThumbnailPipeline.process_item``. The test expects two thumbnails,
+    with slots ``card_hero`` (image/jpeg, 640x360) and ``list_square``
+    (160x160), stored at the paths ``thumbnail_image_path`` returns for the
+    first two ``REPUBLISHER_IMAGE_THUMBNAILS`` entries, and verifies that the
+    files on disk have the dimensions recorded in the metadata.
+    """
+    crawler = build_test_crawler(tmp_path)
+    normalize_pipeline = ImageNormalizePipeline.from_crawler(cast(Crawler, crawler))
+    thumbnail_pipeline = ImageThumbnailPipeline.from_crawler(cast(Crawler, crawler))
+    # Stats reporting is replaced with a no-op.
+    # NOTE(review): presumably the test crawler has no usable stats collector
+    # -- confirm.
+    monkeypatch.setattr(normalize_pipeline, "inc_stats", lambda status: None)
+    source_url = "https://example.com/photo.png"
+    source_body = png_bytes(1200, 900)
+    item = ElementItem(
+        feed_name="nasa",
+        el=None,
+        image_urls=[source_url],
+        images=[],
+        file_urls=[],
+        files=[],
+        audio_urls=[],
+        audios=[],
+        video_urls=[],
+        videos=[],
+    )
+
+    # Stage 1: normalize the downloaded source image.
+    normalized = normalize_pipeline.media_downloaded(
+        Response(
+            url=source_url,
+            body=source_body,
+            status=200,
+            headers={"Content-Type": "image/png"},
+        ),
+        Request(source_url),
+        spider_info(),
+        item=item,
+    )
+    # The normalized result is assigned by hand (item_completed is not called
+    # here) so the thumbnail pipeline receives it on the item.
+    item.images = [normalized]
+
+    # Stage 2: generate thumbnails for the images already on the item.
+    processed = thumbnail_pipeline.process_item(item, spider_info().spider)
+    thumbnails = processed.images[0]["thumbnails"]
+    thumb_slots = [thumb.get("slot") for thumb in thumbnails]
+    first_thumb = thumbnails[0]
+    second_thumb = thumbnails[1]
+
+    # After thumbnailing, the image entry must still point at the canonical
+    # published path.
+    assert processed.images[0]["path"] == canonical_published_image_path(
+        source_url,
+        crawler.settings["REPUBLISHER_IMAGE"],
+    )
+    assert thumb_slots == ["card_hero", "list_square"]
+    assert first_thumb.get("path") == thumbnail_image_path(
+        source_url,
+        crawler.settings["REPUBLISHER_IMAGE_THUMBNAILS"][0],
+    )
+    assert first_thumb.get("type") == "image/jpeg"
+    assert first_thumb.get("width") == 640
+    assert first_thumb.get("height") == 360
+    assert second_thumb.get("path") == thumbnail_image_path(
+        source_url,
+        crawler.settings["REPUBLISHER_IMAGE_THUMBNAILS"][1],
+    )
+    assert second_thumb.get("width") == 160
+    assert second_thumb.get("height") == 160
+    # Open every thumbnail file and compare its actual pixel size with the
+    # recorded metadata.
+    # NOTE(review): files are looked up under the normalize pipeline's store
+    # directory, which assumes both pipelines write to the same store --
+    # confirm.
+    for thumb in thumbnails:
+        thumb_path = thumb.get("path")
+        thumb_width = thumb.get("width")
+        thumb_height = thumb.get("height")
+        thumb_image = cast(
+            Any,
+            pyvips.Image.new_from_file(
+                str(store_dir(normalize_pipeline) / str(thumb_path))
+            ),
+        )
+        assert (thumb_image.width, thumb_image.height) == (thumb_width, thumb_height)
+
+
+def test_image_normalize_pipeline_cache_hit_keeps_persisted_source_path_for_extensionless_urls(
+    monkeypatch, tmp_path: Path
+) -> None:
+    crawler = build_test_crawler(tmp_path)
+    pipeline = ImageNormalizePipeline.from_crawler(cast(Crawler, crawler))
+    monkeypatch.setattr(pipeline, "inc_stats", lambda status: None)
+    source_url = "https://example.com/photo"
+    item = ElementItem(
+        feed_name="nasa",
+        el=None,
+        image_urls=[source_url],
+        images=[],
+        file_urls=[],
+        files=[],
+        audio_urls=[],
+        audios=[],
+        video_urls=[],
+        videos=[],
+    )
+
+    downloaded = pipeline.media_downloaded(
        Response(
            url=source_url,
            body=transparent_png_bytes(),
@ -719,25 +865,11 @@ def test_image_pipeline_media_downloaded_persists_converted_jpeg_and_sets_images
        item=item,
    )

-    assert result == {
-        "url": source_url,
-        "path": local_image_path(source_url),
-        "checksum": result["checksum"],
-        "status": "downloaded",
-    }
-    assert isinstance(result["checksum"], str)
-    assert len(persisted) == 1
-    assert persisted[0][0] == local_image_path(source_url)
-    assert persisted[0][2] == {"width": 2, "height": 3}
-    assert persisted[0][3] == "image/jpeg"
+    uptodate = pipeline.media_to_download(Request(source_url), spider_info(), item=item)

-    image = cast(Any, pyvips.Image.new_from_buffer(persisted[0][1], ""))
-    assert image.width == 2
-    assert image.height == 3
-    assert image.bands == 3
-
-    completed_item = pipeline.item_completed([(True, result)], item, spider_info())
-    assert completed_item.images == [result]
+    assert downloaded["source_path"].endswith(".png")
+    assert uptodate is not None
+    assert uptodate["source_path"] == downloaded["source_path"]


 def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variants(