diff --git a/flake.lock b/flake.lock
index f7a5277..86b48f0 100644
--- a/flake.lock
+++ b/flake.lock
@@ -2,18 +2,16 @@
"nodes": {
"nixpkgs": {
"locked": {
- "lastModified": 1779622335,
- "narHash": "sha256-ViA62qtL5za7V3d5I8OA9q9JcFhsVAiL5jVHwEclWqk=",
- "owner": "nixos",
- "repo": "nixpkgs",
- "rev": "705e9929918b43bd7b715dc0a878ac870449bb03",
- "type": "github"
+ "lastModified": 1774386573,
+ "narHash": "sha256-4hAV26quOxdC6iyG7kYaZcM3VOskcPUrdCQd/nx8obc=",
+ "rev": "46db2e09e1d3f113a13c0d7b81e2f221c63b8ce9",
+ "revCount": 969196,
+ "type": "tarball",
+ "url": "https://api.flakehub.com/f/pinned/NixOS/nixpkgs/0.1.969196%2Brev-46db2e09e1d3f113a13c0d7b81e2f221c63b8ce9/019d279e-af65-79ce-92be-5dee7b1e36d4/source.tar.gz"
},
"original": {
- "owner": "nixos",
- "ref": "nixos-26.05",
- "repo": "nixpkgs",
- "type": "github"
+ "type": "tarball",
+ "url": "https://flakehub.com/f/NixOS/nixpkgs/0.1"
}
},
"pyproject-build-systems": {
@@ -29,11 +27,11 @@
]
},
"locked": {
- "lastModified": 1779676664,
- "narHash": "sha256-MbXylBTkWqVm8/VYjoULtMoVRgWBN1gSHbeRKsOsPlU=",
+ "lastModified": 1773870109,
+ "narHash": "sha256-ZoTdqZP03DcdoyxvpFHCAek4bkPUTUPUF3oCCgc3dP4=",
"owner": "pyproject-nix",
"repo": "build-system-pkgs",
- "rev": "7bff980f37fc24e09dbc986643719900c139bf12",
+ "rev": "b6e74f433b02fa4b8a7965ee24680f4867e2926f",
"type": "github"
},
"original": {
@@ -49,11 +47,11 @@
]
},
"locked": {
- "lastModified": 1778901413,
- "narHash": "sha256-GSKXTAnFqRAMlZkJrIPcQMYf+lpMr66K3i60mB9STvc=",
+ "lastModified": 1774498001,
+ "narHash": "sha256-wTfdyzzrmpuqt4TQQNqilF91v0m5Mh1stNy9h7a/WK4=",
"owner": "pyproject-nix",
"repo": "pyproject.nix",
- "rev": "a228447c3e179d477c1b6246ef3efa8cfe3c469a",
+ "rev": "794afa6eb588b498344f2eaa36ab1ceb7e6b0b09",
"type": "github"
},
"original": {
@@ -78,11 +76,11 @@
]
},
"locked": {
- "lastModified": 1775636079,
- "narHash": "sha256-pc20NRoMdiar8oPQceQT47UUZMBTiMdUuWrYu2obUP0=",
+ "lastModified": 1773297127,
+ "narHash": "sha256-6E/yhXP7Oy/NbXtf1ktzmU8SdVqJQ09HC/48ebEGBpk=",
"owner": "numtide",
"repo": "treefmt-nix",
- "rev": "790751ff7fd3801feeaf96d7dc416a8d581265ba",
+ "rev": "71b125cd05fbfd78cab3e070b73544abe24c5016",
"type": "github"
},
"original": {
@@ -101,11 +99,11 @@
]
},
"locked": {
- "lastModified": 1779411315,
- "narHash": "sha256-IMFlxeyClau51KplhhSRGhdGTvD/knShHdybP1UOTuk=",
+ "lastModified": 1774705889,
+ "narHash": "sha256-TRTIM18gP3ccBj3m8bV1zx82xeYweNYp8/lgcdR4Zz0=",
"owner": "pyproject-nix",
"repo": "uv2nix",
- "rev": "fdf2a76275d7a9c27deb5d2f2ab33526ac9052ff",
+ "rev": "28355ed75b466a15ff324e1baa151b550619fe67",
"type": "github"
},
"original": {
diff --git a/flake.nix b/flake.nix
index c574d90..2d4cda9 100644
--- a/flake.nix
+++ b/flake.nix
@@ -2,7 +2,7 @@
description = "republisher-redux - offline RSS and Atom feed mirroring";
inputs = {
- nixpkgs.url = "github:nixos/nixpkgs/nixos-26.05";
+ nixpkgs.url = "https://flakehub.com/f/NixOS/nixpkgs/0.1";
treefmt-nix = {
url = "github:numtide/treefmt-nix";
inputs.nixpkgs.follows = "nixpkgs";
@@ -63,12 +63,6 @@
feedgen = prev.feedgen.overrideAttrs (old: {
nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ final.setuptools ];
});
- pyvips = prev.pyvips.overrideAttrs (old: {
- nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
- final.setuptools
- final.pkgconfig
- ];
- });
pygea = prev.pygea.overrideAttrs (old: {
nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
final.hatchling
@@ -114,7 +108,6 @@
checkPhase = ''
runHook preCheck
export HOME="$(mktemp -d)"
- export LD_LIBRARY_PATH="${pkgs.lib.makeLibraryPath [ pkgs.vips ]}:$LD_LIBRARY_PATH"
pytest tests/ -v
runHook postCheck
'';
@@ -132,8 +125,7 @@
postBuild = ''
rm -f "$out/bin/repub"
makeWrapper "${baseVenv}/bin/repub" "$out/bin/repub" \
- --prefix PATH : "${pkgs.lib.makeBinPath [ ffmpegPackage ]}" \
- --prefix LD_LIBRARY_PATH : "${pkgs.lib.makeLibraryPath [ pkgs.vips ]}"
+ --prefix PATH : "${pkgs.lib.makeBinPath [ ffmpegPackage ]}"
'';
meta.mainProgram = "repub";
};
@@ -281,14 +273,12 @@
packages = [
pkgs.tailwindcss_4
pkgs.python313
- pkgs.vips
pkgs.uv
pkgs.pyright
(mkFfmpegPackage pkgs)
];
env.LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath [
pkgs.stdenv.cc.cc
- pkgs.vips
];
env.UV_PROJECT_ENVIRONMENT = ".venv";
env.UV_PYTHON_DOWNLOADS = "never";
diff --git a/pyproject.toml b/pyproject.toml
index baddc3c..b87027b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ dependencies = [
"colorlog>=6.8.2,<7.0.0",
"feedparser>=6.0.11,<7.0.0",
"lxml>=5.2.1,<6.0.0",
- "pyvips>=3.0.0,<4.0.0",
+ "pillow>=10.3.0,<11.0.0",
"ffmpeg-python>=0.2.0,<0.3.0",
"Quart>=0.20.0,<0.21.0",
"hypercorn>=0.18.0,<0.19.0",
diff --git a/repub/config.py b/repub/config.py
index d17c7d7..e9e86b3 100644
--- a/repub/config.py
+++ b/repub/config.py
@@ -188,31 +188,21 @@ def build_feed_settings(
video_dir = base_settings.get("REPUBLISHER_VIDEO_DIR", VIDEO_DIR)
audio_dir = base_settings.get("REPUBLISHER_AUDIO_DIR", AUDIO_DIR)
file_dir = base_settings.get("REPUBLISHER_FILE_DIR", FILE_DIR)
- image_normalize_enabled = convert_images and base_settings.getbool(
- "REPUBLISHER_IMAGE_NORMALIZE_ENABLED", True
- )
- image_thumbnails_enabled = image_normalize_enabled and base_settings.getbool(
- "REPUBLISHER_IMAGE_THUMBNAILS_ENABLED", True
- )
item_pipelines = dict(base_settings.getdict("ITEM_PIPELINES"))
item_pipelines.pop("repub.pipelines.ImagePipeline", None)
- item_pipelines.pop("repub.pipelines.ImageNormalizePipeline", None)
- item_pipelines.pop("repub.pipelines.ImageThumbnailPipeline", None)
item_pipelines.pop("repub.pipelines.AudioPipeline", None)
item_pipelines.pop("repub.pipelines.VideoPipeline", None)
item_pipelines.pop("repub.pipelines.FilePipeline", None)
item_pipelines.update(
{
- "repub.pipelines.AudioPipeline": 3,
- "repub.pipelines.FilePipeline": 5,
+ "repub.pipelines.AudioPipeline": 2,
+ "repub.pipelines.FilePipeline": 4,
}
)
- if image_normalize_enabled:
- item_pipelines["repub.pipelines.ImageNormalizePipeline"] = 1
- if image_thumbnails_enabled:
- item_pipelines["repub.pipelines.ImageThumbnailPipeline"] = 2
+ if convert_images:
+ item_pipelines["repub.pipelines.ImagePipeline"] = 1
if convert_video:
- item_pipelines["repub.pipelines.VideoPipeline"] = 4
+ item_pipelines["repub.pipelines.VideoPipeline"] = 3
settings = base_settings.copy()
settings.setdict(
{
@@ -229,8 +219,6 @@ def build_feed_settings(
"LOG_FILE": str(out_dir / "logs" / f"{feed_slug}.log"),
"HTTPCACHE_DIR": str(out_dir / "httpcache"),
"REPUBLISHER_IMAGE_DIR": image_dir,
- "REPUBLISHER_IMAGE_NORMALIZE_ENABLED": image_normalize_enabled,
- "REPUBLISHER_IMAGE_THUMBNAILS_ENABLED": image_thumbnails_enabled,
"REPUBLISHER_VIDEO_DIR": video_dir,
"REPUBLISHER_AUDIO_DIR": audio_dir,
"REPUBLISHER_FILE_DIR": file_dir,
diff --git a/repub/exporters.py b/repub/exporters.py
index ab954c9..99b0663 100644
--- a/repub/exporters.py
+++ b/repub/exporters.py
@@ -9,17 +9,12 @@ from repub.items import (
ChannelElementItem,
ElementItem,
MediaVariant,
- ThumbnailVariant,
- TranscodedImageFile,
TranscodedMediaFile,
)
from repub.utils import FileType, determine_file_type
MEDIA_CONTENT_TAG = QName(rss.nsmap["media"], "content").text
MEDIA_GROUP_TAG = QName(rss.nsmap["media"], "group").text
-MEDIA_THUMBNAIL_TAG = QName(rss.nsmap["media"], "thumbnail").text
-ANYNEWS_SLOT_ATTR = QName(rss.nsmap["anynews"], "slot").text
-ANYNEWS_TYPE_ATTR = QName(rss.nsmap["anynews"], "type").text
class RssExporter(BaseItemExporter):
@@ -57,9 +52,7 @@ class RssExporter(BaseItemExporter):
key: str(value) for key, value in attrib.items() if value not in (None, "")
}
- def canonical_variant(
- self, media_file: TranscodedMediaFile | TranscodedImageFile
- ) -> MediaVariant | None:
+ def canonical_variant(self, media_file: TranscodedMediaFile) -> MediaVariant | None:
for variant in media_file["variants"]:
if variant.get("isDefault") == "true":
return variant
@@ -99,8 +92,6 @@ class RssExporter(BaseItemExporter):
def strip_managed_media_nodes(self, item: ElementItem) -> dict[str, dict[str, str]]:
fallbacks: dict[str, dict[str, str]] = {}
managed_types: set[FileType] = set()
- if self.managed_image_files(item):
- managed_types.add(FileType.IMAGE)
if item.audios:
managed_types.add(FileType.AUDIO)
if item.videos:
@@ -109,9 +100,6 @@ class RssExporter(BaseItemExporter):
return fallbacks
for child in list(item.el):
- if child.tag == MEDIA_THUMBNAIL_TAG and FileType.IMAGE in managed_types:
- item.el.remove(child)
- continue
if child.tag == MEDIA_CONTENT_TAG:
if self.owned_media_type(child, managed_types) is None:
continue
@@ -125,43 +113,25 @@ class RssExporter(BaseItemExporter):
if child.tag != MEDIA_GROUP_TAG:
continue
- managed_image_group = False
for media_content in list(child):
if media_content.tag != MEDIA_CONTENT_TAG:
continue
- owned_type = self.owned_media_type(media_content, managed_types)
- if owned_type is None:
+ if self.owned_media_type(media_content, managed_types) is None:
continue
- if owned_type == FileType.IMAGE:
- managed_image_group = True
fallbacks[media_content.get("url", "")] = {
key: value
for key, value in media_content.attrib.items()
if key in {"expression", "lang"}
}
child.remove(media_content)
- if managed_image_group:
- for media_thumbnail in list(child):
- if media_thumbnail.tag == MEDIA_THUMBNAIL_TAG:
- child.remove(media_thumbnail)
if len(child) == 0:
item.el.remove(child)
return fallbacks
- def managed_image_files(self, item: ElementItem) -> list[TranscodedImageFile]:
- media_image_urls = set(item.media_image_urls)
- if not media_image_urls:
- return []
- return [image for image in item.images if image["url"] in media_image_urls]
-
def append_media_groups(
self, item: ElementItem, fallbacks: dict[str, dict[str, str]]
):
- for media_file in [
- *self.managed_image_files(item),
- *item.audios,
- *item.videos,
- ]:
+ for media_file in [*item.audios, *item.videos]:
if not media_file["variants"]:
continue
fallback_attrib = fallbacks.get(media_file["published_url"], {})
@@ -171,11 +141,7 @@ class RssExporter(BaseItemExporter):
**self.media_content_attrib(variant, fallback_attrib)
)
for variant in media_file["variants"]
- ],
- *[
- rss.MEDIA.thumbnail(**self.media_thumbnail_attrib(thumbnail))
- for thumbnail in media_file.get("thumbnails", [])
- ],
+ ]
)
if group is not None:
item.el.append(group)
@@ -204,22 +170,10 @@ class RssExporter(BaseItemExporter):
)
return attrib
- def media_thumbnail_attrib(self, thumbnail: ThumbnailVariant) -> dict[str, str]:
- attrib = self.compact_attrib(
- url=thumbnail.get("url"),
- width=thumbnail.get("width"),
- height=thumbnail.get("height"),
- )
- if thumbnail.get("slot"):
- attrib[ANYNEWS_SLOT_ATTR] = str(thumbnail["slot"])
- if thumbnail.get("type"):
- attrib[ANYNEWS_TYPE_ATTR] = str(thumbnail["type"])
- return attrib
-
def apply_transcoded_media(self, item: Any) -> None:
if not isinstance(item, ElementItem):
return
- if not self.managed_image_files(item) and not item.audios and not item.videos:
+ if not item.audios and not item.videos:
return
self.rebuild_enclosures(item)
fallbacks = self.strip_managed_media_nodes(item)
diff --git a/repub/items.py b/repub/items.py
index 310da3f..d5e77be 100644
--- a/repub/items.py
+++ b/repub/items.py
@@ -1,4 +1,4 @@
-from dataclasses import dataclass, field
+from dataclasses import dataclass
from typing import Any, List, TypedDict
@@ -8,7 +8,7 @@ class MediaVariant(TypedDict, total=False):
type: str
medium: str
isDefault: str
- fileSize: int | str
+ fileSize: str
bitrate: int | float | str
samplingrate: int | str
channels: int | str
@@ -29,39 +29,18 @@ class TranscodedMediaFile(TypedDict):
variants: List[MediaVariant]
-class ThumbnailVariant(TypedDict, total=False):
- url: str
- path: str
- width: int | str
- height: int | str
- slot: str
- type: str
-
-
-class TranscodedImageFile(TypedDict):
- url: str
- path: str
- checksum: str | None
- status: str
- published_url: str
- source_path: str
- variants: List[MediaVariant]
- thumbnails: List[ThumbnailVariant]
-
-
@dataclass
class ElementItem:
feed_name: str
el: Any
image_urls: List[str]
- images: List[TranscodedImageFile]
+ images: List[Any]
file_urls: List[str]
files: List[Any]
audio_urls: List[str]
audios: List[TranscodedMediaFile]
video_urls: List[str]
videos: List[TranscodedMediaFile]
- media_image_urls: List[str] = field(default_factory=list)
@dataclass
@@ -69,5 +48,4 @@ class ChannelElementItem:
feed_name: str
el: Any
image_urls: List[str]
- images: List[TranscodedImageFile]
- media_image_urls: List[str] = field(default_factory=list)
+ images: List[Any]
diff --git a/repub/pipelines.py b/repub/pipelines.py
index 69a6c73..c2b11e3 100644
--- a/repub/pipelines.py
+++ b/repub/pipelines.py
@@ -1,4 +1,3 @@
-import functools
import hashlib
import logging
import mimetypes
@@ -9,482 +8,24 @@ from os import PathLike
from pathlib import Path
from typing import Any, Dict, List, Optional, Union, cast
-import pyvips
from scrapy.crawler import Crawler
from scrapy.pipelines.files import FileException
from scrapy.pipelines.files import FilesPipeline as BaseFilesPipeline
+from scrapy.pipelines.images import ImagesPipeline as BaseImagesPipeline
import repub.utils
from repub import media
-from repub.items import (
- MediaVariant,
- ThumbnailVariant,
- TranscodedImageFile,
- TranscodedMediaFile,
-)
+from repub.items import MediaVariant, TranscodedMediaFile
logger = logging.getLogger(__name__)
-class ImageException(FileException):
- """General image error exception"""
-
-
-def image_mimetype(response=None, *, url: str | None = None) -> str | None:
- del url
- if response is not None:
- content_type = response.headers.get(b"Content-Type")
- if content_type:
- return content_type.decode("utf-8").split(";", 1)[0].strip()
- return None
-
-
-def image_loader_name(image: Any) -> str:
- if image.get_typeof("vips-loader"):
- return str(image.get("vips-loader"))
- return ""
-
-
-def image_loader_mimetype(loader: str, fallback: str | None = None) -> str | None:
- known = {
- "jpegload": "image/jpeg",
- "pngload": "image/png",
- "gifload": "image/gif",
- "svgload": "image/svg+xml",
- "tiffload": "image/tiff",
- "webpload": "image/webp",
- "heifload": "image/heif",
- "jxlload": "image/jxl",
- }
- for prefix, mimetype in known.items():
- if loader.startswith(prefix):
- return mimetype
- return fallback
-
-
-def load_image_from_buffer(body: bytes) -> Any:
- try:
- return cast(
- Any,
- pyvips.Image.new_from_buffer(body, "", access="sequential"),
- )
- except pyvips.Error as exc:
- raise ImageException(str(exc)) from exc
-
-
-def load_image_from_file(file_path: str | Path) -> Any:
- try:
- return cast(
- Any,
- pyvips.Image.new_from_file(str(file_path), access="sequential"),
- )
- except pyvips.Error as exc:
- raise ImageException(str(exc)) from exc
-
-
-def render_image_profile(source_path: str | Path, profile: dict[str, Any]) -> BytesIO:
- transform = str(profile["transform"])
- transform_kwargs = dict(profile.get("transform_kwargs", {}))
- width = int(transform_kwargs.pop("width"))
- if transform == "thumbnail":
- image = cast(
- Any,
- pyvips.Image.thumbnail(str(source_path), width, **transform_kwargs),
- )
- elif transform == "thumbnail_buffer":
- image = cast(
- Any,
- pyvips.Image.thumbnail_buffer(
- Path(source_path).read_bytes(),
- width,
- **transform_kwargs,
- ),
- )
- else:
- raise ImageException(f"Unsupported image transform: {transform}")
-
- image = image.colourspace("srgb")
- if image.hasalpha() and (
- profile["mimetype"] == "image/jpeg"
- or "background" in profile.get("save_kwargs", {})
- ):
- image = image.flatten(
- background=profile.get("save_kwargs", {}).get("background", [255, 255, 255])
- )
-
- save_name = str(profile["save"])
- try:
- image_bytes = getattr(image, save_name)(**dict(profile.get("save_kwargs", {})))
- except pyvips.Error as exc:
- raise ImageException(str(exc)) from exc
- return BytesIO(cast(bytes, image_bytes))
-
-
-def image_buffer_meta(
- body: bytes,
- *,
- fallback_mimetype: str | None = None,
-) -> tuple[int, int, int, str | None]:
- image = load_image_from_buffer(body)
- mimetype = image_loader_mimetype(image_loader_name(image), fallback_mimetype)
- return image.width, image.height, len(body), mimetype
-
-
-def image_variant_meta(
- file_path: str | Path,
- *,
- fallback_mimetype: str | None = None,
-) -> tuple[int, int, int, str | None]:
- image = load_image_from_file(file_path)
- mimetype = image_loader_mimetype(image_loader_name(image), fallback_mimetype)
- return image.width, image.height, Path(file_path).stat().st_size, mimetype
-
-
-class ImageNormalizePipeline(BaseFilesPipeline):
- MEDIA_NAME = "image"
- EXPIRES = 90
- MIN_WIDTH = 0
- MIN_HEIGHT = 0
- DEFAULT_FILES_URLS_FIELD = "image_urls"
- DEFAULT_FILES_RESULT_FIELD = "images"
-
- @classmethod
- def from_crawler(cls, crawler: Crawler):
- cls._update_stores(crawler.settings)
- return cls(crawler.settings["IMAGES_STORE"], crawler=crawler)
-
- def __init__(self, store_uri: Union[str, PathLike], *, crawler: Crawler):
- self.settings = crawler.settings
- super().__init__(store_uri, crawler=crawler)
- resolve = functools.partial(
- self._key_for_pipe,
- base_class_name="ImagesPipeline",
- settings=self.settings,
- )
- self.expires = self.settings.getint(resolve("IMAGES_EXPIRES"), self.EXPIRES)
- self.files_urls_field = self.settings.get(
- resolve("IMAGES_URLS_FIELD"),
- self.DEFAULT_FILES_URLS_FIELD,
- )
- self.files_result_field = self.settings.get(
- resolve("IMAGES_RESULT_FIELD"),
- self.DEFAULT_FILES_RESULT_FIELD,
- )
- self.min_width = self.settings.getint(
- resolve("IMAGES_MIN_WIDTH"),
- self.MIN_WIDTH,
- )
- self.min_height = self.settings.getint(
- resolve("IMAGES_MIN_HEIGHT"),
- self.MIN_HEIGHT,
- )
-
- def get_image_settings(self) -> list[dict[str, Any]]:
- return list(self.settings["REPUBLISHER_IMAGE"])
-
+class ImagePipeline(BaseImagesPipeline):
def file_path(self, request, response=None, info=None, *, item=None):
- return repub.utils.canonical_published_image_path(
- request.url,
- self.get_image_settings(),
- )
+ return repub.utils.local_image_path(request.url)
- def source_path(self, request, response=None) -> str:
- return repub.utils.source_image_path(
- request.url,
- image_mimetype(response, url=request.url),
- )
-
- def resolve_source_path(self, request, response=None) -> str:
- source_path = self.source_path(request, response)
- if response is not None:
- return source_path
- source_file = self.local_store_path(source_path)
- if source_file.exists():
- return source_path
- source_dir = self.local_store_path(
- str(self.settings.get("REPUBLISHER_IMAGE_SOURCE_SUBDIR", "source"))
- )
- guid = repub.utils.image_guid(request.url)
- matches = sorted(source_dir.glob(f"{guid}.*"))
- if matches:
- return f"{source_dir.name}/{matches[0].name}"
- return source_path
-
- def variant_paths(self, source_url: str) -> list[tuple[bool, dict[str, Any], str]]:
- return [
- (
- index == 0,
- setting,
- repub.utils.published_image_path(source_url, setting),
- )
- for index, setting in enumerate(self.get_image_settings())
- ]
-
- def published_url(self, path: str, item=None) -> str:
- relative_path = f"{self.settings['REPUBLISHER_IMAGE_DIR']}/{path}"
- feed_url = str(self.settings.get("REPUBLISHER_FEED_URL", "")).rstrip("/")
- if feed_url == "" or item is None:
- return relative_path
- return f"{feed_url}/feeds/{item.feed_name}/{relative_path}"
-
- def local_store_path(self, path: str) -> Path:
- return Path(cast(Any, self.store).basedir) / path
-
- def image_variant(
- self,
- *,
- path: str,
- mimetype: str,
- width: int,
- height: int,
- file_size: int,
- is_default: bool,
- item=None,
- ) -> MediaVariant:
- variant: MediaVariant = {
- "url": self.published_url(path, item),
- "path": path,
- "type": mimetype,
- "medium": repub.utils.FileType.IMAGE.value,
- "isDefault": "true" if is_default else "false",
- "fileSize": file_size,
- "width": width,
- "height": height,
- }
- return variant
-
- def load_variants_from_disk(self, request, *, item=None) -> list[MediaVariant]:
- variants: list[MediaVariant] = []
- for is_default, setting, path in self.variant_paths(request.url):
- file_path = self.local_store_path(path)
- if not file_path.exists():
- continue
- width, height, file_size, mimetype = image_variant_meta(
- file_path,
- fallback_mimetype=setting["mimetype"],
- )
- variants.append(
- self.image_variant(
- path=path,
- mimetype=mimetype or setting["mimetype"],
- width=width,
- height=height,
- file_size=file_size,
- is_default=is_default,
- item=item,
- )
- )
- return variants
-
- def make_file_result(
- self,
- request,
- *,
- checksum: str | None,
- status: str,
- response=None,
- item=None,
- ) -> TranscodedImageFile:
- path = self.file_path(request, item=item)
- return {
- "url": request.url,
- "path": path,
- "published_url": self.published_url(path, item),
- "checksum": checksum,
- "status": status,
- "source_path": self.resolve_source_path(request, response),
- "variants": self.load_variants_from_disk(request, item=item),
- "thumbnails": [],
- }
-
- def media_to_download(self, request, info, *, item=None):
- canonical_path = self.file_path(request, info=info, item=item)
- canonical_stat = cast(
- dict[str, Any] | None,
- self.store.stat_file(canonical_path, info),
- )
- if not canonical_stat:
- return None
- last_modified = canonical_stat.get("last_modified")
- if not last_modified:
- return None
- age_days = (time.time() - last_modified) / 60 / 60 / 24
- if age_days > self.expires:
- return None
- if not cast(
- dict[str, Any] | None,
- self.store.stat_file(self.resolve_source_path(request), info),
- ):
- return None
- for _, _, path in self.variant_paths(request.url):
- if not cast(dict[str, Any] | None, self.store.stat_file(path, info)):
- return None
- self.inc_stats("uptodate")
- return self.make_file_result(
- request,
- checksum=canonical_stat.get("checksum"),
- status="uptodate",
- item=item,
- )
-
- def persist_variants(self, response, request, info, *, item=None) -> str | None:
- source_file_path = self.local_store_path(self.source_path(request, response))
- source_buf = BytesIO(response.body)
- source_image = load_image_from_buffer(response.body).autorot()
- if source_image.width < self.min_width or source_image.height < self.min_height:
- raise ImageException(
- "Image too small "
- f"({source_image.width}x{source_image.height} < "
- f"{self.min_width}x{self.min_height})"
- )
- if not cast(
- dict[str, Any] | None,
- self.store.stat_file(self.source_path(request, response), info),
- ):
- self.store.persist_file(
- self.source_path(request, response),
- source_buf,
- info,
- meta={"width": source_image.width, "height": source_image.height},
- headers={
- "Content-Type": image_loader_mimetype(
- image_loader_name(source_image),
- image_mimetype(response, url=request.url),
- )
- or "application/octet-stream"
- },
- )
- canonical_path = self.file_path(
- request, response=response, info=info, item=item
- )
- canonical_checksum = None
- for _, setting, final_path in self.variant_paths(request.url):
- stat = cast(dict[str, Any] | None, self.store.stat_file(final_path, info))
- if stat:
- if final_path == canonical_path:
- canonical_checksum = stat.get("checksum")
- continue
- out_buf = render_image_profile(source_file_path, setting)
- width, height, file_size, _ = image_buffer_meta(
- out_buf.getvalue(),
- fallback_mimetype=setting["mimetype"],
- )
- checksum = buffer_checksum(out_buf)
- self.store.persist_file(
- final_path,
- out_buf,
- info,
- meta={"width": width, "height": height, "fileSize": file_size},
- headers={"Content-Type": setting["mimetype"]},
- )
- if final_path == canonical_path:
- canonical_checksum = checksum
- return canonical_checksum
-
- def media_downloaded(self, response, request, info, *, item=None):
- if response.status != 200:
- raise FileException("download-error")
- if not response.body:
- raise FileException("empty-content")
- status = "cached" if "cached" in response.flags else "downloaded"
- self.inc_stats(status)
- checksum = self.persist_variants(response, request, info, item=item)
- return self.make_file_result(
- request,
- checksum=checksum,
- status=status,
- response=response,
- item=item,
- )
-
-
-class ImageThumbnailPipeline:
- @classmethod
- def from_crawler(cls, crawler: Crawler):
- return cls(crawler.settings["IMAGES_STORE"], crawler=crawler)
-
- def __init__(self, store_uri: Union[str, PathLike], *, crawler: Crawler):
- self.settings = crawler.settings
- self.store_dir = Path(store_uri)
-
- def get_thumbnail_settings(self) -> list[dict[str, Any]]:
- return list(self.settings["REPUBLISHER_IMAGE_THUMBNAILS"])
-
- def local_store_path(self, path: str) -> Path:
- return self.store_dir / path
-
- def published_url(self, path: str, item=None) -> str:
- relative_path = f"{self.settings['REPUBLISHER_IMAGE_DIR']}/{path}"
- feed_url = str(self.settings.get("REPUBLISHER_FEED_URL", "")).rstrip("/")
- if feed_url == "" or item is None:
- return relative_path
- return f"{feed_url}/feeds/{item.feed_name}/{relative_path}"
-
- def persist_thumbnail(
- self, source_file: Path, final_path: str, profile: dict[str, Any]
- ):
- out_buf = render_image_profile(source_file, profile)
- target = self.local_store_path(final_path)
- target.parent.mkdir(parents=True, exist_ok=True)
- target.write_bytes(out_buf.getvalue())
-
- def load_thumbnail(
- self,
- *,
- source_url: str,
- profile: dict[str, Any],
- item=None,
- ) -> ThumbnailVariant | None:
- final_path = repub.utils.thumbnail_image_path(source_url, profile)
- file_path = self.local_store_path(final_path)
- if not file_path.exists():
- return None
- width, height, _, mimetype = image_variant_meta(
- file_path,
- fallback_mimetype=profile["mimetype"],
- )
- return {
- "url": self.published_url(final_path, item),
- "path": final_path,
- "slot": str(profile["name"]),
- "type": mimetype or profile["mimetype"],
- "width": width,
- "height": height,
- }
-
- def process_item(self, item, spider):
- del spider
- if not getattr(item, "images", None):
- return item
- for image in item.images:
- source_path = image.get("source_path")
- if not source_path:
- image["thumbnails"] = []
- continue
- source_file = self.local_store_path(source_path)
- thumbnails: list[ThumbnailVariant] = []
- for profile in self.get_thumbnail_settings():
- final_path = repub.utils.thumbnail_image_path(image["url"], profile)
- if not self.local_store_path(final_path).exists():
- try:
- self.persist_thumbnail(source_file, final_path, profile)
- except ImageException as exc:
- logger.warning(
- "Failed to generate thumbnail for %s: %s", image["url"], exc
- )
- continue
- thumbnail = self.load_thumbnail(
- source_url=image["url"],
- profile=profile,
- item=item,
- )
- if thumbnail is not None:
- thumbnails.append(thumbnail)
- image["thumbnails"] = thumbnails
- return item
-
-
-ImagePipeline = ImageNormalizePipeline
+ def thumb_path(self, request, thumb_id, response=None, info=None, *, item=None):
+ raise NotImplementedError()
class FilePipeline(BaseFilesPipeline):
diff --git a/repub/rss.py b/repub/rss.py
index 4b0ba84..b2274c0 100644
--- a/repub/rss.py
+++ b/repub/rss.py
@@ -46,7 +46,6 @@ nsmap = {
"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd",
"dc": "http://purl.org/dc/elements/1.1/",
"atom": "http://www.w3.org/2005/Atom",
- "anynews": "https://guardianproject.info/rss/anynews/1.0",
}
CONTENT = SafeElementMaker(nsmap={None: nsmap["content"]}, namespace=nsmap["content"])
diff --git a/repub/settings.py b/repub/settings.py
index 5b0cfcb..252c974 100644
--- a/repub/settings.py
+++ b/repub/settings.py
@@ -100,116 +100,6 @@ LOG_LEVEL = "INFO"
MEDIA_ALLOW_REDIRECTS = True
-REPUBLISHER_IMAGE_NORMALIZE_ENABLED = True
-REPUBLISHER_IMAGE_THUMBNAILS_ENABLED = True
-
-REPUBLISHER_IMAGE_DIR = "images"
-REPUBLISHER_IMAGE_FULL_SUBDIR = "full"
-REPUBLISHER_IMAGE_SOURCE_SUBDIR = "source"
-REPUBLISHER_IMAGE_THUMBNAIL_SUBDIR = "thumbs"
-
-REPUBLISHER_IMAGE = [
- {
- "name": "main_webp",
- "mimetype": "image/webp",
- "extension": "webp",
- "transform": "thumbnail",
- "transform_kwargs": {
- "width": 1600,
- "height": 1600,
- "size": "down",
- "no_rotate": False,
- "linear": False,
- "fail_on": "warning",
- },
- "save": "webpsave_buffer",
- "save_kwargs": {
- "Q": 82,
- "preset": "photo",
- "smart_subsample": True,
- "effort": 4,
- "alpha_q": 90,
- "keep": "none",
- },
- },
- {
- "name": "fallback_jpeg",
- "mimetype": "image/jpeg",
- "extension": "jpg",
- "transform": "thumbnail",
- "transform_kwargs": {
- "width": 1600,
- "height": 1600,
- "size": "down",
- "no_rotate": False,
- "linear": False,
- "fail_on": "warning",
- },
- "save": "jpegsave_buffer",
- "save_kwargs": {
- "Q": 85,
- "interlace": True,
- "optimize_coding": True,
- "trellis_quant": True,
- "optimize_scans": True,
- "subsample_mode": "auto",
- "keep": "none",
- "background": [255, 255, 255],
- },
- },
-]
-
-REPUBLISHER_IMAGE_THUMBNAILS = [
- {
- "name": "card_hero",
- "mimetype": "image/jpeg",
- "extension": "jpg",
- "transform": "thumbnail",
- "transform_kwargs": {
- "width": 640,
- "height": 360,
- "size": "down",
- "crop": "attention",
- "no_rotate": False,
- "linear": False,
- "fail_on": "warning",
- },
- "save": "jpegsave_buffer",
- "save_kwargs": {
- "Q": 82,
- "interlace": True,
- "optimize_coding": True,
- "subsample_mode": "auto",
- "keep": "none",
- "background": [255, 255, 255],
- },
- },
- {
- "name": "list_square",
- "mimetype": "image/jpeg",
- "extension": "jpg",
- "transform": "thumbnail",
- "transform_kwargs": {
- "width": 160,
- "height": 160,
- "size": "down",
- "crop": "centre",
- "no_rotate": False,
- "linear": False,
- "fail_on": "warning",
- },
- "save": "jpegsave_buffer",
- "save_kwargs": {
- "Q": 78,
- "interlace": True,
- "optimize_coding": True,
- "subsample_mode": "auto",
- "keep": "none",
- "background": [255, 255, 255],
- },
- },
-]
-
REPUBLISHER_AUDIO = [
{
"name": "mp3_vbr7_voice",
diff --git a/repub/spiders/rss_spider.py b/repub/spiders/rss_spider.py
index 5b11129..fa27317 100644
--- a/repub/spiders/rss_spider.py
+++ b/repub/spiders/rss_spider.py
@@ -21,7 +21,6 @@ from repub.rss import (
)
from repub.utils import (
FileType,
- canonical_published_image_path,
canonical_published_media_path,
determine_file_type,
local_file_path,
@@ -55,16 +54,7 @@ class BaseRssFeedSpider(Spider):
local_path = local_file_path(url)
if file_type == FileType.IMAGE:
file_dir = self.settings["REPUBLISHER_IMAGE_DIR"]
- image_profiles = (
- self.settings.get("REPUBLISHER_IMAGE") or []
- if self.settings.getbool("REPUBLISHER_IMAGE_NORMALIZE_ENABLED", True)
- else []
- )
- local_path = (
- canonical_published_image_path(url, image_profiles)
- if image_profiles
- else local_image_path(url)
- )
+ local_path = local_image_path(url)
elif file_type == FileType.VIDEO:
file_dir = self.settings["REPUBLISHER_VIDEO_DIR"]
local_path = canonical_published_media_path(
@@ -288,7 +278,6 @@ class RssFeedSpider(BaseRssFeedSpider):
def parse_entry(self, response, feed, entry):
image_urls = []
- media_image_urls = []
file_urls = []
audio_urls = []
video_urls = []
@@ -334,7 +323,6 @@ class RssFeedSpider(BaseRssFeedSpider):
)
if entry.get("image"):
image_urls.append(entry.get("image").href)
- media_image_urls.append(entry.get("image").href)
for enc in entry.enclosures:
url = enc.get("href")
file_type = determine_file_type(url=url, mimetype=enc.get("type"))
@@ -393,8 +381,6 @@ class RssFeedSpider(BaseRssFeedSpider):
)
)
add_url(file_type, media.get("url"))
- if file_type == FileType.IMAGE:
- media_image_urls.append(media.get("url"))
return ElementItem(
feed_name=self.feed_name,
el=item,
@@ -406,7 +392,6 @@ class RssFeedSpider(BaseRssFeedSpider):
audios=[],
video_urls=video_urls,
videos=[],
- media_image_urls=media_image_urls,
)
WEBMASTER_VALUE = "support@guardianproject.info (Guardian Project)"
diff --git a/repub/static/app.css b/repub/static/app.css
index 11ab841..94b02ed 100644
--- a/repub/static/app.css
+++ b/repub/static/app.css
@@ -1,4 +1,4 @@
-/*! tailwindcss v4.3.0 | MIT License | https://tailwindcss.com */
+/*! tailwindcss v4.2.1 | MIT License | https://tailwindcss.com */
@layer properties;
@layer theme, base, components, utilities;
@layer theme {
@@ -245,6 +245,9 @@
.inset-x-0 {
inset-inline: calc(var(--spacing) * 0);
}
+ .start {
+ inset-inline-start: var(--spacing);
+ }
.top-0 {
top: calc(var(--spacing) * 0);
}
@@ -416,9 +419,6 @@
.rotate-180 {
rotate: 180deg;
}
- .transform {
- transform: var(--tw-rotate-x,) var(--tw-rotate-y,) var(--tw-rotate-z,) var(--tw-skew-x,) var(--tw-skew-y,);
- }
.animate-pulse {
animation: var(--animate-pulse);
}
@@ -1221,26 +1221,6 @@
inherits: false;
initial-value: 0;
}
-@property --tw-rotate-x {
- syntax: "*";
- inherits: false;
-}
-@property --tw-rotate-y {
- syntax: "*";
- inherits: false;
-}
-@property --tw-rotate-z {
- syntax: "*";
- inherits: false;
-}
-@property --tw-skew-x {
- syntax: "*";
- inherits: false;
-}
-@property --tw-skew-y {
- syntax: "*";
- inherits: false;
-}
@property --tw-space-y-reverse {
syntax: "*";
inherits: false;
@@ -1480,11 +1460,6 @@
--tw-translate-x: 0;
--tw-translate-y: 0;
--tw-translate-z: 0;
- --tw-rotate-x: initial;
- --tw-rotate-y: initial;
- --tw-rotate-z: initial;
- --tw-skew-x: initial;
- --tw-skew-y: initial;
--tw-space-y-reverse: 0;
--tw-space-x-reverse: 0;
--tw-divide-y-reverse: 0;
diff --git a/repub/utils.py b/repub/utils.py
index b443053..b8379a1 100644
--- a/repub/utils.py
+++ b/repub/utils.py
@@ -43,50 +43,6 @@ def local_audio_path(s: str) -> str:
return local_file_path(s)
-def image_guid(source_url: str) -> str:
- return hashlib.sha1(to_bytes(source_url)).hexdigest() # nosec
-
-
-def image_extension(mimetype_or_extension: str | None, source_url: str = "") -> str:
- if mimetype_or_extension:
- if mimetype_or_extension.startswith("."):
- extension = mimetype_or_extension
- elif "/" in mimetype_or_extension:
- extension = mimetypes.guess_extension(mimetype_or_extension) or ""
- else:
- extension = f".{mimetype_or_extension.lstrip('.')}"
- if extension == ".jpe":
- return ".jpg"
- return extension
- guessed = Path(source_url).suffix
- if guessed == ".jpe":
- return ".jpg"
- if guessed:
- return guessed
- return ".img"
-
-
-def source_image_path(source_url: str, mimetype_or_extension: str | None = None) -> str:
- extension = image_extension(mimetype_or_extension, source_url)
- return f"source/{image_guid(source_url)}{extension}"
-
-
-def published_image_path(source_url: str, profile: Mapping[str, Any]) -> str:
- return variant_media_path(f"full/{image_guid(source_url)}", profile, hashed=True)
-
-
-def canonical_published_image_path(
- source_url: str, profiles: Sequence[Mapping[str, Any]]
-) -> str:
- if not profiles:
- raise ValueError("Missing image normalization profiles")
- return published_image_path(source_url, profiles[0])
-
-
-def thumbnail_image_path(source_url: str, profile: Mapping[str, Any]) -> str:
- return variant_media_path(f"thumbs/{image_guid(source_url)}", profile, hashed=True)
-
-
def profile_settings_hash(profile: Mapping[str, Any]) -> str:
settings = {
key: value
@@ -109,8 +65,6 @@ def variant_media_path(
def published_media_path(
file_type: FileType, source_url: str, profile: Mapping[str, Any]
) -> str:
- if file_type == FileType.IMAGE:
- return published_image_path(source_url, profile)
if file_type == FileType.AUDIO:
return variant_media_path(local_audio_path(source_url), profile, hashed=True)
if file_type == FileType.VIDEO:
@@ -125,8 +79,6 @@ def canonical_published_media_path(
raise ValueError(f"Missing transcode profiles for {file_type.value}")
# The first configured profile is the public URL contract. Reordering profiles
# changes published URLs for already-mirrored media.
- if file_type == FileType.IMAGE:
- return canonical_published_image_path(source_url, profiles)
return published_media_path(file_type, source_url, profiles[0])
diff --git a/tests/test_config.py b/tests/test_config.py
index 1d5816b..cc59799 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -224,46 +224,7 @@ def test_build_feed_settings_can_disable_image_and_video_conversion(
convert_video=False,
)
- assert (
- "repub.pipelines.ImageNormalizePipeline" not in feed_settings["ITEM_PIPELINES"]
- )
- assert (
- "repub.pipelines.ImageThumbnailPipeline" not in feed_settings["ITEM_PIPELINES"]
- )
+ assert "repub.pipelines.ImagePipeline" not in feed_settings["ITEM_PIPELINES"]
assert "repub.pipelines.VideoPipeline" not in feed_settings["ITEM_PIPELINES"]
- assert feed_settings["REPUBLISHER_IMAGE_NORMALIZE_ENABLED"] is False
- assert feed_settings["REPUBLISHER_IMAGE_THUMBNAILS_ENABLED"] is False
- assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.AudioPipeline"] == 3
- assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.FilePipeline"] == 5
-
-
-def test_build_feed_settings_respects_image_pipeline_feature_flags(
- tmp_path: Path,
-) -> None:
- out_dir = (tmp_path / "mirror").resolve()
- config = RepublisherConfig(
- config_path=tmp_path / "repub.toml",
- out_dir=out_dir,
- feeds=(
- FeedConfig(
- name="Guardian Project Podcast",
- slug="gp-pod",
- url="https://guardianproject.info/podcast/podcast.xml",
- ),
- ),
- scrapy_settings={"REPUBLISHER_IMAGE_THUMBNAILS_ENABLED": False},
- )
-
- base_settings = build_base_settings(config)
- feed_settings = build_feed_settings(
- base_settings,
- out_dir=out_dir,
- feed_slug="gp-pod",
- )
-
- assert (
- feed_settings["ITEM_PIPELINES"]["repub.pipelines.ImageNormalizePipeline"] == 1
- )
- assert (
- "repub.pipelines.ImageThumbnailPipeline" not in feed_settings["ITEM_PIPELINES"]
- )
+ assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.AudioPipeline"] == 2
+ assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.FilePipeline"] == 4
diff --git a/tests/test_feed_validation.py b/tests/test_feed_validation.py
index f395770..9e1f80b 100644
--- a/tests/test_feed_validation.py
+++ b/tests/test_feed_validation.py
@@ -16,12 +16,10 @@ from repub.rss import nsmap
from repub.spiders.rss_spider import RssFeedSpider
from repub.utils import (
FileType,
- canonical_published_image_path,
local_audio_path,
+ local_image_path,
local_video_path,
- published_image_path,
published_media_path,
- thumbnail_image_path,
)
RSS_DATE_PATTERN = re.compile(
@@ -46,7 +44,6 @@ def _serialize_feed(
"REPUBLISHER_FILE_DIR": "files",
"REPUBLISHER_AUDIO_DIR": "audio",
"REPUBLISHER_VIDEO_DIR": "video",
- "REPUBLISHER_IMAGE": repub_settings.REPUBLISHER_IMAGE,
"REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
"REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
"REPUBLISHER_FEED_URL": feed_url,
@@ -78,18 +75,6 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
source_video = "https://source.example/media/video.mp4"
channel_image = "https://source.example/media/channel.png"
item_image = "https://source.example/media/cover.jpg"
- image_main_path = published_image_path(
- source_image,
- repub_settings.REPUBLISHER_IMAGE[0],
- )
- image_fallback_path = published_image_path(
- source_image,
- repub_settings.REPUBLISHER_IMAGE[1],
- )
- image_thumbnail_path = thumbnail_image_path(
- source_image,
- repub_settings.REPUBLISHER_IMAGE_THUMBNAILS[0],
- )
audio_base_path = local_audio_path(source_audio)
audio_default_path = published_media_path(
FileType.AUDIO, source_audio, repub_settings.REPUBLISHER_AUDIO[0]
@@ -109,60 +94,6 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
)
def prepare_item(item: ElementItem) -> None:
- item.images = [
- {
- "url": source_image,
- "path": image_main_path,
- "published_url": _published_url(
- "https://mirror.example",
- f"images/{image_main_path}",
- ),
- "checksum": "image-default",
- "status": "downloaded",
- "source_path": "source/ignored.png",
- "variants": [
- {
- "url": _published_url(
- "https://mirror.example",
- f"images/{image_main_path}",
- ),
- "path": image_main_path,
- "type": "image/webp",
- "medium": "image",
- "isDefault": "true",
- "fileSize": "2345",
- "width": "1200",
- "height": "675",
- },
- {
- "url": _published_url(
- "https://mirror.example",
- f"images/{image_fallback_path}",
- ),
- "path": image_fallback_path,
- "type": "image/jpeg",
- "medium": "image",
- "isDefault": "false",
- "fileSize": "3456",
- "width": "1200",
- "height": "675",
- },
- ],
- "thumbnails": [
- {
- "url": _published_url(
- "https://mirror.example",
- f"images/{image_thumbnail_path}",
- ),
- "path": image_thumbnail_path,
- "slot": "card_hero",
- "type": "image/jpeg",
- "width": "640",
- "height": "360",
- }
- ],
- }
- ]
item.audios = [
{
"url": source_audio,
@@ -330,7 +261,6 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
]]>