Compare commits
3 commits
180677efa7
...
18a7f652d4
| Author | SHA1 | Date | |
|---|---|---|---|
| 18a7f652d4 | |||
| 525393272e | |||
| 7316d4723f |
17 changed files with 1464 additions and 83 deletions
42
flake.lock
generated
42
flake.lock
generated
|
|
@ -2,16 +2,18 @@
|
||||||
"nodes": {
|
"nodes": {
|
||||||
"nixpkgs": {
|
"nixpkgs": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1774386573,
|
"lastModified": 1779622335,
|
||||||
"narHash": "sha256-4hAV26quOxdC6iyG7kYaZcM3VOskcPUrdCQd/nx8obc=",
|
"narHash": "sha256-ViA62qtL5za7V3d5I8OA9q9JcFhsVAiL5jVHwEclWqk=",
|
||||||
"rev": "46db2e09e1d3f113a13c0d7b81e2f221c63b8ce9",
|
"owner": "nixos",
|
||||||
"revCount": 969196,
|
"repo": "nixpkgs",
|
||||||
"type": "tarball",
|
"rev": "705e9929918b43bd7b715dc0a878ac870449bb03",
|
||||||
"url": "https://api.flakehub.com/f/pinned/NixOS/nixpkgs/0.1.969196%2Brev-46db2e09e1d3f113a13c0d7b81e2f221c63b8ce9/019d279e-af65-79ce-92be-5dee7b1e36d4/source.tar.gz"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"type": "tarball",
|
"owner": "nixos",
|
||||||
"url": "https://flakehub.com/f/NixOS/nixpkgs/0.1"
|
"ref": "nixos-26.05",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"type": "github"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"pyproject-build-systems": {
|
"pyproject-build-systems": {
|
||||||
|
|
@ -27,11 +29,11 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1773870109,
|
"lastModified": 1779676664,
|
||||||
"narHash": "sha256-ZoTdqZP03DcdoyxvpFHCAek4bkPUTUPUF3oCCgc3dP4=",
|
"narHash": "sha256-MbXylBTkWqVm8/VYjoULtMoVRgWBN1gSHbeRKsOsPlU=",
|
||||||
"owner": "pyproject-nix",
|
"owner": "pyproject-nix",
|
||||||
"repo": "build-system-pkgs",
|
"repo": "build-system-pkgs",
|
||||||
"rev": "b6e74f433b02fa4b8a7965ee24680f4867e2926f",
|
"rev": "7bff980f37fc24e09dbc986643719900c139bf12",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
|
@ -47,11 +49,11 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1774498001,
|
"lastModified": 1778901413,
|
||||||
"narHash": "sha256-wTfdyzzrmpuqt4TQQNqilF91v0m5Mh1stNy9h7a/WK4=",
|
"narHash": "sha256-GSKXTAnFqRAMlZkJrIPcQMYf+lpMr66K3i60mB9STvc=",
|
||||||
"owner": "pyproject-nix",
|
"owner": "pyproject-nix",
|
||||||
"repo": "pyproject.nix",
|
"repo": "pyproject.nix",
|
||||||
"rev": "794afa6eb588b498344f2eaa36ab1ceb7e6b0b09",
|
"rev": "a228447c3e179d477c1b6246ef3efa8cfe3c469a",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
|
@ -76,11 +78,11 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1773297127,
|
"lastModified": 1775636079,
|
||||||
"narHash": "sha256-6E/yhXP7Oy/NbXtf1ktzmU8SdVqJQ09HC/48ebEGBpk=",
|
"narHash": "sha256-pc20NRoMdiar8oPQceQT47UUZMBTiMdUuWrYu2obUP0=",
|
||||||
"owner": "numtide",
|
"owner": "numtide",
|
||||||
"repo": "treefmt-nix",
|
"repo": "treefmt-nix",
|
||||||
"rev": "71b125cd05fbfd78cab3e070b73544abe24c5016",
|
"rev": "790751ff7fd3801feeaf96d7dc416a8d581265ba",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
|
@ -99,11 +101,11 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1774705889,
|
"lastModified": 1779411315,
|
||||||
"narHash": "sha256-TRTIM18gP3ccBj3m8bV1zx82xeYweNYp8/lgcdR4Zz0=",
|
"narHash": "sha256-IMFlxeyClau51KplhhSRGhdGTvD/knShHdybP1UOTuk=",
|
||||||
"owner": "pyproject-nix",
|
"owner": "pyproject-nix",
|
||||||
"repo": "uv2nix",
|
"repo": "uv2nix",
|
||||||
"rev": "28355ed75b466a15ff324e1baa151b550619fe67",
|
"rev": "fdf2a76275d7a9c27deb5d2f2ab33526ac9052ff",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
|
|
||||||
14
flake.nix
14
flake.nix
|
|
@ -2,7 +2,7 @@
|
||||||
description = "republisher-redux - offline RSS and Atom feed mirroring";
|
description = "republisher-redux - offline RSS and Atom feed mirroring";
|
||||||
|
|
||||||
inputs = {
|
inputs = {
|
||||||
nixpkgs.url = "https://flakehub.com/f/NixOS/nixpkgs/0.1";
|
nixpkgs.url = "github:nixos/nixpkgs/nixos-26.05";
|
||||||
treefmt-nix = {
|
treefmt-nix = {
|
||||||
url = "github:numtide/treefmt-nix";
|
url = "github:numtide/treefmt-nix";
|
||||||
inputs.nixpkgs.follows = "nixpkgs";
|
inputs.nixpkgs.follows = "nixpkgs";
|
||||||
|
|
@ -63,6 +63,12 @@
|
||||||
feedgen = prev.feedgen.overrideAttrs (old: {
|
feedgen = prev.feedgen.overrideAttrs (old: {
|
||||||
nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ final.setuptools ];
|
nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ final.setuptools ];
|
||||||
});
|
});
|
||||||
|
pyvips = prev.pyvips.overrideAttrs (old: {
|
||||||
|
nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
|
||||||
|
final.setuptools
|
||||||
|
final.pkgconfig
|
||||||
|
];
|
||||||
|
});
|
||||||
pygea = prev.pygea.overrideAttrs (old: {
|
pygea = prev.pygea.overrideAttrs (old: {
|
||||||
nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
|
nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
|
||||||
final.hatchling
|
final.hatchling
|
||||||
|
|
@ -108,6 +114,7 @@
|
||||||
checkPhase = ''
|
checkPhase = ''
|
||||||
runHook preCheck
|
runHook preCheck
|
||||||
export HOME="$(mktemp -d)"
|
export HOME="$(mktemp -d)"
|
||||||
|
export LD_LIBRARY_PATH="${pkgs.lib.makeLibraryPath [ pkgs.vips ]}:$LD_LIBRARY_PATH"
|
||||||
pytest tests/ -v
|
pytest tests/ -v
|
||||||
runHook postCheck
|
runHook postCheck
|
||||||
'';
|
'';
|
||||||
|
|
@ -125,7 +132,8 @@
|
||||||
postBuild = ''
|
postBuild = ''
|
||||||
rm -f "$out/bin/repub"
|
rm -f "$out/bin/repub"
|
||||||
makeWrapper "${baseVenv}/bin/repub" "$out/bin/repub" \
|
makeWrapper "${baseVenv}/bin/repub" "$out/bin/repub" \
|
||||||
--prefix PATH : "${pkgs.lib.makeBinPath [ ffmpegPackage ]}"
|
--prefix PATH : "${pkgs.lib.makeBinPath [ ffmpegPackage ]}" \
|
||||||
|
--prefix LD_LIBRARY_PATH : "${pkgs.lib.makeLibraryPath [ pkgs.vips ]}"
|
||||||
'';
|
'';
|
||||||
meta.mainProgram = "repub";
|
meta.mainProgram = "repub";
|
||||||
};
|
};
|
||||||
|
|
@ -273,12 +281,14 @@
|
||||||
packages = [
|
packages = [
|
||||||
pkgs.tailwindcss_4
|
pkgs.tailwindcss_4
|
||||||
pkgs.python313
|
pkgs.python313
|
||||||
|
pkgs.vips
|
||||||
pkgs.uv
|
pkgs.uv
|
||||||
pkgs.pyright
|
pkgs.pyright
|
||||||
(mkFfmpegPackage pkgs)
|
(mkFfmpegPackage pkgs)
|
||||||
];
|
];
|
||||||
env.LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath [
|
env.LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath [
|
||||||
pkgs.stdenv.cc.cc
|
pkgs.stdenv.cc.cc
|
||||||
|
pkgs.vips
|
||||||
];
|
];
|
||||||
env.UV_PROJECT_ENVIRONMENT = ".venv";
|
env.UV_PROJECT_ENVIRONMENT = ".venv";
|
||||||
env.UV_PYTHON_DOWNLOADS = "never";
|
env.UV_PYTHON_DOWNLOADS = "never";
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ dependencies = [
|
||||||
"colorlog>=6.8.2,<7.0.0",
|
"colorlog>=6.8.2,<7.0.0",
|
||||||
"feedparser>=6.0.11,<7.0.0",
|
"feedparser>=6.0.11,<7.0.0",
|
||||||
"lxml>=5.2.1,<6.0.0",
|
"lxml>=5.2.1,<6.0.0",
|
||||||
"pillow>=10.3.0,<11.0.0",
|
"pyvips>=3.0.0,<4.0.0",
|
||||||
"ffmpeg-python>=0.2.0,<0.3.0",
|
"ffmpeg-python>=0.2.0,<0.3.0",
|
||||||
"Quart>=0.20.0,<0.21.0",
|
"Quart>=0.20.0,<0.21.0",
|
||||||
"hypercorn>=0.18.0,<0.19.0",
|
"hypercorn>=0.18.0,<0.19.0",
|
||||||
|
|
|
||||||
|
|
@ -188,21 +188,31 @@ def build_feed_settings(
|
||||||
video_dir = base_settings.get("REPUBLISHER_VIDEO_DIR", VIDEO_DIR)
|
video_dir = base_settings.get("REPUBLISHER_VIDEO_DIR", VIDEO_DIR)
|
||||||
audio_dir = base_settings.get("REPUBLISHER_AUDIO_DIR", AUDIO_DIR)
|
audio_dir = base_settings.get("REPUBLISHER_AUDIO_DIR", AUDIO_DIR)
|
||||||
file_dir = base_settings.get("REPUBLISHER_FILE_DIR", FILE_DIR)
|
file_dir = base_settings.get("REPUBLISHER_FILE_DIR", FILE_DIR)
|
||||||
|
image_normalize_enabled = convert_images and base_settings.getbool(
|
||||||
|
"REPUBLISHER_IMAGE_NORMALIZE_ENABLED", True
|
||||||
|
)
|
||||||
|
image_thumbnails_enabled = image_normalize_enabled and base_settings.getbool(
|
||||||
|
"REPUBLISHER_IMAGE_THUMBNAILS_ENABLED", True
|
||||||
|
)
|
||||||
item_pipelines = dict(base_settings.getdict("ITEM_PIPELINES"))
|
item_pipelines = dict(base_settings.getdict("ITEM_PIPELINES"))
|
||||||
item_pipelines.pop("repub.pipelines.ImagePipeline", None)
|
item_pipelines.pop("repub.pipelines.ImagePipeline", None)
|
||||||
|
item_pipelines.pop("repub.pipelines.ImageNormalizePipeline", None)
|
||||||
|
item_pipelines.pop("repub.pipelines.ImageThumbnailPipeline", None)
|
||||||
item_pipelines.pop("repub.pipelines.AudioPipeline", None)
|
item_pipelines.pop("repub.pipelines.AudioPipeline", None)
|
||||||
item_pipelines.pop("repub.pipelines.VideoPipeline", None)
|
item_pipelines.pop("repub.pipelines.VideoPipeline", None)
|
||||||
item_pipelines.pop("repub.pipelines.FilePipeline", None)
|
item_pipelines.pop("repub.pipelines.FilePipeline", None)
|
||||||
item_pipelines.update(
|
item_pipelines.update(
|
||||||
{
|
{
|
||||||
"repub.pipelines.AudioPipeline": 2,
|
"repub.pipelines.AudioPipeline": 3,
|
||||||
"repub.pipelines.FilePipeline": 4,
|
"repub.pipelines.FilePipeline": 5,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
if convert_images:
|
if image_normalize_enabled:
|
||||||
item_pipelines["repub.pipelines.ImagePipeline"] = 1
|
item_pipelines["repub.pipelines.ImageNormalizePipeline"] = 1
|
||||||
|
if image_thumbnails_enabled:
|
||||||
|
item_pipelines["repub.pipelines.ImageThumbnailPipeline"] = 2
|
||||||
if convert_video:
|
if convert_video:
|
||||||
item_pipelines["repub.pipelines.VideoPipeline"] = 3
|
item_pipelines["repub.pipelines.VideoPipeline"] = 4
|
||||||
settings = base_settings.copy()
|
settings = base_settings.copy()
|
||||||
settings.setdict(
|
settings.setdict(
|
||||||
{
|
{
|
||||||
|
|
@ -219,6 +229,8 @@ def build_feed_settings(
|
||||||
"LOG_FILE": str(out_dir / "logs" / f"{feed_slug}.log"),
|
"LOG_FILE": str(out_dir / "logs" / f"{feed_slug}.log"),
|
||||||
"HTTPCACHE_DIR": str(out_dir / "httpcache"),
|
"HTTPCACHE_DIR": str(out_dir / "httpcache"),
|
||||||
"REPUBLISHER_IMAGE_DIR": image_dir,
|
"REPUBLISHER_IMAGE_DIR": image_dir,
|
||||||
|
"REPUBLISHER_IMAGE_NORMALIZE_ENABLED": image_normalize_enabled,
|
||||||
|
"REPUBLISHER_IMAGE_THUMBNAILS_ENABLED": image_thumbnails_enabled,
|
||||||
"REPUBLISHER_VIDEO_DIR": video_dir,
|
"REPUBLISHER_VIDEO_DIR": video_dir,
|
||||||
"REPUBLISHER_AUDIO_DIR": audio_dir,
|
"REPUBLISHER_AUDIO_DIR": audio_dir,
|
||||||
"REPUBLISHER_FILE_DIR": file_dir,
|
"REPUBLISHER_FILE_DIR": file_dir,
|
||||||
|
|
|
||||||
|
|
@ -9,12 +9,17 @@ from repub.items import (
|
||||||
ChannelElementItem,
|
ChannelElementItem,
|
||||||
ElementItem,
|
ElementItem,
|
||||||
MediaVariant,
|
MediaVariant,
|
||||||
|
ThumbnailVariant,
|
||||||
|
TranscodedImageFile,
|
||||||
TranscodedMediaFile,
|
TranscodedMediaFile,
|
||||||
)
|
)
|
||||||
from repub.utils import FileType, determine_file_type
|
from repub.utils import FileType, determine_file_type
|
||||||
|
|
||||||
MEDIA_CONTENT_TAG = QName(rss.nsmap["media"], "content").text
|
MEDIA_CONTENT_TAG = QName(rss.nsmap["media"], "content").text
|
||||||
MEDIA_GROUP_TAG = QName(rss.nsmap["media"], "group").text
|
MEDIA_GROUP_TAG = QName(rss.nsmap["media"], "group").text
|
||||||
|
MEDIA_THUMBNAIL_TAG = QName(rss.nsmap["media"], "thumbnail").text
|
||||||
|
ANYNEWS_SLOT_ATTR = QName(rss.nsmap["anynews"], "slot").text
|
||||||
|
ANYNEWS_TYPE_ATTR = QName(rss.nsmap["anynews"], "type").text
|
||||||
|
|
||||||
|
|
||||||
class RssExporter(BaseItemExporter):
|
class RssExporter(BaseItemExporter):
|
||||||
|
|
@ -52,7 +57,9 @@ class RssExporter(BaseItemExporter):
|
||||||
key: str(value) for key, value in attrib.items() if value not in (None, "")
|
key: str(value) for key, value in attrib.items() if value not in (None, "")
|
||||||
}
|
}
|
||||||
|
|
||||||
def canonical_variant(self, media_file: TranscodedMediaFile) -> MediaVariant | None:
|
def canonical_variant(
|
||||||
|
self, media_file: TranscodedMediaFile | TranscodedImageFile
|
||||||
|
) -> MediaVariant | None:
|
||||||
for variant in media_file["variants"]:
|
for variant in media_file["variants"]:
|
||||||
if variant.get("isDefault") == "true":
|
if variant.get("isDefault") == "true":
|
||||||
return variant
|
return variant
|
||||||
|
|
@ -92,6 +99,8 @@ class RssExporter(BaseItemExporter):
|
||||||
def strip_managed_media_nodes(self, item: ElementItem) -> dict[str, dict[str, str]]:
|
def strip_managed_media_nodes(self, item: ElementItem) -> dict[str, dict[str, str]]:
|
||||||
fallbacks: dict[str, dict[str, str]] = {}
|
fallbacks: dict[str, dict[str, str]] = {}
|
||||||
managed_types: set[FileType] = set()
|
managed_types: set[FileType] = set()
|
||||||
|
if self.managed_image_files(item):
|
||||||
|
managed_types.add(FileType.IMAGE)
|
||||||
if item.audios:
|
if item.audios:
|
||||||
managed_types.add(FileType.AUDIO)
|
managed_types.add(FileType.AUDIO)
|
||||||
if item.videos:
|
if item.videos:
|
||||||
|
|
@ -100,6 +109,9 @@ class RssExporter(BaseItemExporter):
|
||||||
return fallbacks
|
return fallbacks
|
||||||
|
|
||||||
for child in list(item.el):
|
for child in list(item.el):
|
||||||
|
if child.tag == MEDIA_THUMBNAIL_TAG and FileType.IMAGE in managed_types:
|
||||||
|
item.el.remove(child)
|
||||||
|
continue
|
||||||
if child.tag == MEDIA_CONTENT_TAG:
|
if child.tag == MEDIA_CONTENT_TAG:
|
||||||
if self.owned_media_type(child, managed_types) is None:
|
if self.owned_media_type(child, managed_types) is None:
|
||||||
continue
|
continue
|
||||||
|
|
@ -113,25 +125,43 @@ class RssExporter(BaseItemExporter):
|
||||||
|
|
||||||
if child.tag != MEDIA_GROUP_TAG:
|
if child.tag != MEDIA_GROUP_TAG:
|
||||||
continue
|
continue
|
||||||
|
managed_image_group = False
|
||||||
for media_content in list(child):
|
for media_content in list(child):
|
||||||
if media_content.tag != MEDIA_CONTENT_TAG:
|
if media_content.tag != MEDIA_CONTENT_TAG:
|
||||||
continue
|
continue
|
||||||
if self.owned_media_type(media_content, managed_types) is None:
|
owned_type = self.owned_media_type(media_content, managed_types)
|
||||||
|
if owned_type is None:
|
||||||
continue
|
continue
|
||||||
|
if owned_type == FileType.IMAGE:
|
||||||
|
managed_image_group = True
|
||||||
fallbacks[media_content.get("url", "")] = {
|
fallbacks[media_content.get("url", "")] = {
|
||||||
key: value
|
key: value
|
||||||
for key, value in media_content.attrib.items()
|
for key, value in media_content.attrib.items()
|
||||||
if key in {"expression", "lang"}
|
if key in {"expression", "lang"}
|
||||||
}
|
}
|
||||||
child.remove(media_content)
|
child.remove(media_content)
|
||||||
|
if managed_image_group:
|
||||||
|
for media_thumbnail in list(child):
|
||||||
|
if media_thumbnail.tag == MEDIA_THUMBNAIL_TAG:
|
||||||
|
child.remove(media_thumbnail)
|
||||||
if len(child) == 0:
|
if len(child) == 0:
|
||||||
item.el.remove(child)
|
item.el.remove(child)
|
||||||
return fallbacks
|
return fallbacks
|
||||||
|
|
||||||
|
def managed_image_files(self, item: ElementItem) -> list[TranscodedImageFile]:
|
||||||
|
media_image_urls = set(item.media_image_urls)
|
||||||
|
if not media_image_urls:
|
||||||
|
return []
|
||||||
|
return [image for image in item.images if image["url"] in media_image_urls]
|
||||||
|
|
||||||
def append_media_groups(
|
def append_media_groups(
|
||||||
self, item: ElementItem, fallbacks: dict[str, dict[str, str]]
|
self, item: ElementItem, fallbacks: dict[str, dict[str, str]]
|
||||||
):
|
):
|
||||||
for media_file in [*item.audios, *item.videos]:
|
for media_file in [
|
||||||
|
*self.managed_image_files(item),
|
||||||
|
*item.audios,
|
||||||
|
*item.videos,
|
||||||
|
]:
|
||||||
if not media_file["variants"]:
|
if not media_file["variants"]:
|
||||||
continue
|
continue
|
||||||
fallback_attrib = fallbacks.get(media_file["published_url"], {})
|
fallback_attrib = fallbacks.get(media_file["published_url"], {})
|
||||||
|
|
@ -141,7 +171,11 @@ class RssExporter(BaseItemExporter):
|
||||||
**self.media_content_attrib(variant, fallback_attrib)
|
**self.media_content_attrib(variant, fallback_attrib)
|
||||||
)
|
)
|
||||||
for variant in media_file["variants"]
|
for variant in media_file["variants"]
|
||||||
]
|
],
|
||||||
|
*[
|
||||||
|
rss.MEDIA.thumbnail(**self.media_thumbnail_attrib(thumbnail))
|
||||||
|
for thumbnail in media_file.get("thumbnails", [])
|
||||||
|
],
|
||||||
)
|
)
|
||||||
if group is not None:
|
if group is not None:
|
||||||
item.el.append(group)
|
item.el.append(group)
|
||||||
|
|
@ -170,10 +204,22 @@ class RssExporter(BaseItemExporter):
|
||||||
)
|
)
|
||||||
return attrib
|
return attrib
|
||||||
|
|
||||||
|
def media_thumbnail_attrib(self, thumbnail: ThumbnailVariant) -> dict[str, str]:
|
||||||
|
attrib = self.compact_attrib(
|
||||||
|
url=thumbnail.get("url"),
|
||||||
|
width=thumbnail.get("width"),
|
||||||
|
height=thumbnail.get("height"),
|
||||||
|
)
|
||||||
|
if thumbnail.get("slot"):
|
||||||
|
attrib[ANYNEWS_SLOT_ATTR] = str(thumbnail["slot"])
|
||||||
|
if thumbnail.get("type"):
|
||||||
|
attrib[ANYNEWS_TYPE_ATTR] = str(thumbnail["type"])
|
||||||
|
return attrib
|
||||||
|
|
||||||
def apply_transcoded_media(self, item: Any) -> None:
|
def apply_transcoded_media(self, item: Any) -> None:
|
||||||
if not isinstance(item, ElementItem):
|
if not isinstance(item, ElementItem):
|
||||||
return
|
return
|
||||||
if not item.audios and not item.videos:
|
if not self.managed_image_files(item) and not item.audios and not item.videos:
|
||||||
return
|
return
|
||||||
self.rebuild_enclosures(item)
|
self.rebuild_enclosures(item)
|
||||||
fallbacks = self.strip_managed_media_nodes(item)
|
fallbacks = self.strip_managed_media_nodes(item)
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass, field
|
||||||
from typing import Any, List, TypedDict
|
from typing import Any, List, TypedDict
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -8,7 +8,7 @@ class MediaVariant(TypedDict, total=False):
|
||||||
type: str
|
type: str
|
||||||
medium: str
|
medium: str
|
||||||
isDefault: str
|
isDefault: str
|
||||||
fileSize: str
|
fileSize: int | str
|
||||||
bitrate: int | float | str
|
bitrate: int | float | str
|
||||||
samplingrate: int | str
|
samplingrate: int | str
|
||||||
channels: int | str
|
channels: int | str
|
||||||
|
|
@ -29,18 +29,39 @@ class TranscodedMediaFile(TypedDict):
|
||||||
variants: List[MediaVariant]
|
variants: List[MediaVariant]
|
||||||
|
|
||||||
|
|
||||||
|
class ThumbnailVariant(TypedDict, total=False):
|
||||||
|
url: str
|
||||||
|
path: str
|
||||||
|
width: int | str
|
||||||
|
height: int | str
|
||||||
|
slot: str
|
||||||
|
type: str
|
||||||
|
|
||||||
|
|
||||||
|
class TranscodedImageFile(TypedDict):
|
||||||
|
url: str
|
||||||
|
path: str
|
||||||
|
checksum: str | None
|
||||||
|
status: str
|
||||||
|
published_url: str
|
||||||
|
source_path: str
|
||||||
|
variants: List[MediaVariant]
|
||||||
|
thumbnails: List[ThumbnailVariant]
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ElementItem:
|
class ElementItem:
|
||||||
feed_name: str
|
feed_name: str
|
||||||
el: Any
|
el: Any
|
||||||
image_urls: List[str]
|
image_urls: List[str]
|
||||||
images: List[Any]
|
images: List[TranscodedImageFile]
|
||||||
file_urls: List[str]
|
file_urls: List[str]
|
||||||
files: List[Any]
|
files: List[Any]
|
||||||
audio_urls: List[str]
|
audio_urls: List[str]
|
||||||
audios: List[TranscodedMediaFile]
|
audios: List[TranscodedMediaFile]
|
||||||
video_urls: List[str]
|
video_urls: List[str]
|
||||||
videos: List[TranscodedMediaFile]
|
videos: List[TranscodedMediaFile]
|
||||||
|
media_image_urls: List[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -48,4 +69,5 @@ class ChannelElementItem:
|
||||||
feed_name: str
|
feed_name: str
|
||||||
el: Any
|
el: Any
|
||||||
image_urls: List[str]
|
image_urls: List[str]
|
||||||
images: List[Any]
|
images: List[TranscodedImageFile]
|
||||||
|
media_image_urls: List[str] = field(default_factory=list)
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
import functools
|
||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import mimetypes
|
import mimetypes
|
||||||
|
|
@ -8,24 +9,482 @@ from os import PathLike
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional, Union, cast
|
from typing import Any, Dict, List, Optional, Union, cast
|
||||||
|
|
||||||
|
import pyvips
|
||||||
from scrapy.crawler import Crawler
|
from scrapy.crawler import Crawler
|
||||||
from scrapy.pipelines.files import FileException
|
from scrapy.pipelines.files import FileException
|
||||||
from scrapy.pipelines.files import FilesPipeline as BaseFilesPipeline
|
from scrapy.pipelines.files import FilesPipeline as BaseFilesPipeline
|
||||||
from scrapy.pipelines.images import ImagesPipeline as BaseImagesPipeline
|
|
||||||
|
|
||||||
import repub.utils
|
import repub.utils
|
||||||
from repub import media
|
from repub import media
|
||||||
from repub.items import MediaVariant, TranscodedMediaFile
|
from repub.items import (
|
||||||
|
MediaVariant,
|
||||||
|
ThumbnailVariant,
|
||||||
|
TranscodedImageFile,
|
||||||
|
TranscodedMediaFile,
|
||||||
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class ImagePipeline(BaseImagesPipeline):
|
class ImageException(FileException):
|
||||||
def file_path(self, request, response=None, info=None, *, item=None):
|
"""General image error exception"""
|
||||||
return repub.utils.local_image_path(request.url)
|
|
||||||
|
|
||||||
def thumb_path(self, request, thumb_id, response=None, info=None, *, item=None):
|
|
||||||
raise NotImplementedError()
|
def image_mimetype(response=None, *, url: str | None = None) -> str | None:
|
||||||
|
del url
|
||||||
|
if response is not None:
|
||||||
|
content_type = response.headers.get(b"Content-Type")
|
||||||
|
if content_type:
|
||||||
|
return content_type.decode("utf-8").split(";", 1)[0].strip()
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def image_loader_name(image: Any) -> str:
|
||||||
|
if image.get_typeof("vips-loader"):
|
||||||
|
return str(image.get("vips-loader"))
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def image_loader_mimetype(loader: str, fallback: str | None = None) -> str | None:
|
||||||
|
known = {
|
||||||
|
"jpegload": "image/jpeg",
|
||||||
|
"pngload": "image/png",
|
||||||
|
"gifload": "image/gif",
|
||||||
|
"svgload": "image/svg+xml",
|
||||||
|
"tiffload": "image/tiff",
|
||||||
|
"webpload": "image/webp",
|
||||||
|
"heifload": "image/heif",
|
||||||
|
"jxlload": "image/jxl",
|
||||||
|
}
|
||||||
|
for prefix, mimetype in known.items():
|
||||||
|
if loader.startswith(prefix):
|
||||||
|
return mimetype
|
||||||
|
return fallback
|
||||||
|
|
||||||
|
|
||||||
|
def load_image_from_buffer(body: bytes) -> Any:
|
||||||
|
try:
|
||||||
|
return cast(
|
||||||
|
Any,
|
||||||
|
pyvips.Image.new_from_buffer(body, "", access="sequential"),
|
||||||
|
)
|
||||||
|
except pyvips.Error as exc:
|
||||||
|
raise ImageException(str(exc)) from exc
|
||||||
|
|
||||||
|
|
||||||
|
def load_image_from_file(file_path: str | Path) -> Any:
|
||||||
|
try:
|
||||||
|
return cast(
|
||||||
|
Any,
|
||||||
|
pyvips.Image.new_from_file(str(file_path), access="sequential"),
|
||||||
|
)
|
||||||
|
except pyvips.Error as exc:
|
||||||
|
raise ImageException(str(exc)) from exc
|
||||||
|
|
||||||
|
|
||||||
|
def render_image_profile(source_path: str | Path, profile: dict[str, Any]) -> BytesIO:
|
||||||
|
transform = str(profile["transform"])
|
||||||
|
transform_kwargs = dict(profile.get("transform_kwargs", {}))
|
||||||
|
width = int(transform_kwargs.pop("width"))
|
||||||
|
if transform == "thumbnail":
|
||||||
|
image = cast(
|
||||||
|
Any,
|
||||||
|
pyvips.Image.thumbnail(str(source_path), width, **transform_kwargs),
|
||||||
|
)
|
||||||
|
elif transform == "thumbnail_buffer":
|
||||||
|
image = cast(
|
||||||
|
Any,
|
||||||
|
pyvips.Image.thumbnail_buffer(
|
||||||
|
Path(source_path).read_bytes(),
|
||||||
|
width,
|
||||||
|
**transform_kwargs,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise ImageException(f"Unsupported image transform: {transform}")
|
||||||
|
|
||||||
|
image = image.colourspace("srgb")
|
||||||
|
if image.hasalpha() and (
|
||||||
|
profile["mimetype"] == "image/jpeg"
|
||||||
|
or "background" in profile.get("save_kwargs", {})
|
||||||
|
):
|
||||||
|
image = image.flatten(
|
||||||
|
background=profile.get("save_kwargs", {}).get("background", [255, 255, 255])
|
||||||
|
)
|
||||||
|
|
||||||
|
save_name = str(profile["save"])
|
||||||
|
try:
|
||||||
|
image_bytes = getattr(image, save_name)(**dict(profile.get("save_kwargs", {})))
|
||||||
|
except pyvips.Error as exc:
|
||||||
|
raise ImageException(str(exc)) from exc
|
||||||
|
return BytesIO(cast(bytes, image_bytes))
|
||||||
|
|
||||||
|
|
||||||
|
def image_buffer_meta(
|
||||||
|
body: bytes,
|
||||||
|
*,
|
||||||
|
fallback_mimetype: str | None = None,
|
||||||
|
) -> tuple[int, int, int, str | None]:
|
||||||
|
image = load_image_from_buffer(body)
|
||||||
|
mimetype = image_loader_mimetype(image_loader_name(image), fallback_mimetype)
|
||||||
|
return image.width, image.height, len(body), mimetype
|
||||||
|
|
||||||
|
|
||||||
|
def image_variant_meta(
|
||||||
|
file_path: str | Path,
|
||||||
|
*,
|
||||||
|
fallback_mimetype: str | None = None,
|
||||||
|
) -> tuple[int, int, int, str | None]:
|
||||||
|
image = load_image_from_file(file_path)
|
||||||
|
mimetype = image_loader_mimetype(image_loader_name(image), fallback_mimetype)
|
||||||
|
return image.width, image.height, Path(file_path).stat().st_size, mimetype
|
||||||
|
|
||||||
|
|
||||||
|
class ImageNormalizePipeline(BaseFilesPipeline):
|
||||||
|
MEDIA_NAME = "image"
|
||||||
|
EXPIRES = 90
|
||||||
|
MIN_WIDTH = 0
|
||||||
|
MIN_HEIGHT = 0
|
||||||
|
DEFAULT_FILES_URLS_FIELD = "image_urls"
|
||||||
|
DEFAULT_FILES_RESULT_FIELD = "images"
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def from_crawler(cls, crawler: Crawler):
|
||||||
|
cls._update_stores(crawler.settings)
|
||||||
|
return cls(crawler.settings["IMAGES_STORE"], crawler=crawler)
|
||||||
|
|
||||||
|
def __init__(self, store_uri: Union[str, PathLike], *, crawler: Crawler):
|
||||||
|
self.settings = crawler.settings
|
||||||
|
super().__init__(store_uri, crawler=crawler)
|
||||||
|
resolve = functools.partial(
|
||||||
|
self._key_for_pipe,
|
||||||
|
base_class_name="ImagesPipeline",
|
||||||
|
settings=self.settings,
|
||||||
|
)
|
||||||
|
self.expires = self.settings.getint(resolve("IMAGES_EXPIRES"), self.EXPIRES)
|
||||||
|
self.files_urls_field = self.settings.get(
|
||||||
|
resolve("IMAGES_URLS_FIELD"),
|
||||||
|
self.DEFAULT_FILES_URLS_FIELD,
|
||||||
|
)
|
||||||
|
self.files_result_field = self.settings.get(
|
||||||
|
resolve("IMAGES_RESULT_FIELD"),
|
||||||
|
self.DEFAULT_FILES_RESULT_FIELD,
|
||||||
|
)
|
||||||
|
self.min_width = self.settings.getint(
|
||||||
|
resolve("IMAGES_MIN_WIDTH"),
|
||||||
|
self.MIN_WIDTH,
|
||||||
|
)
|
||||||
|
self.min_height = self.settings.getint(
|
||||||
|
resolve("IMAGES_MIN_HEIGHT"),
|
||||||
|
self.MIN_HEIGHT,
|
||||||
|
)
|
||||||
|
|
||||||
|
def get_image_settings(self) -> list[dict[str, Any]]:
|
||||||
|
return list(self.settings["REPUBLISHER_IMAGE"])
|
||||||
|
|
||||||
|
def file_path(self, request, response=None, info=None, *, item=None):
|
||||||
|
return repub.utils.canonical_published_image_path(
|
||||||
|
request.url,
|
||||||
|
self.get_image_settings(),
|
||||||
|
)
|
||||||
|
|
||||||
|
def source_path(self, request, response=None) -> str:
|
||||||
|
return repub.utils.source_image_path(
|
||||||
|
request.url,
|
||||||
|
image_mimetype(response, url=request.url),
|
||||||
|
)
|
||||||
|
|
||||||
|
def resolve_source_path(self, request, response=None) -> str:
|
||||||
|
source_path = self.source_path(request, response)
|
||||||
|
if response is not None:
|
||||||
|
return source_path
|
||||||
|
source_file = self.local_store_path(source_path)
|
||||||
|
if source_file.exists():
|
||||||
|
return source_path
|
||||||
|
source_dir = self.local_store_path(
|
||||||
|
str(self.settings.get("REPUBLISHER_IMAGE_SOURCE_SUBDIR", "source"))
|
||||||
|
)
|
||||||
|
guid = repub.utils.image_guid(request.url)
|
||||||
|
matches = sorted(source_dir.glob(f"{guid}.*"))
|
||||||
|
if matches:
|
||||||
|
return f"{source_dir.name}/{matches[0].name}"
|
||||||
|
return source_path
|
||||||
|
|
||||||
|
def variant_paths(self, source_url: str) -> list[tuple[bool, dict[str, Any], str]]:
|
||||||
|
return [
|
||||||
|
(
|
||||||
|
index == 0,
|
||||||
|
setting,
|
||||||
|
repub.utils.published_image_path(source_url, setting),
|
||||||
|
)
|
||||||
|
for index, setting in enumerate(self.get_image_settings())
|
||||||
|
]
|
||||||
|
|
||||||
|
def published_url(self, path: str, item=None) -> str:
|
||||||
|
relative_path = f"{self.settings['REPUBLISHER_IMAGE_DIR']}/{path}"
|
||||||
|
feed_url = str(self.settings.get("REPUBLISHER_FEED_URL", "")).rstrip("/")
|
||||||
|
if feed_url == "" or item is None:
|
||||||
|
return relative_path
|
||||||
|
return f"{feed_url}/feeds/{item.feed_name}/{relative_path}"
|
||||||
|
|
||||||
|
def local_store_path(self, path: str) -> Path:
|
||||||
|
return Path(cast(Any, self.store).basedir) / path
|
||||||
|
|
||||||
|
def image_variant(
|
||||||
|
self,
|
||||||
|
*,
|
||||||
|
path: str,
|
||||||
|
mimetype: str,
|
||||||
|
width: int,
|
||||||
|
height: int,
|
||||||
|
file_size: int,
|
||||||
|
is_default: bool,
|
||||||
|
item=None,
|
||||||
|
) -> MediaVariant:
|
||||||
|
variant: MediaVariant = {
|
||||||
|
"url": self.published_url(path, item),
|
||||||
|
"path": path,
|
||||||
|
"type": mimetype,
|
||||||
|
"medium": repub.utils.FileType.IMAGE.value,
|
||||||
|
"isDefault": "true" if is_default else "false",
|
||||||
|
"fileSize": file_size,
|
||||||
|
"width": width,
|
||||||
|
"height": height,
|
||||||
|
}
|
||||||
|
return variant
|
||||||
|
|
||||||
|
def load_variants_from_disk(self, request, *, item=None) -> list[MediaVariant]:
|
||||||
|
variants: list[MediaVariant] = []
|
||||||
|
for is_default, setting, path in self.variant_paths(request.url):
|
||||||
|
file_path = self.local_store_path(path)
|
||||||
|
if not file_path.exists():
|
||||||
|
continue
|
||||||
|
width, height, file_size, mimetype = image_variant_meta(
|
||||||
|
file_path,
|
||||||
|
fallback_mimetype=setting["mimetype"],
|
||||||
|
)
|
||||||
|
variants.append(
|
||||||
|
self.image_variant(
|
||||||
|
path=path,
|
||||||
|
mimetype=mimetype or setting["mimetype"],
|
||||||
|
width=width,
|
||||||
|
height=height,
|
||||||
|
file_size=file_size,
|
||||||
|
is_default=is_default,
|
||||||
|
item=item,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return variants
|
||||||
|
|
||||||
|
def make_file_result(
|
||||||
|
self,
|
||||||
|
request,
|
||||||
|
*,
|
||||||
|
checksum: str | None,
|
||||||
|
status: str,
|
||||||
|
response=None,
|
||||||
|
item=None,
|
||||||
|
) -> TranscodedImageFile:
|
||||||
|
path = self.file_path(request, item=item)
|
||||||
|
return {
|
||||||
|
"url": request.url,
|
||||||
|
"path": path,
|
||||||
|
"published_url": self.published_url(path, item),
|
||||||
|
"checksum": checksum,
|
||||||
|
"status": status,
|
||||||
|
"source_path": self.resolve_source_path(request, response),
|
||||||
|
"variants": self.load_variants_from_disk(request, item=item),
|
||||||
|
"thumbnails": [],
|
||||||
|
}
|
||||||
|
|
||||||
|
def media_to_download(self, request, info, *, item=None):
|
||||||
|
canonical_path = self.file_path(request, info=info, item=item)
|
||||||
|
canonical_stat = cast(
|
||||||
|
dict[str, Any] | None,
|
||||||
|
self.store.stat_file(canonical_path, info),
|
||||||
|
)
|
||||||
|
if not canonical_stat:
|
||||||
|
return None
|
||||||
|
last_modified = canonical_stat.get("last_modified")
|
||||||
|
if not last_modified:
|
||||||
|
return None
|
||||||
|
age_days = (time.time() - last_modified) / 60 / 60 / 24
|
||||||
|
if age_days > self.expires:
|
||||||
|
return None
|
||||||
|
if not cast(
|
||||||
|
dict[str, Any] | None,
|
||||||
|
self.store.stat_file(self.resolve_source_path(request), info),
|
||||||
|
):
|
||||||
|
return None
|
||||||
|
for _, _, path in self.variant_paths(request.url):
|
||||||
|
if not cast(dict[str, Any] | None, self.store.stat_file(path, info)):
|
||||||
|
return None
|
||||||
|
self.inc_stats("uptodate")
|
||||||
|
return self.make_file_result(
|
||||||
|
request,
|
||||||
|
checksum=canonical_stat.get("checksum"),
|
||||||
|
status="uptodate",
|
||||||
|
item=item,
|
||||||
|
)
|
||||||
|
|
||||||
|
def persist_variants(self, response, request, info, *, item=None) -> str | None:
    """Store the downloaded source image and render every missing variant.

    The response body is decoded with ``load_image_from_buffer`` and passed
    through ``.autorot()``; images smaller than ``self.min_width`` x
    ``self.min_height`` are rejected. The source bytes are persisted only if
    the store has no entry for them yet. Each path from
    ``self.variant_paths`` that is not already in the store is rendered from
    the stored source file with ``render_image_profile`` and persisted.

    Args:
        response: Download response; ``response.body`` holds the image bytes.
        request: Download request; ``request.url`` selects the variant paths.
        info: Pipeline info object, passed through to the store.
        item: Optional item, forwarded to ``self.file_path``.

    Returns:
        The checksum of the canonical variant (taken from the existing stat
        entry when it was already stored), or ``None`` if no variant path
        equals the canonical path.

    Raises:
        ImageException: If the image is below the configured minimum size.
    """
    # Resolve the source location once instead of on every use.
    source_rel_path = self.source_path(request, response)
    source_file_path = self.local_store_path(source_rel_path)

    source_image = load_image_from_buffer(response.body).autorot()
    if source_image.width < self.min_width or source_image.height < self.min_height:
        raise ImageException(
            "Image too small "
            f"({source_image.width}x{source_image.height} < "
            f"{self.min_width}x{self.min_height})"
        )

    source_stat = cast(
        dict[str, Any] | None,
        self.store.stat_file(source_rel_path, info),
    )
    if not source_stat:
        content_type = (
            image_loader_mimetype(
                image_loader_name(source_image),
                image_mimetype(response, url=request.url),
            )
            or "application/octet-stream"
        )
        # The buffer is only needed when the source actually gets written.
        self.store.persist_file(
            source_rel_path,
            BytesIO(response.body),
            info,
            meta={"width": source_image.width, "height": source_image.height},
            headers={"Content-Type": content_type},
        )

    canonical_path = self.file_path(
        request, response=response, info=info, item=item
    )
    canonical_checksum = None
    for _, setting, final_path in self.variant_paths(request.url):
        stat = cast(dict[str, Any] | None, self.store.stat_file(final_path, info))
        if stat:
            # Already rendered: reuse the stored checksum, do not re-render.
            if final_path == canonical_path:
                canonical_checksum = stat.get("checksum")
            continue

        out_buf = render_image_profile(source_file_path, setting)
        width, height, file_size, _ = image_buffer_meta(
            out_buf.getvalue(),
            fallback_mimetype=setting["mimetype"],
        )
        checksum = buffer_checksum(out_buf)
        self.store.persist_file(
            final_path,
            out_buf,
            info,
            meta={"width": width, "height": height, "fileSize": file_size},
            headers={"Content-Type": setting["mimetype"]},
        )
        if final_path == canonical_path:
            canonical_checksum = checksum
    return canonical_checksum
|
||||||
|
|
||||||
|
def media_downloaded(self, response, request, info, *, item=None):
    """Validate a finished download, persist its variants and build the result.

    Args:
        response: Download response; ``status``, ``body`` and ``flags`` are read.
        request: The request that produced ``response``.
        info: Pipeline info object, passed through to ``persist_variants``.
        item: Optional item, forwarded to the helpers.

    Returns:
        The record from ``self.make_file_result`` with status ``"cached"``
        when ``"cached"`` is among the response flags, otherwise
        ``"downloaded"``.

    Raises:
        FileException: ``"download-error"`` for a non-200 status,
            ``"empty-content"`` for an empty body.
    """
    failure = None
    if response.status != 200:
        failure = "download-error"
    elif not response.body:
        failure = "empty-content"
    if failure is not None:
        raise FileException(failure)

    if "cached" in response.flags:
        outcome = "cached"
    else:
        outcome = "downloaded"
    self.inc_stats(outcome)

    digest = self.persist_variants(response, request, info, item=item)
    return self.make_file_result(
        request,
        checksum=digest,
        status=outcome,
        response=response,
        item=item,
    )
|
||||||
|
|
||||||
|
|
||||||
|
class ImageThumbnailPipeline:
    """Item pipeline that generates the configured thumbnails for item images.

    For every entry of ``item.images`` that carries a ``source_path``, each
    profile from ``REPUBLISHER_IMAGE_THUMBNAILS`` is rendered into the store
    directory (unless the file already exists) and described in the entry's
    ``"thumbnails"`` list.
    """

    @classmethod
    def from_crawler(cls, crawler: Crawler):
        """Build the pipeline using the crawler's ``IMAGES_STORE`` setting."""
        return cls(crawler.settings["IMAGES_STORE"], crawler=crawler)

    def __init__(self, store_uri: Union[str, PathLike], *, crawler: Crawler):
        """Remember the crawler settings and the store directory."""
        self.settings = crawler.settings
        self.store_dir = Path(store_uri)

    def get_thumbnail_settings(self) -> list[dict[str, Any]]:
        """Return a fresh list of the configured thumbnail profiles.

        A missing or ``None`` ``REPUBLISHER_IMAGE_THUMBNAILS`` setting yields
        an empty list instead of raising, so the pipeline simply produces no
        thumbnails when none are configured.
        """
        return list(self.settings.get("REPUBLISHER_IMAGE_THUMBNAILS") or [])

    def local_store_path(self, path: str) -> Path:
        """Return ``path`` joined onto the store directory."""
        return self.store_dir / path

    def published_url(self, path: str, item=None) -> str:
        """Return the URL (or relative path) under which ``path`` is published.

        Without a ``REPUBLISHER_FEED_URL`` or without an item, the result is
        the path relative to ``REPUBLISHER_IMAGE_DIR``; otherwise it is
        prefixed with ``<feed_url>/feeds/<item.feed_name>/``.
        """
        relative_path = f"{self.settings['REPUBLISHER_IMAGE_DIR']}/{path}"
        feed_url = str(self.settings.get("REPUBLISHER_FEED_URL", "")).rstrip("/")
        if feed_url == "" or item is None:
            return relative_path
        return f"{feed_url}/feeds/{item.feed_name}/{relative_path}"

    def persist_thumbnail(
        self, source_file: Path, final_path: str, profile: dict[str, Any]
    ):
        """Render ``source_file`` with ``profile`` and write it to ``final_path``."""
        out_buf = render_image_profile(source_file, profile)
        target = self.local_store_path(final_path)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_bytes(out_buf.getvalue())

    def load_thumbnail(
        self,
        *,
        source_url: str,
        profile: dict[str, Any],
        item=None,
    ) -> ThumbnailVariant | None:
        """Describe the stored thumbnail of ``source_url`` for ``profile``.

        Returns:
            A mapping with url, path, slot (the profile name), type, width
            and height, or ``None`` when the thumbnail file does not exist.
        """
        final_path = repub.utils.thumbnail_image_path(source_url, profile)
        file_path = self.local_store_path(final_path)
        if not file_path.exists():
            return None
        width, height, _, mimetype = image_variant_meta(
            file_path,
            fallback_mimetype=profile["mimetype"],
        )
        return {
            "url": self.published_url(final_path, item),
            "path": final_path,
            "slot": str(profile["name"]),
            "type": mimetype or profile["mimetype"],
            "width": width,
            "height": height,
        }

    def process_item(self, item, spider):
        """Attach a ``"thumbnails"`` list to every image entry of ``item``.

        Entries without a ``source_path`` get an empty list. A profile whose
        rendering raises ``ImageException`` is logged and skipped; the other
        profiles are still processed.

        Returns:
            The same ``item`` object.
        """
        del spider
        if not getattr(item, "images", None):
            return item

        # Profiles do not depend on the image, so read them once per item.
        profiles = self.get_thumbnail_settings()
        for image in item.images:
            source_path = image.get("source_path")
            if not source_path:
                image["thumbnails"] = []
                continue

            source_file = self.local_store_path(source_path)
            thumbnails: list[ThumbnailVariant] = []
            for profile in profiles:
                final_path = repub.utils.thumbnail_image_path(image["url"], profile)
                # Existing thumbnail files are reused, not re-rendered.
                if not self.local_store_path(final_path).exists():
                    try:
                        self.persist_thumbnail(source_file, final_path, profile)
                    except ImageException as exc:
                        logger.warning(
                            "Failed to generate thumbnail for %s: %s", image["url"], exc
                        )
                        continue
                thumbnail = self.load_thumbnail(
                    source_url=image["url"],
                    profile=profile,
                    item=item,
                )
                if thumbnail is not None:
                    thumbnails.append(thumbnail)
            image["thumbnails"] = thumbnails
        return item
|
||||||
|
|
||||||
|
|
||||||
|
# ``ImagePipeline`` is an alias for ``ImageNormalizePipeline``
# (presumably kept so existing references to the old name keep resolving — confirm).
ImagePipeline = ImageNormalizePipeline
|
||||||
|
|
||||||
|
|
||||||
class FilePipeline(BaseFilesPipeline):
|
class FilePipeline(BaseFilesPipeline):
|
||||||
|
|
|
||||||
|
|
@ -46,6 +46,7 @@ nsmap = {
|
||||||
"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd",
|
"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd",
|
||||||
"dc": "http://purl.org/dc/elements/1.1/",
|
"dc": "http://purl.org/dc/elements/1.1/",
|
||||||
"atom": "http://www.w3.org/2005/Atom",
|
"atom": "http://www.w3.org/2005/Atom",
|
||||||
|
"anynews": "https://guardianproject.info/rss/anynews/1.0",
|
||||||
}
|
}
|
||||||
|
|
||||||
CONTENT = SafeElementMaker(nsmap={None: nsmap["content"]}, namespace=nsmap["content"])
|
CONTENT = SafeElementMaker(nsmap={None: nsmap["content"]}, namespace=nsmap["content"])
|
||||||
|
|
|
||||||
|
|
@ -100,6 +100,116 @@ LOG_LEVEL = "INFO"
|
||||||
|
|
||||||
MEDIA_ALLOW_REDIRECTS = True
|
MEDIA_ALLOW_REDIRECTS = True
|
||||||
|
|
||||||
|
# Feature flags for the two image pipelines.
REPUBLISHER_IMAGE_NORMALIZE_ENABLED = True
REPUBLISHER_IMAGE_THUMBNAILS_ENABLED = True

# Directory names used for published images.
REPUBLISHER_IMAGE_DIR = "images"
REPUBLISHER_IMAGE_FULL_SUBDIR = "full"
REPUBLISHER_IMAGE_SOURCE_SUBDIR = "source"
REPUBLISHER_IMAGE_THUMBNAIL_SUBDIR = "thumbs"

# Full-size image profiles. Key order inside each profile is kept as-is.
# NOTE(review): "transform"/"save" look like image-library operation names
# and the *_kwargs their arguments — confirm against render_image_profile.
REPUBLISHER_IMAGE = [
    {
        "name": "main_webp",
        "mimetype": "image/webp",
        "extension": "webp",
        "transform": "thumbnail",
        "transform_kwargs": {
            "width": 1600,
            "height": 1600,
            "size": "down",
            "no_rotate": False,
            "linear": False,
            "fail_on": "warning",
        },
        "save": "webpsave_buffer",
        "save_kwargs": {
            "Q": 82,
            "preset": "photo",
            "smart_subsample": True,
            "effort": 4,
            "alpha_q": 90,
            "keep": "none",
        },
    },
    {
        "name": "fallback_jpeg",
        "mimetype": "image/jpeg",
        "extension": "jpg",
        "transform": "thumbnail",
        "transform_kwargs": {
            "width": 1600,
            "height": 1600,
            "size": "down",
            "no_rotate": False,
            "linear": False,
            "fail_on": "warning",
        },
        "save": "jpegsave_buffer",
        "save_kwargs": {
            "Q": 85,
            "interlace": True,
            "optimize_coding": True,
            "trellis_quant": True,
            "optimize_scans": True,
            "subsample_mode": "auto",
            "keep": "none",
            "background": [255, 255, 255],
        },
    },
]

# Thumbnail profiles; each "name" is emitted as the thumbnail's slot.
REPUBLISHER_IMAGE_THUMBNAILS = [
    {
        "name": "card_hero",
        "mimetype": "image/jpeg",
        "extension": "jpg",
        "transform": "thumbnail",
        "transform_kwargs": {
            "width": 640,
            "height": 360,
            "size": "down",
            "crop": "attention",
            "no_rotate": False,
            "linear": False,
            "fail_on": "warning",
        },
        "save": "jpegsave_buffer",
        "save_kwargs": {
            "Q": 82,
            "interlace": True,
            "optimize_coding": True,
            "subsample_mode": "auto",
            "keep": "none",
            "background": [255, 255, 255],
        },
    },
    {
        "name": "list_square",
        "mimetype": "image/jpeg",
        "extension": "jpg",
        "transform": "thumbnail",
        "transform_kwargs": {
            "width": 160,
            "height": 160,
            "size": "down",
            "crop": "centre",
            "no_rotate": False,
            "linear": False,
            "fail_on": "warning",
        },
        "save": "jpegsave_buffer",
        "save_kwargs": {
            "Q": 78,
            "interlace": True,
            "optimize_coding": True,
            "subsample_mode": "auto",
            "keep": "none",
            "background": [255, 255, 255],
        },
    },
]
|
||||||
|
|
||||||
REPUBLISHER_AUDIO = [
|
REPUBLISHER_AUDIO = [
|
||||||
{
|
{
|
||||||
"name": "mp3_vbr7_voice",
|
"name": "mp3_vbr7_voice",
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@ from repub.rss import (
|
||||||
)
|
)
|
||||||
from repub.utils import (
|
from repub.utils import (
|
||||||
FileType,
|
FileType,
|
||||||
|
canonical_published_image_path,
|
||||||
canonical_published_media_path,
|
canonical_published_media_path,
|
||||||
determine_file_type,
|
determine_file_type,
|
||||||
local_file_path,
|
local_file_path,
|
||||||
|
|
@ -54,7 +55,16 @@ class BaseRssFeedSpider(Spider):
|
||||||
local_path = local_file_path(url)
|
local_path = local_file_path(url)
|
||||||
if file_type == FileType.IMAGE:
|
if file_type == FileType.IMAGE:
|
||||||
file_dir = self.settings["REPUBLISHER_IMAGE_DIR"]
|
file_dir = self.settings["REPUBLISHER_IMAGE_DIR"]
|
||||||
local_path = local_image_path(url)
|
image_profiles = (
|
||||||
|
self.settings.get("REPUBLISHER_IMAGE") or []
|
||||||
|
if self.settings.getbool("REPUBLISHER_IMAGE_NORMALIZE_ENABLED", True)
|
||||||
|
else []
|
||||||
|
)
|
||||||
|
local_path = (
|
||||||
|
canonical_published_image_path(url, image_profiles)
|
||||||
|
if image_profiles
|
||||||
|
else local_image_path(url)
|
||||||
|
)
|
||||||
elif file_type == FileType.VIDEO:
|
elif file_type == FileType.VIDEO:
|
||||||
file_dir = self.settings["REPUBLISHER_VIDEO_DIR"]
|
file_dir = self.settings["REPUBLISHER_VIDEO_DIR"]
|
||||||
local_path = canonical_published_media_path(
|
local_path = canonical_published_media_path(
|
||||||
|
|
@ -278,6 +288,7 @@ class RssFeedSpider(BaseRssFeedSpider):
|
||||||
|
|
||||||
def parse_entry(self, response, feed, entry):
|
def parse_entry(self, response, feed, entry):
|
||||||
image_urls = []
|
image_urls = []
|
||||||
|
media_image_urls = []
|
||||||
file_urls = []
|
file_urls = []
|
||||||
audio_urls = []
|
audio_urls = []
|
||||||
video_urls = []
|
video_urls = []
|
||||||
|
|
@ -323,6 +334,7 @@ class RssFeedSpider(BaseRssFeedSpider):
|
||||||
)
|
)
|
||||||
if entry.get("image"):
|
if entry.get("image"):
|
||||||
image_urls.append(entry.get("image").href)
|
image_urls.append(entry.get("image").href)
|
||||||
|
media_image_urls.append(entry.get("image").href)
|
||||||
for enc in entry.enclosures:
|
for enc in entry.enclosures:
|
||||||
url = enc.get("href")
|
url = enc.get("href")
|
||||||
file_type = determine_file_type(url=url, mimetype=enc.get("type"))
|
file_type = determine_file_type(url=url, mimetype=enc.get("type"))
|
||||||
|
|
@ -381,6 +393,8 @@ class RssFeedSpider(BaseRssFeedSpider):
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
add_url(file_type, media.get("url"))
|
add_url(file_type, media.get("url"))
|
||||||
|
if file_type == FileType.IMAGE:
|
||||||
|
media_image_urls.append(media.get("url"))
|
||||||
return ElementItem(
|
return ElementItem(
|
||||||
feed_name=self.feed_name,
|
feed_name=self.feed_name,
|
||||||
el=item,
|
el=item,
|
||||||
|
|
@ -392,6 +406,7 @@ class RssFeedSpider(BaseRssFeedSpider):
|
||||||
audios=[],
|
audios=[],
|
||||||
video_urls=video_urls,
|
video_urls=video_urls,
|
||||||
videos=[],
|
videos=[],
|
||||||
|
media_image_urls=media_image_urls,
|
||||||
)
|
)
|
||||||
|
|
||||||
WEBMASTER_VALUE = "support@guardianproject.info (Guardian Project)"
|
WEBMASTER_VALUE = "support@guardianproject.info (Guardian Project)"
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
/*! tailwindcss v4.2.1 | MIT License | https://tailwindcss.com */
|
/*! tailwindcss v4.3.0 | MIT License | https://tailwindcss.com */
|
||||||
@layer properties;
|
@layer properties;
|
||||||
@layer theme, base, components, utilities;
|
@layer theme, base, components, utilities;
|
||||||
@layer theme {
|
@layer theme {
|
||||||
|
|
@ -245,9 +245,6 @@
|
||||||
.inset-x-0 {
|
.inset-x-0 {
|
||||||
inset-inline: calc(var(--spacing) * 0);
|
inset-inline: calc(var(--spacing) * 0);
|
||||||
}
|
}
|
||||||
.start {
|
|
||||||
inset-inline-start: var(--spacing);
|
|
||||||
}
|
|
||||||
.top-0 {
|
.top-0 {
|
||||||
top: calc(var(--spacing) * 0);
|
top: calc(var(--spacing) * 0);
|
||||||
}
|
}
|
||||||
|
|
@ -419,6 +416,9 @@
|
||||||
.rotate-180 {
|
.rotate-180 {
|
||||||
rotate: 180deg;
|
rotate: 180deg;
|
||||||
}
|
}
|
||||||
|
.transform {
|
||||||
|
transform: var(--tw-rotate-x,) var(--tw-rotate-y,) var(--tw-rotate-z,) var(--tw-skew-x,) var(--tw-skew-y,);
|
||||||
|
}
|
||||||
.animate-pulse {
|
.animate-pulse {
|
||||||
animation: var(--animate-pulse);
|
animation: var(--animate-pulse);
|
||||||
}
|
}
|
||||||
|
|
@ -1221,6 +1221,26 @@
|
||||||
inherits: false;
|
inherits: false;
|
||||||
initial-value: 0;
|
initial-value: 0;
|
||||||
}
|
}
|
||||||
|
@property --tw-rotate-x {
|
||||||
|
syntax: "*";
|
||||||
|
inherits: false;
|
||||||
|
}
|
||||||
|
@property --tw-rotate-y {
|
||||||
|
syntax: "*";
|
||||||
|
inherits: false;
|
||||||
|
}
|
||||||
|
@property --tw-rotate-z {
|
||||||
|
syntax: "*";
|
||||||
|
inherits: false;
|
||||||
|
}
|
||||||
|
@property --tw-skew-x {
|
||||||
|
syntax: "*";
|
||||||
|
inherits: false;
|
||||||
|
}
|
||||||
|
@property --tw-skew-y {
|
||||||
|
syntax: "*";
|
||||||
|
inherits: false;
|
||||||
|
}
|
||||||
@property --tw-space-y-reverse {
|
@property --tw-space-y-reverse {
|
||||||
syntax: "*";
|
syntax: "*";
|
||||||
inherits: false;
|
inherits: false;
|
||||||
|
|
@ -1460,6 +1480,11 @@
|
||||||
--tw-translate-x: 0;
|
--tw-translate-x: 0;
|
||||||
--tw-translate-y: 0;
|
--tw-translate-y: 0;
|
||||||
--tw-translate-z: 0;
|
--tw-translate-z: 0;
|
||||||
|
--tw-rotate-x: initial;
|
||||||
|
--tw-rotate-y: initial;
|
||||||
|
--tw-rotate-z: initial;
|
||||||
|
--tw-skew-x: initial;
|
||||||
|
--tw-skew-y: initial;
|
||||||
--tw-space-y-reverse: 0;
|
--tw-space-y-reverse: 0;
|
||||||
--tw-space-x-reverse: 0;
|
--tw-space-x-reverse: 0;
|
||||||
--tw-divide-y-reverse: 0;
|
--tw-divide-y-reverse: 0;
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,50 @@ def local_audio_path(s: str) -> str:
|
||||||
return local_file_path(s)
|
return local_file_path(s)
|
||||||
|
|
||||||
|
|
||||||
|
def image_guid(source_url: str) -> str:
    """Return the hexadecimal SHA-1 digest of ``source_url``."""
    digest = hashlib.sha1(to_bytes(source_url))  # nosec
    return digest.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def image_extension(mimetype_or_extension: str | None, source_url: str = "") -> str:
|
||||||
|
if mimetype_or_extension:
|
||||||
|
if mimetype_or_extension.startswith("."):
|
||||||
|
extension = mimetype_or_extension
|
||||||
|
elif "/" in mimetype_or_extension:
|
||||||
|
extension = mimetypes.guess_extension(mimetype_or_extension) or ""
|
||||||
|
else:
|
||||||
|
extension = f".{mimetype_or_extension.lstrip('.')}"
|
||||||
|
if extension == ".jpe":
|
||||||
|
return ".jpg"
|
||||||
|
return extension
|
||||||
|
guessed = Path(source_url).suffix
|
||||||
|
if guessed == ".jpe":
|
||||||
|
return ".jpg"
|
||||||
|
if guessed:
|
||||||
|
return guessed
|
||||||
|
return ".img"
|
||||||
|
|
||||||
|
|
||||||
|
def source_image_path(source_url: str, mimetype_or_extension: str | None = None) -> str:
    """Return the ``source/<guid><extension>`` store path for ``source_url``."""
    suffix = image_extension(mimetype_or_extension, source_url)
    stem = image_guid(source_url)
    return "source/" + stem + suffix
|
||||||
|
|
||||||
|
|
||||||
|
def published_image_path(source_url: str, profile: Mapping[str, Any]) -> str:
    """Return the hashed variant path under ``full/`` for ``source_url`` and ``profile``."""
    base_path = "full/" + image_guid(source_url)
    return variant_media_path(base_path, profile, hashed=True)
|
||||||
|
|
||||||
|
|
||||||
|
def canonical_published_image_path(
    source_url: str, profiles: Sequence[Mapping[str, Any]]
) -> str:
    """Return the published path of ``source_url`` for the first profile.

    Raises:
        ValueError: If ``profiles`` is empty.
    """
    if profiles:
        # The first configured profile defines the canonical path.
        return published_image_path(source_url, profiles[0])
    raise ValueError("Missing image normalization profiles")
|
||||||
|
|
||||||
|
|
||||||
|
def thumbnail_image_path(source_url: str, profile: Mapping[str, Any]) -> str:
    """Return the hashed variant path under ``thumbs/`` for ``source_url`` and ``profile``."""
    base_path = "thumbs/" + image_guid(source_url)
    return variant_media_path(base_path, profile, hashed=True)
|
||||||
|
|
||||||
|
|
||||||
def profile_settings_hash(profile: Mapping[str, Any]) -> str:
|
def profile_settings_hash(profile: Mapping[str, Any]) -> str:
|
||||||
settings = {
|
settings = {
|
||||||
key: value
|
key: value
|
||||||
|
|
@ -65,6 +109,8 @@ def variant_media_path(
|
||||||
def published_media_path(
|
def published_media_path(
|
||||||
file_type: FileType, source_url: str, profile: Mapping[str, Any]
|
file_type: FileType, source_url: str, profile: Mapping[str, Any]
|
||||||
) -> str:
|
) -> str:
|
||||||
|
if file_type == FileType.IMAGE:
|
||||||
|
return published_image_path(source_url, profile)
|
||||||
if file_type == FileType.AUDIO:
|
if file_type == FileType.AUDIO:
|
||||||
return variant_media_path(local_audio_path(source_url), profile, hashed=True)
|
return variant_media_path(local_audio_path(source_url), profile, hashed=True)
|
||||||
if file_type == FileType.VIDEO:
|
if file_type == FileType.VIDEO:
|
||||||
|
|
@ -79,6 +125,8 @@ def canonical_published_media_path(
|
||||||
raise ValueError(f"Missing transcode profiles for {file_type.value}")
|
raise ValueError(f"Missing transcode profiles for {file_type.value}")
|
||||||
# The first configured profile is the public URL contract. Reordering profiles
|
# The first configured profile is the public URL contract. Reordering profiles
|
||||||
# changes published URLs for already-mirrored media.
|
# changes published URLs for already-mirrored media.
|
||||||
|
if file_type == FileType.IMAGE:
|
||||||
|
return canonical_published_image_path(source_url, profiles)
|
||||||
return published_media_path(file_type, source_url, profiles[0])
|
return published_media_path(file_type, source_url, profiles[0])
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -224,7 +224,46 @@ def test_build_feed_settings_can_disable_image_and_video_conversion(
|
||||||
convert_video=False,
|
convert_video=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert "repub.pipelines.ImagePipeline" not in feed_settings["ITEM_PIPELINES"]
|
assert (
|
||||||
|
"repub.pipelines.ImageNormalizePipeline" not in feed_settings["ITEM_PIPELINES"]
|
||||||
|
)
|
||||||
|
assert (
|
||||||
|
"repub.pipelines.ImageThumbnailPipeline" not in feed_settings["ITEM_PIPELINES"]
|
||||||
|
)
|
||||||
assert "repub.pipelines.VideoPipeline" not in feed_settings["ITEM_PIPELINES"]
|
assert "repub.pipelines.VideoPipeline" not in feed_settings["ITEM_PIPELINES"]
|
||||||
assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.AudioPipeline"] == 2
|
assert feed_settings["REPUBLISHER_IMAGE_NORMALIZE_ENABLED"] is False
|
||||||
assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.FilePipeline"] == 4
|
assert feed_settings["REPUBLISHER_IMAGE_THUMBNAILS_ENABLED"] is False
|
||||||
|
assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.AudioPipeline"] == 3
|
||||||
|
assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.FilePipeline"] == 5
|
||||||
|
|
||||||
|
|
||||||
|
def test_build_feed_settings_respects_image_pipeline_feature_flags(
    tmp_path: Path,
) -> None:
    """Disabling only thumbnails keeps the normalize pipeline registered."""
    mirror_dir = (tmp_path / "mirror").resolve()
    podcast_feed = FeedConfig(
        name="Guardian Project Podcast",
        slug="gp-pod",
        url="https://guardianproject.info/podcast/podcast.xml",
    )
    config = RepublisherConfig(
        config_path=tmp_path / "repub.toml",
        out_dir=mirror_dir,
        feeds=(podcast_feed,),
        scrapy_settings={"REPUBLISHER_IMAGE_THUMBNAILS_ENABLED": False},
    )

    feed_settings = build_feed_settings(
        build_base_settings(config),
        out_dir=mirror_dir,
        feed_slug="gp-pod",
    )

    # Normalization stays at priority 1; the thumbnail pipeline is absent.
    assert (
        feed_settings["ITEM_PIPELINES"]["repub.pipelines.ImageNormalizePipeline"] == 1
    )
    assert (
        "repub.pipelines.ImageThumbnailPipeline" not in feed_settings["ITEM_PIPELINES"]
    )
|
||||||
|
|
|
||||||
|
|
@ -16,10 +16,12 @@ from repub.rss import nsmap
|
||||||
from repub.spiders.rss_spider import RssFeedSpider
|
from repub.spiders.rss_spider import RssFeedSpider
|
||||||
from repub.utils import (
|
from repub.utils import (
|
||||||
FileType,
|
FileType,
|
||||||
|
canonical_published_image_path,
|
||||||
local_audio_path,
|
local_audio_path,
|
||||||
local_image_path,
|
|
||||||
local_video_path,
|
local_video_path,
|
||||||
|
published_image_path,
|
||||||
published_media_path,
|
published_media_path,
|
||||||
|
thumbnail_image_path,
|
||||||
)
|
)
|
||||||
|
|
||||||
RSS_DATE_PATTERN = re.compile(
|
RSS_DATE_PATTERN = re.compile(
|
||||||
|
|
@ -44,6 +46,7 @@ def _serialize_feed(
|
||||||
"REPUBLISHER_FILE_DIR": "files",
|
"REPUBLISHER_FILE_DIR": "files",
|
||||||
"REPUBLISHER_AUDIO_DIR": "audio",
|
"REPUBLISHER_AUDIO_DIR": "audio",
|
||||||
"REPUBLISHER_VIDEO_DIR": "video",
|
"REPUBLISHER_VIDEO_DIR": "video",
|
||||||
|
"REPUBLISHER_IMAGE": repub_settings.REPUBLISHER_IMAGE,
|
||||||
"REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
|
"REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
|
||||||
"REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
|
"REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
|
||||||
"REPUBLISHER_FEED_URL": feed_url,
|
"REPUBLISHER_FEED_URL": feed_url,
|
||||||
|
|
@ -75,6 +78,18 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
||||||
source_video = "https://source.example/media/video.mp4"
|
source_video = "https://source.example/media/video.mp4"
|
||||||
channel_image = "https://source.example/media/channel.png"
|
channel_image = "https://source.example/media/channel.png"
|
||||||
item_image = "https://source.example/media/cover.jpg"
|
item_image = "https://source.example/media/cover.jpg"
|
||||||
|
image_main_path = published_image_path(
|
||||||
|
source_image,
|
||||||
|
repub_settings.REPUBLISHER_IMAGE[0],
|
||||||
|
)
|
||||||
|
image_fallback_path = published_image_path(
|
||||||
|
source_image,
|
||||||
|
repub_settings.REPUBLISHER_IMAGE[1],
|
||||||
|
)
|
||||||
|
image_thumbnail_path = thumbnail_image_path(
|
||||||
|
source_image,
|
||||||
|
repub_settings.REPUBLISHER_IMAGE_THUMBNAILS[0],
|
||||||
|
)
|
||||||
audio_base_path = local_audio_path(source_audio)
|
audio_base_path = local_audio_path(source_audio)
|
||||||
audio_default_path = published_media_path(
|
audio_default_path = published_media_path(
|
||||||
FileType.AUDIO, source_audio, repub_settings.REPUBLISHER_AUDIO[0]
|
FileType.AUDIO, source_audio, repub_settings.REPUBLISHER_AUDIO[0]
|
||||||
|
|
@ -94,6 +109,60 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
||||||
)
|
)
|
||||||
|
|
||||||
def prepare_item(item: ElementItem) -> None:
|
def prepare_item(item: ElementItem) -> None:
|
||||||
|
item.images = [
|
||||||
|
{
|
||||||
|
"url": source_image,
|
||||||
|
"path": image_main_path,
|
||||||
|
"published_url": _published_url(
|
||||||
|
"https://mirror.example",
|
||||||
|
f"images/{image_main_path}",
|
||||||
|
),
|
||||||
|
"checksum": "image-default",
|
||||||
|
"status": "downloaded",
|
||||||
|
"source_path": "source/ignored.png",
|
||||||
|
"variants": [
|
||||||
|
{
|
||||||
|
"url": _published_url(
|
||||||
|
"https://mirror.example",
|
||||||
|
f"images/{image_main_path}",
|
||||||
|
),
|
||||||
|
"path": image_main_path,
|
||||||
|
"type": "image/webp",
|
||||||
|
"medium": "image",
|
||||||
|
"isDefault": "true",
|
||||||
|
"fileSize": "2345",
|
||||||
|
"width": "1200",
|
||||||
|
"height": "675",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": _published_url(
|
||||||
|
"https://mirror.example",
|
||||||
|
f"images/{image_fallback_path}",
|
||||||
|
),
|
||||||
|
"path": image_fallback_path,
|
||||||
|
"type": "image/jpeg",
|
||||||
|
"medium": "image",
|
||||||
|
"isDefault": "false",
|
||||||
|
"fileSize": "3456",
|
||||||
|
"width": "1200",
|
||||||
|
"height": "675",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
"thumbnails": [
|
||||||
|
{
|
||||||
|
"url": _published_url(
|
||||||
|
"https://mirror.example",
|
||||||
|
f"images/{image_thumbnail_path}",
|
||||||
|
),
|
||||||
|
"path": image_thumbnail_path,
|
||||||
|
"slot": "card_hero",
|
||||||
|
"type": "image/jpeg",
|
||||||
|
"width": "640",
|
||||||
|
"height": "360",
|
||||||
|
}
|
||||||
|
],
|
||||||
|
}
|
||||||
|
]
|
||||||
item.audios = [
|
item.audios = [
|
||||||
{
|
{
|
||||||
"url": source_audio,
|
"url": source_audio,
|
||||||
|
|
@ -261,6 +330,7 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
||||||
<pubDate>Tue, 31 Mar 2026 10:31:50 +0000</pubDate>
|
<pubDate>Tue, 31 Mar 2026 10:31:50 +0000</pubDate>
|
||||||
<enclosure url="{source_audio}" length="123" type="audio/mpeg" />
|
<enclosure url="{source_audio}" length="123" type="audio/mpeg" />
|
||||||
<content:encoded><![CDATA[<div mode="body" querystring="x=1"><img src="{source_image}" contenteditable="true"></div>]]></content:encoded>
|
<content:encoded><![CDATA[<div mode="body" querystring="x=1"><img src="{source_image}" contenteditable="true"></div>]]></content:encoded>
|
||||||
|
<media:content url="{source_image}" type="image/jpeg" medium="image" expression="full" lang="en" />
|
||||||
<media:content url="{source_video}" type="video/mp4" medium="video" expression="full" duration="60" width="640" height="360" lang="en" />
|
<media:content url="{source_video}" type="video/mp4" medium="video" expression="full" duration="60" width="640" height="360" lang="en" />
|
||||||
<itunes:summary><![CDATA[{long_summary}]]></itunes:summary>
|
<itunes:summary><![CDATA[{long_summary}]]></itunes:summary>
|
||||||
<itunes:image href="{item_image}" />
|
<itunes:image href="{item_image}" />
|
||||||
|
|
@ -288,7 +358,11 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
||||||
assert last_build_date == item_pub_date
|
assert last_build_date == item_pub_date
|
||||||
assert channel.findtext("itunes:explicit", namespaces=nsmap) == "false"
|
assert channel.findtext("itunes:explicit", namespaces=nsmap) == "false"
|
||||||
assert channel.findtext("./image/url") == (
|
assert channel.findtext("./image/url") == (
|
||||||
f"https://mirror.example/feeds/demo/images/{local_image_path(channel_image)}"
|
"https://mirror.example/feeds/demo/images/"
|
||||||
|
+ canonical_published_image_path(
|
||||||
|
channel_image,
|
||||||
|
repub_settings.REPUBLISHER_IMAGE,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
atom_self = channel.find("atom:link", namespaces=nsmap)
|
atom_self = channel.find("atom:link", namespaces=nsmap)
|
||||||
|
|
@ -318,9 +392,63 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
||||||
assert root.find("./channel/item/media:content", namespaces=nsmap) is None
|
assert root.find("./channel/item/media:content", namespaces=nsmap) is None
|
||||||
|
|
||||||
media_groups = root.findall("./channel/item/media:group", namespaces=nsmap)
|
media_groups = root.findall("./channel/item/media:group", namespaces=nsmap)
|
||||||
assert len(media_groups) == 2
|
assert len(media_groups) == 3
|
||||||
|
|
||||||
|
image_group = next(
|
||||||
|
group
|
||||||
|
for group in media_groups
|
||||||
|
if group.find("media:thumbnail", namespaces=nsmap) is not None
|
||||||
|
)
|
||||||
|
audio_group = next(
|
||||||
|
group
|
||||||
|
for group in media_groups
|
||||||
|
if group.findall("media:content", namespaces=nsmap)
|
||||||
|
and group.findall("media:content", namespaces=nsmap)[0].get("medium") == "audio"
|
||||||
|
)
|
||||||
|
video_group = next(
|
||||||
|
group
|
||||||
|
for group in media_groups
|
||||||
|
if group.findall("media:content", namespaces=nsmap)
|
||||||
|
and group.findall("media:content", namespaces=nsmap)[0].get("medium") == "video"
|
||||||
|
)
|
||||||
|
|
||||||
|
image_variants = image_group.findall("media:content", namespaces=nsmap)
|
||||||
|
assert [variant.attrib for variant in image_variants] == [
|
||||||
|
{
|
||||||
|
"url": (f"https://mirror.example/feeds/demo/images/" f"{image_main_path}"),
|
||||||
|
"type": "image/webp",
|
||||||
|
"medium": "image",
|
||||||
|
"isDefault": "true",
|
||||||
|
"expression": "full",
|
||||||
|
"lang": "en",
|
||||||
|
"height": "675",
|
||||||
|
"width": "1200",
|
||||||
|
"fileSize": "2345",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": (
|
||||||
|
f"https://mirror.example/feeds/demo/images/" f"{image_fallback_path}"
|
||||||
|
),
|
||||||
|
"type": "image/jpeg",
|
||||||
|
"medium": "image",
|
||||||
|
"isDefault": "false",
|
||||||
|
"expression": "full",
|
||||||
|
"lang": "en",
|
||||||
|
"height": "675",
|
||||||
|
"width": "1200",
|
||||||
|
"fileSize": "3456",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
thumbnails = image_group.findall("media:thumbnail", namespaces=nsmap)
|
||||||
|
assert len(thumbnails) == 1
|
||||||
|
assert thumbnails[0].attrib == {
|
||||||
|
"url": (f"https://mirror.example/feeds/demo/images/" f"{image_thumbnail_path}"),
|
||||||
|
"width": "640",
|
||||||
|
"height": "360",
|
||||||
|
f"{{{nsmap['anynews']}}}slot": "card_hero",
|
||||||
|
f"{{{nsmap['anynews']}}}type": "image/jpeg",
|
||||||
|
}
|
||||||
|
|
||||||
audio_group, video_group = media_groups
|
|
||||||
audio_variants = audio_group.findall("media:content", namespaces=nsmap)
|
audio_variants = audio_group.findall("media:content", namespaces=nsmap)
|
||||||
assert [variant.attrib for variant in audio_variants] == [
|
assert [variant.attrib for variant in audio_variants] == [
|
||||||
{
|
{
|
||||||
|
|
@ -428,7 +556,13 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
||||||
itunes_image = root.find("./channel/item/itunes:image", namespaces=nsmap)
|
itunes_image = root.find("./channel/item/itunes:image", namespaces=nsmap)
|
||||||
assert itunes_image is not None
|
assert itunes_image is not None
|
||||||
assert itunes_image.attrib == {
|
assert itunes_image.attrib == {
|
||||||
"href": f"https://mirror.example/feeds/demo/images/{local_image_path(item_image)}"
|
"href": (
|
||||||
|
"https://mirror.example/feeds/demo/images/"
|
||||||
|
+ canonical_published_image_path(
|
||||||
|
item_image,
|
||||||
|
repub_settings.REPUBLISHER_IMAGE,
|
||||||
|
)
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
itunes_summary = root.findtext("./channel/item/itunes:summary", namespaces=nsmap)
|
itunes_summary = root.findtext("./channel/item/itunes:summary", namespaces=nsmap)
|
||||||
|
|
@ -494,3 +628,165 @@ def test_item_body_uses_description_only_when_content_is_also_present() -> None:
|
||||||
assert both_present.findtext("content:encoded", namespaces=nsmap) == (
|
assert both_present.findtext("content:encoded", namespaces=nsmap) == (
|
||||||
"<div>Full body</div>"
|
"<div>Full body</div>"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_exporter_does_not_emit_media_rss_for_inline_only_images() -> None:
|
||||||
|
source_image = "https://source.example/media/inline.jpg"
|
||||||
|
|
||||||
|
def prepare_item(item: ElementItem) -> None:
|
||||||
|
item.images = [
|
||||||
|
{
|
||||||
|
"url": source_image,
|
||||||
|
"path": published_image_path(
|
||||||
|
source_image,
|
||||||
|
repub_settings.REPUBLISHER_IMAGE[0],
|
||||||
|
),
|
||||||
|
"published_url": _published_url(
|
||||||
|
"https://mirror.example",
|
||||||
|
"images/"
|
||||||
|
+ published_image_path(
|
||||||
|
source_image,
|
||||||
|
repub_settings.REPUBLISHER_IMAGE[0],
|
||||||
|
),
|
||||||
|
),
|
||||||
|
"checksum": "inline-image",
|
||||||
|
"status": "downloaded",
|
||||||
|
"source_path": "source/inline.jpg",
|
||||||
|
"variants": [
|
||||||
|
{
|
||||||
|
"url": _published_url(
|
||||||
|
"https://mirror.example",
|
||||||
|
"images/"
|
||||||
|
+ published_image_path(
|
||||||
|
source_image,
|
||||||
|
repub_settings.REPUBLISHER_IMAGE[0],
|
||||||
|
),
|
||||||
|
),
|
||||||
|
"path": published_image_path(
|
||||||
|
source_image,
|
||||||
|
repub_settings.REPUBLISHER_IMAGE[0],
|
||||||
|
),
|
||||||
|
"type": "image/webp",
|
||||||
|
"medium": "image",
|
||||||
|
"isDefault": "true",
|
||||||
|
"width": "1200",
|
||||||
|
"height": "675",
|
||||||
|
"fileSize": "2345",
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thumbnails": [],
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
_, root = _serialize_feed(
|
||||||
|
feed_url="https://mirror.example",
|
||||||
|
prepare_item=prepare_item,
|
||||||
|
feed_text=f"""<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<rss version="2.0"
|
||||||
|
xmlns:content="http://purl.org/rss/1.0/modules/content/">
|
||||||
|
<channel>
|
||||||
|
<title>Demo Feed</title>
|
||||||
|
<link>https://source.example/feed</link>
|
||||||
|
<description>Demo description</description>
|
||||||
|
<item>
|
||||||
|
<title>Inline Image Only</title>
|
||||||
|
<link>https://source.example/inline</link>
|
||||||
|
<guid isPermaLink="false">inline-only</guid>
|
||||||
|
<pubDate>Tue, 31 Mar 2026 10:31:50 +0000</pubDate>
|
||||||
|
<content:encoded><![CDATA[<div><img src="{source_image}"></div>]]></content:encoded>
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>
|
||||||
|
""",
|
||||||
|
)
|
||||||
|
|
||||||
|
assert root.findall("./channel/item/media:group", namespaces=nsmap) == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_exporter_replaces_standalone_source_media_thumbnails() -> None:
|
||||||
|
source_image = "https://source.example/media/photo.jpg"
|
||||||
|
image_main_path = published_image_path(
|
||||||
|
source_image,
|
||||||
|
repub_settings.REPUBLISHER_IMAGE[0],
|
||||||
|
)
|
||||||
|
image_thumbnail_path = thumbnail_image_path(
|
||||||
|
source_image,
|
||||||
|
repub_settings.REPUBLISHER_IMAGE_THUMBNAILS[0],
|
||||||
|
)
|
||||||
|
|
||||||
|
def prepare_item(item: ElementItem) -> None:
|
||||||
|
item.images = [
|
||||||
|
{
|
||||||
|
"url": source_image,
|
||||||
|
"path": image_main_path,
|
||||||
|
"published_url": _published_url(
|
||||||
|
"https://mirror.example",
|
||||||
|
f"images/{image_main_path}",
|
||||||
|
),
|
||||||
|
"checksum": "image-default",
|
||||||
|
"status": "downloaded",
|
||||||
|
"source_path": "source/ignored.png",
|
||||||
|
"variants": [
|
||||||
|
{
|
||||||
|
"url": _published_url(
|
||||||
|
"https://mirror.example",
|
||||||
|
f"images/{image_main_path}",
|
||||||
|
),
|
||||||
|
"path": image_main_path,
|
||||||
|
"type": "image/webp",
|
||||||
|
"medium": "image",
|
||||||
|
"isDefault": "true",
|
||||||
|
"fileSize": "2345",
|
||||||
|
"width": "1200",
|
||||||
|
"height": "675",
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"thumbnails": [
|
||||||
|
{
|
||||||
|
"url": _published_url(
|
||||||
|
"https://mirror.example",
|
||||||
|
f"images/{image_thumbnail_path}",
|
||||||
|
),
|
||||||
|
"path": image_thumbnail_path,
|
||||||
|
"slot": "card_hero",
|
||||||
|
"type": "image/jpeg",
|
||||||
|
"width": "640",
|
||||||
|
"height": "360",
|
||||||
|
}
|
||||||
|
],
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
_, root = _serialize_feed(
|
||||||
|
feed_url="https://mirror.example",
|
||||||
|
prepare_item=prepare_item,
|
||||||
|
feed_text=f"""<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<rss version="2.0"
|
||||||
|
xmlns:media="http://search.yahoo.com/mrss/">
|
||||||
|
<channel>
|
||||||
|
<title>Demo Feed</title>
|
||||||
|
<link>https://source.example/feed</link>
|
||||||
|
<description>Demo description</description>
|
||||||
|
<item>
|
||||||
|
<title>Entry One</title>
|
||||||
|
<link>https://source.example/entry-1</link>
|
||||||
|
<guid isPermaLink="false">entry-1</guid>
|
||||||
|
<pubDate>Tue, 31 Mar 2026 10:31:50 +0000</pubDate>
|
||||||
|
<media:content url="{source_image}" type="image/jpeg" medium="image" />
|
||||||
|
<media:thumbnail url="https://source.example/media/source-thumb.jpg" width="10" height="10" />
|
||||||
|
</item>
|
||||||
|
</channel>
|
||||||
|
</rss>
|
||||||
|
""",
|
||||||
|
)
|
||||||
|
|
||||||
|
thumbnails = root.findall("./channel/item/media:thumbnail", namespaces=nsmap)
|
||||||
|
assert thumbnails == []
|
||||||
|
group_thumbnails = root.findall(
|
||||||
|
"./channel/item/media:group/media:thumbnail",
|
||||||
|
namespaces=nsmap,
|
||||||
|
)
|
||||||
|
assert len(group_thumbnails) == 1
|
||||||
|
assert group_thumbnails[0].get("url") == (
|
||||||
|
f"https://mirror.example/feeds/demo/images/{image_thumbnail_path}"
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -8,10 +8,13 @@ from repub import settings as repub_settings
|
||||||
from repub.spiders.rss_spider import RssFeedSpider
|
from repub.spiders.rss_spider import RssFeedSpider
|
||||||
from repub.utils import (
|
from repub.utils import (
|
||||||
FileType,
|
FileType,
|
||||||
|
canonical_published_image_path,
|
||||||
local_audio_path,
|
local_audio_path,
|
||||||
local_image_path,
|
local_image_path,
|
||||||
local_video_path,
|
local_video_path,
|
||||||
|
published_image_path,
|
||||||
published_media_path,
|
published_media_path,
|
||||||
|
thumbnail_image_path,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -57,14 +60,17 @@ def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None:
|
||||||
"REPUBLISHER_FILE_DIR": "files",
|
"REPUBLISHER_FILE_DIR": "files",
|
||||||
"REPUBLISHER_AUDIO_DIR": "audio",
|
"REPUBLISHER_AUDIO_DIR": "audio",
|
||||||
"REPUBLISHER_VIDEO_DIR": "video",
|
"REPUBLISHER_VIDEO_DIR": "video",
|
||||||
|
"REPUBLISHER_IMAGE": repub_settings.REPUBLISHER_IMAGE,
|
||||||
"REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
|
"REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
|
||||||
"REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
|
"REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
assert (
|
assert spider.rewrite_image_url(
|
||||||
spider.rewrite_image_url("https://example.com/media/photo.jpg")
|
"https://example.com/media/photo.jpg"
|
||||||
== f"images/{local_image_path('https://example.com/media/photo.jpg')}"
|
) == "images/" + canonical_published_image_path(
|
||||||
|
"https://example.com/media/photo.jpg",
|
||||||
|
repub_settings.REPUBLISHER_IMAGE,
|
||||||
)
|
)
|
||||||
assert spider.rewrite_file_url(
|
assert spider.rewrite_file_url(
|
||||||
FileType.AUDIO,
|
FileType.AUDIO,
|
||||||
|
|
@ -90,6 +96,28 @@ def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_rss_spider_keeps_legacy_image_paths_when_image_normalization_disabled() -> (
|
||||||
|
None
|
||||||
|
):
|
||||||
|
spider = RssFeedSpider(feed_name="demo", url="https://example.com/feed.rss")
|
||||||
|
spider.settings = Settings(
|
||||||
|
values={
|
||||||
|
"REPUBLISHER_IMAGE_DIR": "images",
|
||||||
|
"REPUBLISHER_FILE_DIR": "files",
|
||||||
|
"REPUBLISHER_AUDIO_DIR": "audio",
|
||||||
|
"REPUBLISHER_VIDEO_DIR": "video",
|
||||||
|
"REPUBLISHER_IMAGE_NORMALIZE_ENABLED": False,
|
||||||
|
"REPUBLISHER_IMAGE": repub_settings.REPUBLISHER_IMAGE,
|
||||||
|
"REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
|
||||||
|
"REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
assert spider.rewrite_image_url("https://example.com/media/photo.jpg") == (
|
||||||
|
f"images/{local_image_path('https://example.com/media/photo.jpg')}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_published_media_path_changes_when_profile_args_change() -> None:
|
def test_published_media_path_changes_when_profile_args_change() -> None:
|
||||||
source_url = "https://example.com/media/clip.mp4"
|
source_url = "https://example.com/media/clip.mp4"
|
||||||
audio_profile = repub_settings.REPUBLISHER_AUDIO[0]
|
audio_profile = repub_settings.REPUBLISHER_AUDIO[0]
|
||||||
|
|
@ -113,6 +141,41 @@ def test_published_media_path_changes_when_profile_args_change() -> None:
|
||||||
) != published_media_path(FileType.VIDEO, source_url, base_profile)
|
) != published_media_path(FileType.VIDEO, source_url, base_profile)
|
||||||
|
|
||||||
|
|
||||||
|
def test_published_image_and_thumbnail_paths_change_when_profile_args_change() -> None:
|
||||||
|
source_url = "https://example.com/media/photo.png"
|
||||||
|
base_image_profile = repub_settings.REPUBLISHER_IMAGE[0]
|
||||||
|
base_thumbnail_profile = repub_settings.REPUBLISHER_IMAGE_THUMBNAILS[0]
|
||||||
|
|
||||||
|
assert canonical_published_image_path(
|
||||||
|
source_url,
|
||||||
|
repub_settings.REPUBLISHER_IMAGE,
|
||||||
|
) == published_image_path(source_url, base_image_profile)
|
||||||
|
|
||||||
|
changed_image_profile = {
|
||||||
|
**base_image_profile,
|
||||||
|
"transform_kwargs": {
|
||||||
|
**base_image_profile["transform_kwargs"],
|
||||||
|
"width": 2048,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
assert published_image_path(
|
||||||
|
source_url,
|
||||||
|
changed_image_profile,
|
||||||
|
) != published_image_path(source_url, base_image_profile)
|
||||||
|
|
||||||
|
changed_thumbnail_profile = {
|
||||||
|
**base_thumbnail_profile,
|
||||||
|
"save_kwargs": {
|
||||||
|
**base_thumbnail_profile["save_kwargs"],
|
||||||
|
"Q": 60,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
assert thumbnail_image_path(
|
||||||
|
source_url,
|
||||||
|
changed_thumbnail_profile,
|
||||||
|
) != thumbnail_image_path(source_url, base_thumbnail_profile)
|
||||||
|
|
||||||
|
|
||||||
def test_rss_spider_keeps_items_with_empty_content_encoded() -> None:
|
def test_rss_spider_keeps_items_with_empty_content_encoded() -> None:
|
||||||
feed_text = """<?xml version="1.0" encoding="UTF-8"?>
|
feed_text = """<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" version="2.0">
|
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" version="2.0">
|
||||||
|
|
@ -138,6 +201,7 @@ def test_rss_spider_keeps_items_with_empty_content_encoded() -> None:
|
||||||
"REPUBLISHER_FILE_DIR": "files",
|
"REPUBLISHER_FILE_DIR": "files",
|
||||||
"REPUBLISHER_AUDIO_DIR": "audio",
|
"REPUBLISHER_AUDIO_DIR": "audio",
|
||||||
"REPUBLISHER_VIDEO_DIR": "video",
|
"REPUBLISHER_VIDEO_DIR": "video",
|
||||||
|
"REPUBLISHER_IMAGE": repub_settings.REPUBLISHER_IMAGE,
|
||||||
"REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
|
"REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
|
||||||
"REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
|
"REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ from types import SimpleNamespace
|
||||||
from typing import Any, cast
|
from typing import Any, cast
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
import pyvips
|
||||||
from scrapy.crawler import Crawler
|
from scrapy.crawler import Crawler
|
||||||
from scrapy.http import Request, Response
|
from scrapy.http import Request, Response
|
||||||
|
|
||||||
|
|
@ -16,12 +17,23 @@ from repub.config import (
|
||||||
build_feed_settings,
|
build_feed_settings,
|
||||||
)
|
)
|
||||||
from repub.items import ElementItem
|
from repub.items import ElementItem
|
||||||
from repub.pipelines import AudioPipeline, FilePipeline, VideoPipeline
|
from repub.pipelines import (
|
||||||
|
AudioPipeline,
|
||||||
|
FilePipeline,
|
||||||
|
ImageNormalizePipeline,
|
||||||
|
ImageThumbnailPipeline,
|
||||||
|
VideoPipeline,
|
||||||
|
image_mimetype,
|
||||||
|
)
|
||||||
from repub.utils import (
|
from repub.utils import (
|
||||||
FileType,
|
FileType,
|
||||||
|
canonical_published_image_path,
|
||||||
local_audio_path,
|
local_audio_path,
|
||||||
local_video_path,
|
local_video_path,
|
||||||
|
published_image_path,
|
||||||
published_media_path,
|
published_media_path,
|
||||||
|
source_image_path,
|
||||||
|
thumbnail_image_path,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -45,17 +57,33 @@ def build_test_crawler(tmp_path: Path) -> SimpleNamespace:
|
||||||
return SimpleNamespace(settings=settings, request_fingerprinter=object())
|
return SimpleNamespace(settings=settings, request_fingerprinter=object())
|
||||||
|
|
||||||
|
|
||||||
|
class HashableSpiderInfo:
|
||||||
|
__hash__ = object.__hash__
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.spider = SimpleNamespace()
|
||||||
|
|
||||||
|
|
||||||
def spider_info() -> Any:
|
def spider_info() -> Any:
|
||||||
return SimpleNamespace(spider=SimpleNamespace())
|
return HashableSpiderInfo()
|
||||||
|
|
||||||
|
|
||||||
def store_dir(pipeline: Any) -> Path:
|
def store_dir(pipeline: Any) -> Path:
|
||||||
return Path(cast(Any, pipeline.store).basedir)
|
return Path(cast(Any, pipeline.store).basedir)
|
||||||
|
|
||||||
|
|
||||||
|
def transparent_png_bytes() -> bytes:
|
||||||
|
return cast(Any, pyvips.Image.black(2, 3, bands=4)).pngsave_buffer()
|
||||||
|
|
||||||
|
|
||||||
|
def png_bytes(width: int, height: int, *, bands: int = 4) -> bytes:
|
||||||
|
return cast(Any, pyvips.Image.black(width, height, bands=bands)).pngsave_buffer()
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
("pipeline_cls", "store_setting"),
|
("pipeline_cls", "store_setting"),
|
||||||
[
|
[
|
||||||
|
(ImageNormalizePipeline, "IMAGES_STORE"),
|
||||||
(AudioPipeline, "AUDIO_STORE"),
|
(AudioPipeline, "AUDIO_STORE"),
|
||||||
(VideoPipeline, "VIDEO_STORE"),
|
(VideoPipeline, "VIDEO_STORE"),
|
||||||
(FilePipeline, "FILES_STORE"),
|
(FilePipeline, "FILES_STORE"),
|
||||||
|
|
@ -630,6 +658,220 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
||||||
assert completed_item.audios == [result]
|
assert completed_item.audios == [result]
|
||||||
|
|
||||||
|
|
||||||
|
def test_image_mimetype_does_not_guess_from_url_extension() -> None:
|
||||||
|
assert image_mimetype(url="https://example.com/photo.jpg") is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_image_normalize_pipeline_media_downloaded_persists_source_and_variants(
|
||||||
|
monkeypatch, tmp_path: Path
|
||||||
|
) -> None:
|
||||||
|
crawler = build_test_crawler(tmp_path)
|
||||||
|
pipeline = ImageNormalizePipeline.from_crawler(cast(Crawler, crawler))
|
||||||
|
monkeypatch.setattr(pipeline, "inc_stats", lambda status: None)
|
||||||
|
source_url = "https://example.com/photo.png"
|
||||||
|
item = ElementItem(
|
||||||
|
feed_name="nasa",
|
||||||
|
el=None,
|
||||||
|
image_urls=[source_url],
|
||||||
|
images=[],
|
||||||
|
file_urls=[],
|
||||||
|
files=[],
|
||||||
|
audio_urls=[],
|
||||||
|
audios=[],
|
||||||
|
video_urls=[],
|
||||||
|
videos=[],
|
||||||
|
)
|
||||||
|
canonical_path = canonical_published_image_path(
|
||||||
|
source_url,
|
||||||
|
crawler.settings["REPUBLISHER_IMAGE"],
|
||||||
|
)
|
||||||
|
source_path = source_image_path(source_url, "image/png")
|
||||||
|
webp_path = published_image_path(
|
||||||
|
source_url,
|
||||||
|
crawler.settings["REPUBLISHER_IMAGE"][0],
|
||||||
|
)
|
||||||
|
jpeg_path = published_image_path(
|
||||||
|
source_url,
|
||||||
|
crawler.settings["REPUBLISHER_IMAGE"][1],
|
||||||
|
)
|
||||||
|
source_body = transparent_png_bytes()
|
||||||
|
|
||||||
|
result = pipeline.media_downloaded(
|
||||||
|
Response(
|
||||||
|
url=source_url,
|
||||||
|
body=source_body,
|
||||||
|
status=200,
|
||||||
|
headers={"Content-Type": "image/png"},
|
||||||
|
),
|
||||||
|
Request(source_url),
|
||||||
|
spider_info(),
|
||||||
|
item=item,
|
||||||
|
)
|
||||||
|
webp_file_size = result["variants"][0].get("fileSize")
|
||||||
|
jpeg_file_size = result["variants"][1].get("fileSize")
|
||||||
|
|
||||||
|
assert result == {
|
||||||
|
"url": source_url,
|
||||||
|
"path": canonical_path,
|
||||||
|
"published_url": f"https://mirror.example/feeds/nasa/images/{canonical_path}",
|
||||||
|
"checksum": result["checksum"],
|
||||||
|
"status": "downloaded",
|
||||||
|
"source_path": source_path,
|
||||||
|
"variants": [
|
||||||
|
{
|
||||||
|
"url": f"https://mirror.example/feeds/nasa/images/{webp_path}",
|
||||||
|
"path": webp_path,
|
||||||
|
"type": "image/webp",
|
||||||
|
"medium": "image",
|
||||||
|
"isDefault": "true",
|
||||||
|
"fileSize": webp_file_size,
|
||||||
|
"width": 2,
|
||||||
|
"height": 3,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": f"https://mirror.example/feeds/nasa/images/{jpeg_path}",
|
||||||
|
"path": jpeg_path,
|
||||||
|
"type": "image/jpeg",
|
||||||
|
"medium": "image",
|
||||||
|
"isDefault": "false",
|
||||||
|
"fileSize": jpeg_file_size,
|
||||||
|
"width": 2,
|
||||||
|
"height": 3,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
"thumbnails": [],
|
||||||
|
}
|
||||||
|
assert isinstance(result["checksum"], str)
|
||||||
|
assert isinstance(webp_file_size, int)
|
||||||
|
assert isinstance(jpeg_file_size, int)
|
||||||
|
assert (store_dir(pipeline) / source_path).read_bytes() == source_body
|
||||||
|
webp_image = cast(
|
||||||
|
Any,
|
||||||
|
pyvips.Image.new_from_file(str(store_dir(pipeline) / webp_path)),
|
||||||
|
)
|
||||||
|
jpeg_image = cast(
|
||||||
|
Any,
|
||||||
|
pyvips.Image.new_from_file(str(store_dir(pipeline) / jpeg_path)),
|
||||||
|
)
|
||||||
|
assert (webp_image.width, webp_image.height) == (2, 3)
|
||||||
|
assert (jpeg_image.width, jpeg_image.height) == (2, 3)
|
||||||
|
assert jpeg_image.bands == 3
|
||||||
|
|
||||||
|
completed_item = pipeline.item_completed([(True, result)], item, spider_info())
|
||||||
|
assert completed_item.images == [result]
|
||||||
|
|
||||||
|
|
||||||
|
def test_image_thumbnail_pipeline_generates_named_thumbnails_from_source_image(
|
||||||
|
monkeypatch, tmp_path: Path
|
||||||
|
) -> None:
|
||||||
|
crawler = build_test_crawler(tmp_path)
|
||||||
|
normalize_pipeline = ImageNormalizePipeline.from_crawler(cast(Crawler, crawler))
|
||||||
|
thumbnail_pipeline = ImageThumbnailPipeline.from_crawler(cast(Crawler, crawler))
|
||||||
|
monkeypatch.setattr(normalize_pipeline, "inc_stats", lambda status: None)
|
||||||
|
source_url = "https://example.com/photo.png"
|
||||||
|
source_body = png_bytes(1200, 900)
|
||||||
|
item = ElementItem(
|
||||||
|
feed_name="nasa",
|
||||||
|
el=None,
|
||||||
|
image_urls=[source_url],
|
||||||
|
images=[],
|
||||||
|
file_urls=[],
|
||||||
|
files=[],
|
||||||
|
audio_urls=[],
|
||||||
|
audios=[],
|
||||||
|
video_urls=[],
|
||||||
|
videos=[],
|
||||||
|
)
|
||||||
|
|
||||||
|
normalized = normalize_pipeline.media_downloaded(
|
||||||
|
Response(
|
||||||
|
url=source_url,
|
||||||
|
body=source_body,
|
||||||
|
status=200,
|
||||||
|
headers={"Content-Type": "image/png"},
|
||||||
|
),
|
||||||
|
Request(source_url),
|
||||||
|
spider_info(),
|
||||||
|
item=item,
|
||||||
|
)
|
||||||
|
item.images = [normalized]
|
||||||
|
|
||||||
|
processed = thumbnail_pipeline.process_item(item, spider_info().spider)
|
||||||
|
thumbnails = processed.images[0]["thumbnails"]
|
||||||
|
thumb_slots = [thumb.get("slot") for thumb in thumbnails]
|
||||||
|
first_thumb = thumbnails[0]
|
||||||
|
second_thumb = thumbnails[1]
|
||||||
|
|
||||||
|
assert processed.images[0]["path"] == canonical_published_image_path(
|
||||||
|
source_url,
|
||||||
|
crawler.settings["REPUBLISHER_IMAGE"],
|
||||||
|
)
|
||||||
|
assert thumb_slots == ["card_hero", "list_square"]
|
||||||
|
assert first_thumb.get("path") == thumbnail_image_path(
|
||||||
|
source_url,
|
||||||
|
crawler.settings["REPUBLISHER_IMAGE_THUMBNAILS"][0],
|
||||||
|
)
|
||||||
|
assert first_thumb.get("type") == "image/jpeg"
|
||||||
|
assert first_thumb.get("width") == 640
|
||||||
|
assert first_thumb.get("height") == 360
|
||||||
|
assert second_thumb.get("path") == thumbnail_image_path(
|
||||||
|
source_url,
|
||||||
|
crawler.settings["REPUBLISHER_IMAGE_THUMBNAILS"][1],
|
||||||
|
)
|
||||||
|
assert second_thumb.get("width") == 160
|
||||||
|
assert second_thumb.get("height") == 160
|
||||||
|
for thumb in thumbnails:
|
||||||
|
thumb_path = thumb.get("path")
|
||||||
|
thumb_width = thumb.get("width")
|
||||||
|
thumb_height = thumb.get("height")
|
||||||
|
thumb_image = cast(
|
||||||
|
Any,
|
||||||
|
pyvips.Image.new_from_file(
|
||||||
|
str(store_dir(normalize_pipeline) / str(thumb_path))
|
||||||
|
),
|
||||||
|
)
|
||||||
|
assert (thumb_image.width, thumb_image.height) == (thumb_width, thumb_height)
|
||||||
|
|
||||||
|
|
||||||
|
def test_image_normalize_pipeline_cache_hit_keeps_persisted_source_path_for_extensionless_urls(
|
||||||
|
monkeypatch, tmp_path: Path
|
||||||
|
) -> None:
|
||||||
|
crawler = build_test_crawler(tmp_path)
|
||||||
|
pipeline = ImageNormalizePipeline.from_crawler(cast(Crawler, crawler))
|
||||||
|
monkeypatch.setattr(pipeline, "inc_stats", lambda status: None)
|
||||||
|
source_url = "https://example.com/photo"
|
||||||
|
item = ElementItem(
|
||||||
|
feed_name="nasa",
|
||||||
|
el=None,
|
||||||
|
image_urls=[source_url],
|
||||||
|
images=[],
|
||||||
|
file_urls=[],
|
||||||
|
files=[],
|
||||||
|
audio_urls=[],
|
||||||
|
audios=[],
|
||||||
|
video_urls=[],
|
||||||
|
videos=[],
|
||||||
|
)
|
||||||
|
|
||||||
|
downloaded = pipeline.media_downloaded(
|
||||||
|
Response(
|
||||||
|
url=source_url,
|
||||||
|
body=transparent_png_bytes(),
|
||||||
|
status=200,
|
||||||
|
headers={"Content-Type": "image/png"},
|
||||||
|
),
|
||||||
|
Request(source_url),
|
||||||
|
spider_info(),
|
||||||
|
item=item,
|
||||||
|
)
|
||||||
|
|
||||||
|
uptodate = pipeline.media_to_download(Request(source_url), spider_info(), item=item)
|
||||||
|
|
||||||
|
assert downloaded["source_path"].endswith(".png")
|
||||||
|
assert uptodate is not None
|
||||||
|
assert uptodate["source_path"] == downloaded["source_path"]
|
||||||
|
|
||||||
|
|
||||||
def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variants(
|
def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variants(
|
||||||
monkeypatch, tmp_path: Path
|
monkeypatch, tmp_path: Path
|
||||||
) -> None:
|
) -> None:
|
||||||
|
|
|
||||||
32
uv.lock
generated
32
uv.lock
generated
|
|
@ -812,25 +812,6 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/1a/41/19c65578ef9a54b3083253c68a607f099642747168fe00f3a2bceb7c3a34/peewee-3.19.0-py3-none-any.whl", hash = "sha256:de220b94766e6008c466e00ce4ba5299b9a832117d9eb36d45d0062f3cfd7417", size = 411885, upload-time = "2026-01-07T17:24:58.33Z" },
|
{ url = "https://files.pythonhosted.org/packages/1a/41/19c65578ef9a54b3083253c68a607f099642747168fe00f3a2bceb7c3a34/peewee-3.19.0-py3-none-any.whl", hash = "sha256:de220b94766e6008c466e00ce4ba5299b9a832117d9eb36d45d0062f3cfd7417", size = 411885, upload-time = "2026-01-07T17:24:58.33Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pillow"
|
|
||||||
version = "10.4.0"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/cd/74/ad3d526f3bf7b6d3f408b73fde271ec69dfac8b81341a318ce825f2b3812/pillow-10.4.0.tar.gz", hash = "sha256:166c1cd4d24309b30d61f79f4a9114b7b2313d7450912277855ff5dfd7cd4a06", size = 46555059, upload-time = "2024-07-01T09:48:43.583Z" }
|
|
||||||
wheels = [
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/c3/00/706cebe7c2c12a6318aabe5d354836f54adff7156fd9e1bd6c89f4ba0e98/pillow-10.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8bc1a764ed8c957a2e9cacf97c8b2b053b70307cf2996aafd70e91a082e70df3", size = 3525685, upload-time = "2024-07-01T09:46:45.194Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/cf/76/f658cbfa49405e5ecbfb9ba42d07074ad9792031267e782d409fd8fe7c69/pillow-10.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6209bb41dc692ddfee4942517c19ee81b86c864b626dbfca272ec0f7cff5d9fb", size = 3374883, upload-time = "2024-07-01T09:46:47.331Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/46/2b/99c28c4379a85e65378211971c0b430d9c7234b1ec4d59b2668f6299e011/pillow-10.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bee197b30783295d2eb680b311af15a20a8b24024a19c3a26431ff83eb8d1f70", size = 4339837, upload-time = "2024-07-01T09:46:49.647Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/f1/74/b1ec314f624c0c43711fdf0d8076f82d9d802afd58f1d62c2a86878e8615/pillow-10.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ef61f5dd14c300786318482456481463b9d6b91ebe5ef12f405afbba77ed0be", size = 4455562, upload-time = "2024-07-01T09:46:51.811Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/4a/2a/4b04157cb7b9c74372fa867096a1607e6fedad93a44deeff553ccd307868/pillow-10.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:297e388da6e248c98bc4a02e018966af0c5f92dfacf5a5ca22fa01cb3179bca0", size = 4366761, upload-time = "2024-07-01T09:46:53.961Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/ac/7b/8f1d815c1a6a268fe90481232c98dd0e5fa8c75e341a75f060037bd5ceae/pillow-10.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e4db64794ccdf6cb83a59d73405f63adbe2a1887012e308828596100a0b2f6cc", size = 4536767, upload-time = "2024-07-01T09:46:56.664Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/e5/77/05fa64d1f45d12c22c314e7b97398ffb28ef2813a485465017b7978b3ce7/pillow-10.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bd2880a07482090a3bcb01f4265f1936a903d70bc740bfcb1fd4e8a2ffe5cf5a", size = 4477989, upload-time = "2024-07-01T09:46:58.977Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/12/63/b0397cfc2caae05c3fb2f4ed1b4fc4fc878f0243510a7a6034ca59726494/pillow-10.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b35b21b819ac1dbd1233317adeecd63495f6babf21b7b2512d244ff6c6ce309", size = 4610255, upload-time = "2024-07-01T09:47:01.189Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/7b/f9/cfaa5082ca9bc4a6de66ffe1c12c2d90bf09c309a5f52b27759a596900e7/pillow-10.4.0-cp313-cp313-win32.whl", hash = "sha256:551d3fd6e9dc15e4c1eb6fc4ba2b39c0c7933fa113b220057a34f4bb3268a060", size = 2235603, upload-time = "2024-07-01T09:47:03.918Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/01/6a/30ff0eef6e0c0e71e55ded56a38d4859bf9d3634a94a88743897b5f96936/pillow-10.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:030abdbe43ee02e0de642aee345efa443740aa4d828bfe8e2eb11922ea6a21ea", size = 2554972, upload-time = "2024-07-01T09:47:06.152Z" },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/48/2c/2e0a52890f269435eee38b21c8218e102c621fe8d8df8b9dd06fabf879ba/pillow-10.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:5b001114dd152cfd6b23befeb28d7aee43553e2402c9f159807bf55f33af8a8d", size = 2243375, upload-time = "2024-07-01T09:47:09.065Z" },
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "platformdirs"
|
name = "platformdirs"
|
||||||
version = "4.9.4"
|
version = "4.9.4"
|
||||||
|
|
@ -1012,6 +993,15 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
|
{ url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pyvips"
|
||||||
|
version = "3.1.1"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "cffi" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/2d/6a/282936de9faac6addf6bc8792c18e006489d0023ffd8856b8643f54d0558/pyvips-3.1.1.tar.gz", hash = "sha256:84fe744d023b1084ac2516bb17064cacd41c7f8aabf8e524dd383534941b9301", size = 56951, upload-time = "2025-12-09T18:38:06.355Z" }
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pyyaml"
|
name = "pyyaml"
|
||||||
version = "6.0.3"
|
version = "6.0.3"
|
||||||
|
|
@ -1093,10 +1083,10 @@ dependencies = [
|
||||||
{ name = "hypercorn" },
|
{ name = "hypercorn" },
|
||||||
{ name = "lxml" },
|
{ name = "lxml" },
|
||||||
{ name = "peewee" },
|
{ name = "peewee" },
|
||||||
{ name = "pillow" },
|
|
||||||
{ name = "prometheus-client" },
|
{ name = "prometheus-client" },
|
||||||
{ name = "pygea" },
|
{ name = "pygea" },
|
||||||
{ name = "python-dateutil" },
|
{ name = "python-dateutil" },
|
||||||
|
{ name = "pyvips" },
|
||||||
{ name = "quart" },
|
{ name = "quart" },
|
||||||
{ name = "scrapy" },
|
{ name = "scrapy" },
|
||||||
]
|
]
|
||||||
|
|
@ -1126,10 +1116,10 @@ requires-dist = [
|
||||||
{ name = "hypercorn", specifier = ">=0.18.0,<0.19.0" },
|
{ name = "hypercorn", specifier = ">=0.18.0,<0.19.0" },
|
||||||
{ name = "lxml", specifier = ">=5.2.1,<6.0.0" },
|
{ name = "lxml", specifier = ">=5.2.1,<6.0.0" },
|
||||||
{ name = "peewee", specifier = ">=3.19.0,<4.0.0" },
|
{ name = "peewee", specifier = ">=3.19.0,<4.0.0" },
|
||||||
{ name = "pillow", specifier = ">=10.3.0,<11.0.0" },
|
|
||||||
{ name = "prometheus-client", specifier = ">=0.20.0,<0.21.0" },
|
{ name = "prometheus-client", specifier = ">=0.20.0,<0.21.0" },
|
||||||
{ name = "pygea", git = "https://guardianproject.dev/anynews/pygea.git" },
|
{ name = "pygea", git = "https://guardianproject.dev/anynews/pygea.git" },
|
||||||
{ name = "python-dateutil", specifier = ">=2.9.0.post0,<3.0.0" },
|
{ name = "python-dateutil", specifier = ">=2.9.0.post0,<3.0.0" },
|
||||||
|
{ name = "pyvips", specifier = ">=3.0.0,<4.0.0" },
|
||||||
{ name = "quart", specifier = ">=0.20.0,<0.21.0" },
|
{ name = "quart", specifier = ">=0.20.0,<0.21.0" },
|
||||||
{ name = "scrapy", specifier = ">=2.11.1,<3.0.0" },
|
{ name = "scrapy", specifier = ">=2.11.1,<3.0.0" },
|
||||||
]
|
]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue