Compare commits
No commits in common. "18a7f652d4c4b6545d3a11c88325e5518bd5c631" and "180677efa71b97f0a9d9cd2d4ea0e4bcdac79a98" have entirely different histories.
18a7f652d4
...
180677efa7
17 changed files with 82 additions and 1463 deletions
42
flake.lock
generated
42
flake.lock
generated
|
|
@ -2,18 +2,16 @@
|
||||||
"nodes": {
|
"nodes": {
|
||||||
"nixpkgs": {
|
"nixpkgs": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1779622335,
|
"lastModified": 1774386573,
|
||||||
"narHash": "sha256-ViA62qtL5za7V3d5I8OA9q9JcFhsVAiL5jVHwEclWqk=",
|
"narHash": "sha256-4hAV26quOxdC6iyG7kYaZcM3VOskcPUrdCQd/nx8obc=",
|
||||||
"owner": "nixos",
|
"rev": "46db2e09e1d3f113a13c0d7b81e2f221c63b8ce9",
|
||||||
"repo": "nixpkgs",
|
"revCount": 969196,
|
||||||
"rev": "705e9929918b43bd7b715dc0a878ac870449bb03",
|
"type": "tarball",
|
||||||
"type": "github"
|
"url": "https://api.flakehub.com/f/pinned/NixOS/nixpkgs/0.1.969196%2Brev-46db2e09e1d3f113a13c0d7b81e2f221c63b8ce9/019d279e-af65-79ce-92be-5dee7b1e36d4/source.tar.gz"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
"owner": "nixos",
|
"type": "tarball",
|
||||||
"ref": "nixos-26.05",
|
"url": "https://flakehub.com/f/NixOS/nixpkgs/0.1"
|
||||||
"repo": "nixpkgs",
|
|
||||||
"type": "github"
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"pyproject-build-systems": {
|
"pyproject-build-systems": {
|
||||||
|
|
@ -29,11 +27,11 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1779676664,
|
"lastModified": 1773870109,
|
||||||
"narHash": "sha256-MbXylBTkWqVm8/VYjoULtMoVRgWBN1gSHbeRKsOsPlU=",
|
"narHash": "sha256-ZoTdqZP03DcdoyxvpFHCAek4bkPUTUPUF3oCCgc3dP4=",
|
||||||
"owner": "pyproject-nix",
|
"owner": "pyproject-nix",
|
||||||
"repo": "build-system-pkgs",
|
"repo": "build-system-pkgs",
|
||||||
"rev": "7bff980f37fc24e09dbc986643719900c139bf12",
|
"rev": "b6e74f433b02fa4b8a7965ee24680f4867e2926f",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
|
@ -49,11 +47,11 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1778901413,
|
"lastModified": 1774498001,
|
||||||
"narHash": "sha256-GSKXTAnFqRAMlZkJrIPcQMYf+lpMr66K3i60mB9STvc=",
|
"narHash": "sha256-wTfdyzzrmpuqt4TQQNqilF91v0m5Mh1stNy9h7a/WK4=",
|
||||||
"owner": "pyproject-nix",
|
"owner": "pyproject-nix",
|
||||||
"repo": "pyproject.nix",
|
"repo": "pyproject.nix",
|
||||||
"rev": "a228447c3e179d477c1b6246ef3efa8cfe3c469a",
|
"rev": "794afa6eb588b498344f2eaa36ab1ceb7e6b0b09",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
|
@ -78,11 +76,11 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1775636079,
|
"lastModified": 1773297127,
|
||||||
"narHash": "sha256-pc20NRoMdiar8oPQceQT47UUZMBTiMdUuWrYu2obUP0=",
|
"narHash": "sha256-6E/yhXP7Oy/NbXtf1ktzmU8SdVqJQ09HC/48ebEGBpk=",
|
||||||
"owner": "numtide",
|
"owner": "numtide",
|
||||||
"repo": "treefmt-nix",
|
"repo": "treefmt-nix",
|
||||||
"rev": "790751ff7fd3801feeaf96d7dc416a8d581265ba",
|
"rev": "71b125cd05fbfd78cab3e070b73544abe24c5016",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
|
@ -101,11 +99,11 @@
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1779411315,
|
"lastModified": 1774705889,
|
||||||
"narHash": "sha256-IMFlxeyClau51KplhhSRGhdGTvD/knShHdybP1UOTuk=",
|
"narHash": "sha256-TRTIM18gP3ccBj3m8bV1zx82xeYweNYp8/lgcdR4Zz0=",
|
||||||
"owner": "pyproject-nix",
|
"owner": "pyproject-nix",
|
||||||
"repo": "uv2nix",
|
"repo": "uv2nix",
|
||||||
"rev": "fdf2a76275d7a9c27deb5d2f2ab33526ac9052ff",
|
"rev": "28355ed75b466a15ff324e1baa151b550619fe67",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
|
|
||||||
14
flake.nix
14
flake.nix
|
|
@ -2,7 +2,7 @@
|
||||||
description = "republisher-redux - offline RSS and Atom feed mirroring";
|
description = "republisher-redux - offline RSS and Atom feed mirroring";
|
||||||
|
|
||||||
inputs = {
|
inputs = {
|
||||||
nixpkgs.url = "github:nixos/nixpkgs/nixos-26.05";
|
nixpkgs.url = "https://flakehub.com/f/NixOS/nixpkgs/0.1";
|
||||||
treefmt-nix = {
|
treefmt-nix = {
|
||||||
url = "github:numtide/treefmt-nix";
|
url = "github:numtide/treefmt-nix";
|
||||||
inputs.nixpkgs.follows = "nixpkgs";
|
inputs.nixpkgs.follows = "nixpkgs";
|
||||||
|
|
@ -63,12 +63,6 @@
|
||||||
feedgen = prev.feedgen.overrideAttrs (old: {
|
feedgen = prev.feedgen.overrideAttrs (old: {
|
||||||
nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ final.setuptools ];
|
nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [ final.setuptools ];
|
||||||
});
|
});
|
||||||
pyvips = prev.pyvips.overrideAttrs (old: {
|
|
||||||
nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
|
|
||||||
final.setuptools
|
|
||||||
final.pkgconfig
|
|
||||||
];
|
|
||||||
});
|
|
||||||
pygea = prev.pygea.overrideAttrs (old: {
|
pygea = prev.pygea.overrideAttrs (old: {
|
||||||
nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
|
nativeBuildInputs = (old.nativeBuildInputs or [ ]) ++ [
|
||||||
final.hatchling
|
final.hatchling
|
||||||
|
|
@ -114,7 +108,6 @@
|
||||||
checkPhase = ''
|
checkPhase = ''
|
||||||
runHook preCheck
|
runHook preCheck
|
||||||
export HOME="$(mktemp -d)"
|
export HOME="$(mktemp -d)"
|
||||||
export LD_LIBRARY_PATH="${pkgs.lib.makeLibraryPath [ pkgs.vips ]}:$LD_LIBRARY_PATH"
|
|
||||||
pytest tests/ -v
|
pytest tests/ -v
|
||||||
runHook postCheck
|
runHook postCheck
|
||||||
'';
|
'';
|
||||||
|
|
@ -132,8 +125,7 @@
|
||||||
postBuild = ''
|
postBuild = ''
|
||||||
rm -f "$out/bin/repub"
|
rm -f "$out/bin/repub"
|
||||||
makeWrapper "${baseVenv}/bin/repub" "$out/bin/repub" \
|
makeWrapper "${baseVenv}/bin/repub" "$out/bin/repub" \
|
||||||
--prefix PATH : "${pkgs.lib.makeBinPath [ ffmpegPackage ]}" \
|
--prefix PATH : "${pkgs.lib.makeBinPath [ ffmpegPackage ]}"
|
||||||
--prefix LD_LIBRARY_PATH : "${pkgs.lib.makeLibraryPath [ pkgs.vips ]}"
|
|
||||||
'';
|
'';
|
||||||
meta.mainProgram = "repub";
|
meta.mainProgram = "repub";
|
||||||
};
|
};
|
||||||
|
|
@ -281,14 +273,12 @@
|
||||||
packages = [
|
packages = [
|
||||||
pkgs.tailwindcss_4
|
pkgs.tailwindcss_4
|
||||||
pkgs.python313
|
pkgs.python313
|
||||||
pkgs.vips
|
|
||||||
pkgs.uv
|
pkgs.uv
|
||||||
pkgs.pyright
|
pkgs.pyright
|
||||||
(mkFfmpegPackage pkgs)
|
(mkFfmpegPackage pkgs)
|
||||||
];
|
];
|
||||||
env.LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath [
|
env.LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath [
|
||||||
pkgs.stdenv.cc.cc
|
pkgs.stdenv.cc.cc
|
||||||
pkgs.vips
|
|
||||||
];
|
];
|
||||||
env.UV_PROJECT_ENVIRONMENT = ".venv";
|
env.UV_PROJECT_ENVIRONMENT = ".venv";
|
||||||
env.UV_PYTHON_DOWNLOADS = "never";
|
env.UV_PYTHON_DOWNLOADS = "never";
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ dependencies = [
|
||||||
"colorlog>=6.8.2,<7.0.0",
|
"colorlog>=6.8.2,<7.0.0",
|
||||||
"feedparser>=6.0.11,<7.0.0",
|
"feedparser>=6.0.11,<7.0.0",
|
||||||
"lxml>=5.2.1,<6.0.0",
|
"lxml>=5.2.1,<6.0.0",
|
||||||
"pyvips>=3.0.0,<4.0.0",
|
"pillow>=10.3.0,<11.0.0",
|
||||||
"ffmpeg-python>=0.2.0,<0.3.0",
|
"ffmpeg-python>=0.2.0,<0.3.0",
|
||||||
"Quart>=0.20.0,<0.21.0",
|
"Quart>=0.20.0,<0.21.0",
|
||||||
"hypercorn>=0.18.0,<0.19.0",
|
"hypercorn>=0.18.0,<0.19.0",
|
||||||
|
|
|
||||||
|
|
@ -188,31 +188,21 @@ def build_feed_settings(
|
||||||
video_dir = base_settings.get("REPUBLISHER_VIDEO_DIR", VIDEO_DIR)
|
video_dir = base_settings.get("REPUBLISHER_VIDEO_DIR", VIDEO_DIR)
|
||||||
audio_dir = base_settings.get("REPUBLISHER_AUDIO_DIR", AUDIO_DIR)
|
audio_dir = base_settings.get("REPUBLISHER_AUDIO_DIR", AUDIO_DIR)
|
||||||
file_dir = base_settings.get("REPUBLISHER_FILE_DIR", FILE_DIR)
|
file_dir = base_settings.get("REPUBLISHER_FILE_DIR", FILE_DIR)
|
||||||
image_normalize_enabled = convert_images and base_settings.getbool(
|
|
||||||
"REPUBLISHER_IMAGE_NORMALIZE_ENABLED", True
|
|
||||||
)
|
|
||||||
image_thumbnails_enabled = image_normalize_enabled and base_settings.getbool(
|
|
||||||
"REPUBLISHER_IMAGE_THUMBNAILS_ENABLED", True
|
|
||||||
)
|
|
||||||
item_pipelines = dict(base_settings.getdict("ITEM_PIPELINES"))
|
item_pipelines = dict(base_settings.getdict("ITEM_PIPELINES"))
|
||||||
item_pipelines.pop("repub.pipelines.ImagePipeline", None)
|
item_pipelines.pop("repub.pipelines.ImagePipeline", None)
|
||||||
item_pipelines.pop("repub.pipelines.ImageNormalizePipeline", None)
|
|
||||||
item_pipelines.pop("repub.pipelines.ImageThumbnailPipeline", None)
|
|
||||||
item_pipelines.pop("repub.pipelines.AudioPipeline", None)
|
item_pipelines.pop("repub.pipelines.AudioPipeline", None)
|
||||||
item_pipelines.pop("repub.pipelines.VideoPipeline", None)
|
item_pipelines.pop("repub.pipelines.VideoPipeline", None)
|
||||||
item_pipelines.pop("repub.pipelines.FilePipeline", None)
|
item_pipelines.pop("repub.pipelines.FilePipeline", None)
|
||||||
item_pipelines.update(
|
item_pipelines.update(
|
||||||
{
|
{
|
||||||
"repub.pipelines.AudioPipeline": 3,
|
"repub.pipelines.AudioPipeline": 2,
|
||||||
"repub.pipelines.FilePipeline": 5,
|
"repub.pipelines.FilePipeline": 4,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
if image_normalize_enabled:
|
if convert_images:
|
||||||
item_pipelines["repub.pipelines.ImageNormalizePipeline"] = 1
|
item_pipelines["repub.pipelines.ImagePipeline"] = 1
|
||||||
if image_thumbnails_enabled:
|
|
||||||
item_pipelines["repub.pipelines.ImageThumbnailPipeline"] = 2
|
|
||||||
if convert_video:
|
if convert_video:
|
||||||
item_pipelines["repub.pipelines.VideoPipeline"] = 4
|
item_pipelines["repub.pipelines.VideoPipeline"] = 3
|
||||||
settings = base_settings.copy()
|
settings = base_settings.copy()
|
||||||
settings.setdict(
|
settings.setdict(
|
||||||
{
|
{
|
||||||
|
|
@ -229,8 +219,6 @@ def build_feed_settings(
|
||||||
"LOG_FILE": str(out_dir / "logs" / f"{feed_slug}.log"),
|
"LOG_FILE": str(out_dir / "logs" / f"{feed_slug}.log"),
|
||||||
"HTTPCACHE_DIR": str(out_dir / "httpcache"),
|
"HTTPCACHE_DIR": str(out_dir / "httpcache"),
|
||||||
"REPUBLISHER_IMAGE_DIR": image_dir,
|
"REPUBLISHER_IMAGE_DIR": image_dir,
|
||||||
"REPUBLISHER_IMAGE_NORMALIZE_ENABLED": image_normalize_enabled,
|
|
||||||
"REPUBLISHER_IMAGE_THUMBNAILS_ENABLED": image_thumbnails_enabled,
|
|
||||||
"REPUBLISHER_VIDEO_DIR": video_dir,
|
"REPUBLISHER_VIDEO_DIR": video_dir,
|
||||||
"REPUBLISHER_AUDIO_DIR": audio_dir,
|
"REPUBLISHER_AUDIO_DIR": audio_dir,
|
||||||
"REPUBLISHER_FILE_DIR": file_dir,
|
"REPUBLISHER_FILE_DIR": file_dir,
|
||||||
|
|
|
||||||
|
|
@ -9,17 +9,12 @@ from repub.items import (
|
||||||
ChannelElementItem,
|
ChannelElementItem,
|
||||||
ElementItem,
|
ElementItem,
|
||||||
MediaVariant,
|
MediaVariant,
|
||||||
ThumbnailVariant,
|
|
||||||
TranscodedImageFile,
|
|
||||||
TranscodedMediaFile,
|
TranscodedMediaFile,
|
||||||
)
|
)
|
||||||
from repub.utils import FileType, determine_file_type
|
from repub.utils import FileType, determine_file_type
|
||||||
|
|
||||||
MEDIA_CONTENT_TAG = QName(rss.nsmap["media"], "content").text
|
MEDIA_CONTENT_TAG = QName(rss.nsmap["media"], "content").text
|
||||||
MEDIA_GROUP_TAG = QName(rss.nsmap["media"], "group").text
|
MEDIA_GROUP_TAG = QName(rss.nsmap["media"], "group").text
|
||||||
MEDIA_THUMBNAIL_TAG = QName(rss.nsmap["media"], "thumbnail").text
|
|
||||||
ANYNEWS_SLOT_ATTR = QName(rss.nsmap["anynews"], "slot").text
|
|
||||||
ANYNEWS_TYPE_ATTR = QName(rss.nsmap["anynews"], "type").text
|
|
||||||
|
|
||||||
|
|
||||||
class RssExporter(BaseItemExporter):
|
class RssExporter(BaseItemExporter):
|
||||||
|
|
@ -57,9 +52,7 @@ class RssExporter(BaseItemExporter):
|
||||||
key: str(value) for key, value in attrib.items() if value not in (None, "")
|
key: str(value) for key, value in attrib.items() if value not in (None, "")
|
||||||
}
|
}
|
||||||
|
|
||||||
def canonical_variant(
|
def canonical_variant(self, media_file: TranscodedMediaFile) -> MediaVariant | None:
|
||||||
self, media_file: TranscodedMediaFile | TranscodedImageFile
|
|
||||||
) -> MediaVariant | None:
|
|
||||||
for variant in media_file["variants"]:
|
for variant in media_file["variants"]:
|
||||||
if variant.get("isDefault") == "true":
|
if variant.get("isDefault") == "true":
|
||||||
return variant
|
return variant
|
||||||
|
|
@ -99,8 +92,6 @@ class RssExporter(BaseItemExporter):
|
||||||
def strip_managed_media_nodes(self, item: ElementItem) -> dict[str, dict[str, str]]:
|
def strip_managed_media_nodes(self, item: ElementItem) -> dict[str, dict[str, str]]:
|
||||||
fallbacks: dict[str, dict[str, str]] = {}
|
fallbacks: dict[str, dict[str, str]] = {}
|
||||||
managed_types: set[FileType] = set()
|
managed_types: set[FileType] = set()
|
||||||
if self.managed_image_files(item):
|
|
||||||
managed_types.add(FileType.IMAGE)
|
|
||||||
if item.audios:
|
if item.audios:
|
||||||
managed_types.add(FileType.AUDIO)
|
managed_types.add(FileType.AUDIO)
|
||||||
if item.videos:
|
if item.videos:
|
||||||
|
|
@ -109,9 +100,6 @@ class RssExporter(BaseItemExporter):
|
||||||
return fallbacks
|
return fallbacks
|
||||||
|
|
||||||
for child in list(item.el):
|
for child in list(item.el):
|
||||||
if child.tag == MEDIA_THUMBNAIL_TAG and FileType.IMAGE in managed_types:
|
|
||||||
item.el.remove(child)
|
|
||||||
continue
|
|
||||||
if child.tag == MEDIA_CONTENT_TAG:
|
if child.tag == MEDIA_CONTENT_TAG:
|
||||||
if self.owned_media_type(child, managed_types) is None:
|
if self.owned_media_type(child, managed_types) is None:
|
||||||
continue
|
continue
|
||||||
|
|
@ -125,43 +113,25 @@ class RssExporter(BaseItemExporter):
|
||||||
|
|
||||||
if child.tag != MEDIA_GROUP_TAG:
|
if child.tag != MEDIA_GROUP_TAG:
|
||||||
continue
|
continue
|
||||||
managed_image_group = False
|
|
||||||
for media_content in list(child):
|
for media_content in list(child):
|
||||||
if media_content.tag != MEDIA_CONTENT_TAG:
|
if media_content.tag != MEDIA_CONTENT_TAG:
|
||||||
continue
|
continue
|
||||||
owned_type = self.owned_media_type(media_content, managed_types)
|
if self.owned_media_type(media_content, managed_types) is None:
|
||||||
if owned_type is None:
|
|
||||||
continue
|
continue
|
||||||
if owned_type == FileType.IMAGE:
|
|
||||||
managed_image_group = True
|
|
||||||
fallbacks[media_content.get("url", "")] = {
|
fallbacks[media_content.get("url", "")] = {
|
||||||
key: value
|
key: value
|
||||||
for key, value in media_content.attrib.items()
|
for key, value in media_content.attrib.items()
|
||||||
if key in {"expression", "lang"}
|
if key in {"expression", "lang"}
|
||||||
}
|
}
|
||||||
child.remove(media_content)
|
child.remove(media_content)
|
||||||
if managed_image_group:
|
|
||||||
for media_thumbnail in list(child):
|
|
||||||
if media_thumbnail.tag == MEDIA_THUMBNAIL_TAG:
|
|
||||||
child.remove(media_thumbnail)
|
|
||||||
if len(child) == 0:
|
if len(child) == 0:
|
||||||
item.el.remove(child)
|
item.el.remove(child)
|
||||||
return fallbacks
|
return fallbacks
|
||||||
|
|
||||||
def managed_image_files(self, item: ElementItem) -> list[TranscodedImageFile]:
|
|
||||||
media_image_urls = set(item.media_image_urls)
|
|
||||||
if not media_image_urls:
|
|
||||||
return []
|
|
||||||
return [image for image in item.images if image["url"] in media_image_urls]
|
|
||||||
|
|
||||||
def append_media_groups(
|
def append_media_groups(
|
||||||
self, item: ElementItem, fallbacks: dict[str, dict[str, str]]
|
self, item: ElementItem, fallbacks: dict[str, dict[str, str]]
|
||||||
):
|
):
|
||||||
for media_file in [
|
for media_file in [*item.audios, *item.videos]:
|
||||||
*self.managed_image_files(item),
|
|
||||||
*item.audios,
|
|
||||||
*item.videos,
|
|
||||||
]:
|
|
||||||
if not media_file["variants"]:
|
if not media_file["variants"]:
|
||||||
continue
|
continue
|
||||||
fallback_attrib = fallbacks.get(media_file["published_url"], {})
|
fallback_attrib = fallbacks.get(media_file["published_url"], {})
|
||||||
|
|
@ -171,11 +141,7 @@ class RssExporter(BaseItemExporter):
|
||||||
**self.media_content_attrib(variant, fallback_attrib)
|
**self.media_content_attrib(variant, fallback_attrib)
|
||||||
)
|
)
|
||||||
for variant in media_file["variants"]
|
for variant in media_file["variants"]
|
||||||
],
|
]
|
||||||
*[
|
|
||||||
rss.MEDIA.thumbnail(**self.media_thumbnail_attrib(thumbnail))
|
|
||||||
for thumbnail in media_file.get("thumbnails", [])
|
|
||||||
],
|
|
||||||
)
|
)
|
||||||
if group is not None:
|
if group is not None:
|
||||||
item.el.append(group)
|
item.el.append(group)
|
||||||
|
|
@ -204,22 +170,10 @@ class RssExporter(BaseItemExporter):
|
||||||
)
|
)
|
||||||
return attrib
|
return attrib
|
||||||
|
|
||||||
def media_thumbnail_attrib(self, thumbnail: ThumbnailVariant) -> dict[str, str]:
|
|
||||||
attrib = self.compact_attrib(
|
|
||||||
url=thumbnail.get("url"),
|
|
||||||
width=thumbnail.get("width"),
|
|
||||||
height=thumbnail.get("height"),
|
|
||||||
)
|
|
||||||
if thumbnail.get("slot"):
|
|
||||||
attrib[ANYNEWS_SLOT_ATTR] = str(thumbnail["slot"])
|
|
||||||
if thumbnail.get("type"):
|
|
||||||
attrib[ANYNEWS_TYPE_ATTR] = str(thumbnail["type"])
|
|
||||||
return attrib
|
|
||||||
|
|
||||||
def apply_transcoded_media(self, item: Any) -> None:
|
def apply_transcoded_media(self, item: Any) -> None:
|
||||||
if not isinstance(item, ElementItem):
|
if not isinstance(item, ElementItem):
|
||||||
return
|
return
|
||||||
if not self.managed_image_files(item) and not item.audios and not item.videos:
|
if not item.audios and not item.videos:
|
||||||
return
|
return
|
||||||
self.rebuild_enclosures(item)
|
self.rebuild_enclosures(item)
|
||||||
fallbacks = self.strip_managed_media_nodes(item)
|
fallbacks = self.strip_managed_media_nodes(item)
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass
|
||||||
from typing import Any, List, TypedDict
|
from typing import Any, List, TypedDict
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -8,7 +8,7 @@ class MediaVariant(TypedDict, total=False):
|
||||||
type: str
|
type: str
|
||||||
medium: str
|
medium: str
|
||||||
isDefault: str
|
isDefault: str
|
||||||
fileSize: int | str
|
fileSize: str
|
||||||
bitrate: int | float | str
|
bitrate: int | float | str
|
||||||
samplingrate: int | str
|
samplingrate: int | str
|
||||||
channels: int | str
|
channels: int | str
|
||||||
|
|
@ -29,39 +29,18 @@ class TranscodedMediaFile(TypedDict):
|
||||||
variants: List[MediaVariant]
|
variants: List[MediaVariant]
|
||||||
|
|
||||||
|
|
||||||
class ThumbnailVariant(TypedDict, total=False):
|
|
||||||
url: str
|
|
||||||
path: str
|
|
||||||
width: int | str
|
|
||||||
height: int | str
|
|
||||||
slot: str
|
|
||||||
type: str
|
|
||||||
|
|
||||||
|
|
||||||
class TranscodedImageFile(TypedDict):
|
|
||||||
url: str
|
|
||||||
path: str
|
|
||||||
checksum: str | None
|
|
||||||
status: str
|
|
||||||
published_url: str
|
|
||||||
source_path: str
|
|
||||||
variants: List[MediaVariant]
|
|
||||||
thumbnails: List[ThumbnailVariant]
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ElementItem:
|
class ElementItem:
|
||||||
feed_name: str
|
feed_name: str
|
||||||
el: Any
|
el: Any
|
||||||
image_urls: List[str]
|
image_urls: List[str]
|
||||||
images: List[TranscodedImageFile]
|
images: List[Any]
|
||||||
file_urls: List[str]
|
file_urls: List[str]
|
||||||
files: List[Any]
|
files: List[Any]
|
||||||
audio_urls: List[str]
|
audio_urls: List[str]
|
||||||
audios: List[TranscodedMediaFile]
|
audios: List[TranscodedMediaFile]
|
||||||
video_urls: List[str]
|
video_urls: List[str]
|
||||||
videos: List[TranscodedMediaFile]
|
videos: List[TranscodedMediaFile]
|
||||||
media_image_urls: List[str] = field(default_factory=list)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|
@ -69,5 +48,4 @@ class ChannelElementItem:
|
||||||
feed_name: str
|
feed_name: str
|
||||||
el: Any
|
el: Any
|
||||||
image_urls: List[str]
|
image_urls: List[str]
|
||||||
images: List[TranscodedImageFile]
|
images: List[Any]
|
||||||
media_image_urls: List[str] = field(default_factory=list)
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
import functools
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import mimetypes
|
import mimetypes
|
||||||
|
|
@ -9,482 +8,24 @@ from os import PathLike
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional, Union, cast
|
from typing import Any, Dict, List, Optional, Union, cast
|
||||||
|
|
||||||
import pyvips
|
|
||||||
from scrapy.crawler import Crawler
|
from scrapy.crawler import Crawler
|
||||||
from scrapy.pipelines.files import FileException
|
from scrapy.pipelines.files import FileException
|
||||||
from scrapy.pipelines.files import FilesPipeline as BaseFilesPipeline
|
from scrapy.pipelines.files import FilesPipeline as BaseFilesPipeline
|
||||||
|
from scrapy.pipelines.images import ImagesPipeline as BaseImagesPipeline
|
||||||
|
|
||||||
import repub.utils
|
import repub.utils
|
||||||
from repub import media
|
from repub import media
|
||||||
from repub.items import (
|
from repub.items import MediaVariant, TranscodedMediaFile
|
||||||
MediaVariant,
|
|
||||||
ThumbnailVariant,
|
|
||||||
TranscodedImageFile,
|
|
||||||
TranscodedMediaFile,
|
|
||||||
)
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class ImageException(FileException):
|
class ImagePipeline(BaseImagesPipeline):
|
||||||
"""General image error exception"""
|
|
||||||
|
|
||||||
|
|
||||||
def image_mimetype(response=None, *, url: str | None = None) -> str | None:
|
|
||||||
del url
|
|
||||||
if response is not None:
|
|
||||||
content_type = response.headers.get(b"Content-Type")
|
|
||||||
if content_type:
|
|
||||||
return content_type.decode("utf-8").split(";", 1)[0].strip()
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def image_loader_name(image: Any) -> str:
|
|
||||||
if image.get_typeof("vips-loader"):
|
|
||||||
return str(image.get("vips-loader"))
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
|
||||||
def image_loader_mimetype(loader: str, fallback: str | None = None) -> str | None:
|
|
||||||
known = {
|
|
||||||
"jpegload": "image/jpeg",
|
|
||||||
"pngload": "image/png",
|
|
||||||
"gifload": "image/gif",
|
|
||||||
"svgload": "image/svg+xml",
|
|
||||||
"tiffload": "image/tiff",
|
|
||||||
"webpload": "image/webp",
|
|
||||||
"heifload": "image/heif",
|
|
||||||
"jxlload": "image/jxl",
|
|
||||||
}
|
|
||||||
for prefix, mimetype in known.items():
|
|
||||||
if loader.startswith(prefix):
|
|
||||||
return mimetype
|
|
||||||
return fallback
|
|
||||||
|
|
||||||
|
|
||||||
def load_image_from_buffer(body: bytes) -> Any:
|
|
||||||
try:
|
|
||||||
return cast(
|
|
||||||
Any,
|
|
||||||
pyvips.Image.new_from_buffer(body, "", access="sequential"),
|
|
||||||
)
|
|
||||||
except pyvips.Error as exc:
|
|
||||||
raise ImageException(str(exc)) from exc
|
|
||||||
|
|
||||||
|
|
||||||
def load_image_from_file(file_path: str | Path) -> Any:
|
|
||||||
try:
|
|
||||||
return cast(
|
|
||||||
Any,
|
|
||||||
pyvips.Image.new_from_file(str(file_path), access="sequential"),
|
|
||||||
)
|
|
||||||
except pyvips.Error as exc:
|
|
||||||
raise ImageException(str(exc)) from exc
|
|
||||||
|
|
||||||
|
|
||||||
def render_image_profile(source_path: str | Path, profile: dict[str, Any]) -> BytesIO:
|
|
||||||
transform = str(profile["transform"])
|
|
||||||
transform_kwargs = dict(profile.get("transform_kwargs", {}))
|
|
||||||
width = int(transform_kwargs.pop("width"))
|
|
||||||
if transform == "thumbnail":
|
|
||||||
image = cast(
|
|
||||||
Any,
|
|
||||||
pyvips.Image.thumbnail(str(source_path), width, **transform_kwargs),
|
|
||||||
)
|
|
||||||
elif transform == "thumbnail_buffer":
|
|
||||||
image = cast(
|
|
||||||
Any,
|
|
||||||
pyvips.Image.thumbnail_buffer(
|
|
||||||
Path(source_path).read_bytes(),
|
|
||||||
width,
|
|
||||||
**transform_kwargs,
|
|
||||||
),
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
raise ImageException(f"Unsupported image transform: {transform}")
|
|
||||||
|
|
||||||
image = image.colourspace("srgb")
|
|
||||||
if image.hasalpha() and (
|
|
||||||
profile["mimetype"] == "image/jpeg"
|
|
||||||
or "background" in profile.get("save_kwargs", {})
|
|
||||||
):
|
|
||||||
image = image.flatten(
|
|
||||||
background=profile.get("save_kwargs", {}).get("background", [255, 255, 255])
|
|
||||||
)
|
|
||||||
|
|
||||||
save_name = str(profile["save"])
|
|
||||||
try:
|
|
||||||
image_bytes = getattr(image, save_name)(**dict(profile.get("save_kwargs", {})))
|
|
||||||
except pyvips.Error as exc:
|
|
||||||
raise ImageException(str(exc)) from exc
|
|
||||||
return BytesIO(cast(bytes, image_bytes))
|
|
||||||
|
|
||||||
|
|
||||||
def image_buffer_meta(
|
|
||||||
body: bytes,
|
|
||||||
*,
|
|
||||||
fallback_mimetype: str | None = None,
|
|
||||||
) -> tuple[int, int, int, str | None]:
|
|
||||||
image = load_image_from_buffer(body)
|
|
||||||
mimetype = image_loader_mimetype(image_loader_name(image), fallback_mimetype)
|
|
||||||
return image.width, image.height, len(body), mimetype
|
|
||||||
|
|
||||||
|
|
||||||
def image_variant_meta(
|
|
||||||
file_path: str | Path,
|
|
||||||
*,
|
|
||||||
fallback_mimetype: str | None = None,
|
|
||||||
) -> tuple[int, int, int, str | None]:
|
|
||||||
image = load_image_from_file(file_path)
|
|
||||||
mimetype = image_loader_mimetype(image_loader_name(image), fallback_mimetype)
|
|
||||||
return image.width, image.height, Path(file_path).stat().st_size, mimetype
|
|
||||||
|
|
||||||
|
|
||||||
class ImageNormalizePipeline(BaseFilesPipeline):
|
|
||||||
MEDIA_NAME = "image"
|
|
||||||
EXPIRES = 90
|
|
||||||
MIN_WIDTH = 0
|
|
||||||
MIN_HEIGHT = 0
|
|
||||||
DEFAULT_FILES_URLS_FIELD = "image_urls"
|
|
||||||
DEFAULT_FILES_RESULT_FIELD = "images"
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def from_crawler(cls, crawler: Crawler):
|
|
||||||
cls._update_stores(crawler.settings)
|
|
||||||
return cls(crawler.settings["IMAGES_STORE"], crawler=crawler)
|
|
||||||
|
|
||||||
def __init__(self, store_uri: Union[str, PathLike], *, crawler: Crawler):
|
|
||||||
self.settings = crawler.settings
|
|
||||||
super().__init__(store_uri, crawler=crawler)
|
|
||||||
resolve = functools.partial(
|
|
||||||
self._key_for_pipe,
|
|
||||||
base_class_name="ImagesPipeline",
|
|
||||||
settings=self.settings,
|
|
||||||
)
|
|
||||||
self.expires = self.settings.getint(resolve("IMAGES_EXPIRES"), self.EXPIRES)
|
|
||||||
self.files_urls_field = self.settings.get(
|
|
||||||
resolve("IMAGES_URLS_FIELD"),
|
|
||||||
self.DEFAULT_FILES_URLS_FIELD,
|
|
||||||
)
|
|
||||||
self.files_result_field = self.settings.get(
|
|
||||||
resolve("IMAGES_RESULT_FIELD"),
|
|
||||||
self.DEFAULT_FILES_RESULT_FIELD,
|
|
||||||
)
|
|
||||||
self.min_width = self.settings.getint(
|
|
||||||
resolve("IMAGES_MIN_WIDTH"),
|
|
||||||
self.MIN_WIDTH,
|
|
||||||
)
|
|
||||||
self.min_height = self.settings.getint(
|
|
||||||
resolve("IMAGES_MIN_HEIGHT"),
|
|
||||||
self.MIN_HEIGHT,
|
|
||||||
)
|
|
||||||
|
|
||||||
def get_image_settings(self) -> list[dict[str, Any]]:
|
|
||||||
return list(self.settings["REPUBLISHER_IMAGE"])
|
|
||||||
|
|
||||||
def file_path(self, request, response=None, info=None, *, item=None):
|
def file_path(self, request, response=None, info=None, *, item=None):
|
||||||
return repub.utils.canonical_published_image_path(
|
return repub.utils.local_image_path(request.url)
|
||||||
request.url,
|
|
||||||
self.get_image_settings(),
|
|
||||||
)
|
|
||||||
|
|
||||||
def source_path(self, request, response=None) -> str:
|
def thumb_path(self, request, thumb_id, response=None, info=None, *, item=None):
|
||||||
return repub.utils.source_image_path(
|
raise NotImplementedError()
|
||||||
request.url,
|
|
||||||
image_mimetype(response, url=request.url),
|
|
||||||
)
|
|
||||||
|
|
||||||
def resolve_source_path(self, request, response=None) -> str:
|
|
||||||
source_path = self.source_path(request, response)
|
|
||||||
if response is not None:
|
|
||||||
return source_path
|
|
||||||
source_file = self.local_store_path(source_path)
|
|
||||||
if source_file.exists():
|
|
||||||
return source_path
|
|
||||||
source_dir = self.local_store_path(
|
|
||||||
str(self.settings.get("REPUBLISHER_IMAGE_SOURCE_SUBDIR", "source"))
|
|
||||||
)
|
|
||||||
guid = repub.utils.image_guid(request.url)
|
|
||||||
matches = sorted(source_dir.glob(f"{guid}.*"))
|
|
||||||
if matches:
|
|
||||||
return f"{source_dir.name}/{matches[0].name}"
|
|
||||||
return source_path
|
|
||||||
|
|
||||||
def variant_paths(self, source_url: str) -> list[tuple[bool, dict[str, Any], str]]:
|
|
||||||
return [
|
|
||||||
(
|
|
||||||
index == 0,
|
|
||||||
setting,
|
|
||||||
repub.utils.published_image_path(source_url, setting),
|
|
||||||
)
|
|
||||||
for index, setting in enumerate(self.get_image_settings())
|
|
||||||
]
|
|
||||||
|
|
||||||
def published_url(self, path: str, item=None) -> str:
|
|
||||||
relative_path = f"{self.settings['REPUBLISHER_IMAGE_DIR']}/{path}"
|
|
||||||
feed_url = str(self.settings.get("REPUBLISHER_FEED_URL", "")).rstrip("/")
|
|
||||||
if feed_url == "" or item is None:
|
|
||||||
return relative_path
|
|
||||||
return f"{feed_url}/feeds/{item.feed_name}/{relative_path}"
|
|
||||||
|
|
||||||
def local_store_path(self, path: str) -> Path:
|
|
||||||
return Path(cast(Any, self.store).basedir) / path
|
|
||||||
|
|
||||||
def image_variant(
|
|
||||||
self,
|
|
||||||
*,
|
|
||||||
path: str,
|
|
||||||
mimetype: str,
|
|
||||||
width: int,
|
|
||||||
height: int,
|
|
||||||
file_size: int,
|
|
||||||
is_default: bool,
|
|
||||||
item=None,
|
|
||||||
) -> MediaVariant:
|
|
||||||
variant: MediaVariant = {
|
|
||||||
"url": self.published_url(path, item),
|
|
||||||
"path": path,
|
|
||||||
"type": mimetype,
|
|
||||||
"medium": repub.utils.FileType.IMAGE.value,
|
|
||||||
"isDefault": "true" if is_default else "false",
|
|
||||||
"fileSize": file_size,
|
|
||||||
"width": width,
|
|
||||||
"height": height,
|
|
||||||
}
|
|
||||||
return variant
|
|
||||||
|
|
||||||
def load_variants_from_disk(self, request, *, item=None) -> list[MediaVariant]:
|
|
||||||
variants: list[MediaVariant] = []
|
|
||||||
for is_default, setting, path in self.variant_paths(request.url):
|
|
||||||
file_path = self.local_store_path(path)
|
|
||||||
if not file_path.exists():
|
|
||||||
continue
|
|
||||||
width, height, file_size, mimetype = image_variant_meta(
|
|
||||||
file_path,
|
|
||||||
fallback_mimetype=setting["mimetype"],
|
|
||||||
)
|
|
||||||
variants.append(
|
|
||||||
self.image_variant(
|
|
||||||
path=path,
|
|
||||||
mimetype=mimetype or setting["mimetype"],
|
|
||||||
width=width,
|
|
||||||
height=height,
|
|
||||||
file_size=file_size,
|
|
||||||
is_default=is_default,
|
|
||||||
item=item,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
return variants
|
|
||||||
|
|
||||||
def make_file_result(
|
|
||||||
self,
|
|
||||||
request,
|
|
||||||
*,
|
|
||||||
checksum: str | None,
|
|
||||||
status: str,
|
|
||||||
response=None,
|
|
||||||
item=None,
|
|
||||||
) -> TranscodedImageFile:
|
|
||||||
path = self.file_path(request, item=item)
|
|
||||||
return {
|
|
||||||
"url": request.url,
|
|
||||||
"path": path,
|
|
||||||
"published_url": self.published_url(path, item),
|
|
||||||
"checksum": checksum,
|
|
||||||
"status": status,
|
|
||||||
"source_path": self.resolve_source_path(request, response),
|
|
||||||
"variants": self.load_variants_from_disk(request, item=item),
|
|
||||||
"thumbnails": [],
|
|
||||||
}
|
|
||||||
|
|
||||||
def media_to_download(self, request, info, *, item=None):
|
|
||||||
canonical_path = self.file_path(request, info=info, item=item)
|
|
||||||
canonical_stat = cast(
|
|
||||||
dict[str, Any] | None,
|
|
||||||
self.store.stat_file(canonical_path, info),
|
|
||||||
)
|
|
||||||
if not canonical_stat:
|
|
||||||
return None
|
|
||||||
last_modified = canonical_stat.get("last_modified")
|
|
||||||
if not last_modified:
|
|
||||||
return None
|
|
||||||
age_days = (time.time() - last_modified) / 60 / 60 / 24
|
|
||||||
if age_days > self.expires:
|
|
||||||
return None
|
|
||||||
if not cast(
|
|
||||||
dict[str, Any] | None,
|
|
||||||
self.store.stat_file(self.resolve_source_path(request), info),
|
|
||||||
):
|
|
||||||
return None
|
|
||||||
for _, _, path in self.variant_paths(request.url):
|
|
||||||
if not cast(dict[str, Any] | None, self.store.stat_file(path, info)):
|
|
||||||
return None
|
|
||||||
self.inc_stats("uptodate")
|
|
||||||
return self.make_file_result(
|
|
||||||
request,
|
|
||||||
checksum=canonical_stat.get("checksum"),
|
|
||||||
status="uptodate",
|
|
||||||
item=item,
|
|
||||||
)
|
|
||||||
|
|
||||||
def persist_variants(self, response, request, info, *, item=None) -> str | None:
    """Store the downloaded source image and render every missing variant.

    The raw response body is persisted under the source path unless the
    store already has it; each configured variant that is not yet in the
    store is then rendered from the source file and persisted.

    Returns:
        The checksum of the canonical variant: the stored checksum when the
        file already existed, the freshly computed one when it was rendered
        now, or ``None`` if no variant path matched the canonical path.

    Raises:
        ImageException: If the decoded image is narrower than
            ``self.min_width`` or shorter than ``self.min_height``.
    """
    source_rel_path = self.source_path(request, response)
    source_file_path = self.local_store_path(source_rel_path)
    source_buf = BytesIO(response.body)
    source_image = load_image_from_buffer(response.body).autorot()

    too_narrow = source_image.width < self.min_width
    too_short = source_image.height < self.min_height
    if too_narrow or too_short:
        raise ImageException(
            "Image too small "
            f"({source_image.width}x{source_image.height} < "
            f"{self.min_width}x{self.min_height})"
        )

    source_stat = cast(
        dict[str, Any] | None,
        self.store.stat_file(source_rel_path, info),
    )
    if not source_stat:
        content_type = (
            image_loader_mimetype(
                image_loader_name(source_image),
                image_mimetype(response, url=request.url),
            )
            or "application/octet-stream"
        )
        self.store.persist_file(
            source_rel_path,
            source_buf,
            info,
            meta={"width": source_image.width, "height": source_image.height},
            headers={"Content-Type": content_type},
        )

    canonical_path = self.file_path(
        request, response=response, info=info, item=item
    )
    canonical_checksum = None
    for _, profile, variant_path in self.variant_paths(request.url):
        is_canonical = variant_path == canonical_path
        existing = cast(
            dict[str, Any] | None, self.store.stat_file(variant_path, info)
        )
        if existing:
            # Already rendered earlier: reuse the stored checksum, skip work.
            if is_canonical:
                canonical_checksum = existing.get("checksum")
            continue

        # Variants are rendered from the source file in the local store.
        rendered = render_image_profile(source_file_path, profile)
        width, height, file_size, _ = image_buffer_meta(
            rendered.getvalue(),
            fallback_mimetype=profile["mimetype"],
        )
        checksum = buffer_checksum(rendered)
        self.store.persist_file(
            variant_path,
            rendered,
            info,
            meta={"width": width, "height": height, "fileSize": file_size},
            headers={"Content-Type": profile["mimetype"]},
        )
        if is_canonical:
            canonical_checksum = checksum
    return canonical_checksum
|
|
||||||
|
|
||||||
def media_downloaded(self, response, request, info, *, item=None):
    """Validate a finished download, persist its variants and build the result.

    Raises:
        FileException: ``"download-error"`` when the HTTP status is not 200,
            ``"empty-content"`` when the response body is empty.

    Returns:
        The value of ``make_file_result`` with status ``"cached"`` when the
        response carries the ``"cached"`` flag, ``"downloaded"`` otherwise.
    """
    if response.status != 200:
        raise FileException("download-error")
    if not response.body:
        raise FileException("empty-content")

    if "cached" in response.flags:
        status = "cached"
    else:
        status = "downloaded"
    self.inc_stats(status)

    canonical_checksum = self.persist_variants(response, request, info, item=item)
    return self.make_file_result(
        request,
        checksum=canonical_checksum,
        status=status,
        response=response,
        item=item,
    )
|
|
||||||
|
|
||||||
|
|
||||||
class ImageThumbnailPipeline:
    """Item pipeline step that attaches thumbnail renditions to an item's images.

    For every entry of ``item.images`` that carries a ``source_path``, one
    thumbnail per profile in ``REPUBLISHER_IMAGE_THUMBNAILS`` is rendered
    into the store directory (unless the file already exists) and described
    in that entry's ``"thumbnails"`` list.
    """

    @classmethod
    def from_crawler(cls, crawler: Crawler):
        """Build the pipeline from the crawler's ``IMAGES_STORE`` setting."""
        store_uri = crawler.settings["IMAGES_STORE"]
        return cls(store_uri, crawler=crawler)

    def __init__(self, store_uri: Union[str, PathLike], *, crawler: Crawler):
        self.settings = crawler.settings
        # Thumbnails are read and written directly on the local filesystem.
        self.store_dir = Path(store_uri)

    def get_thumbnail_settings(self) -> list[dict[str, Any]]:
        """Return a fresh list of the configured thumbnail profiles."""
        profiles = self.settings["REPUBLISHER_IMAGE_THUMBNAILS"]
        return list(profiles)

    def local_store_path(self, path: str) -> Path:
        """Resolve a store-relative path against the store directory."""
        return self.store_dir / path

    def published_url(self, path: str, item=None) -> str:
        """Return the URL under which ``path`` is published.

        Without a configured ``REPUBLISHER_FEED_URL`` or without an item the
        path relative to the feed (image dir + path) is returned; otherwise
        an absolute URL below ``<feed_url>/feeds/<feed_name>/``.
        """
        relative_path = f"{self.settings['REPUBLISHER_IMAGE_DIR']}/{path}"
        feed_url = str(self.settings.get("REPUBLISHER_FEED_URL", "")).rstrip("/")
        if feed_url != "" and item is not None:
            return f"{feed_url}/feeds/{item.feed_name}/{relative_path}"
        return relative_path

    def persist_thumbnail(
        self, source_file: Path, final_path: str, profile: dict[str, Any]
    ):
        """Render ``source_file`` with ``profile`` and write it to ``final_path``."""
        rendered = render_image_profile(source_file, profile)
        destination = self.local_store_path(final_path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_bytes(rendered.getvalue())

    def load_thumbnail(
        self,
        *,
        source_url: str,
        profile: dict[str, Any],
        item=None,
    ) -> ThumbnailVariant | None:
        """Describe the stored thumbnail of ``source_url`` for ``profile``.

        Returns ``None`` when the thumbnail file does not exist on disk.
        """
        final_path = repub.utils.thumbnail_image_path(source_url, profile)
        stored_file = self.local_store_path(final_path)
        if not stored_file.exists():
            return None
        width, height, _, detected_type = image_variant_meta(
            stored_file,
            fallback_mimetype=profile["mimetype"],
        )
        described: ThumbnailVariant = {
            "url": self.published_url(final_path, item),
            "path": final_path,
            "slot": str(profile["name"]),
            "type": detected_type or profile["mimetype"],
            "width": width,
            "height": height,
        }
        return described

    def _thumbnails_for(self, image, item) -> list[ThumbnailVariant]:
        """Render (if needed) and describe all thumbnails of one image entry."""
        source_path = image.get("source_path")
        if not source_path:
            return []
        source_file = self.local_store_path(source_path)
        collected: list[ThumbnailVariant] = []
        for profile in self.get_thumbnail_settings():
            target_path = repub.utils.thumbnail_image_path(image["url"], profile)
            if not self.local_store_path(target_path).exists():
                try:
                    self.persist_thumbnail(source_file, target_path, profile)
                except ImageException as exc:
                    # A failed rendition only drops this one thumbnail.
                    logger.warning(
                        "Failed to generate thumbnail for %s: %s", image["url"], exc
                    )
                    continue
            described = self.load_thumbnail(
                source_url=image["url"],
                profile=profile,
                item=item,
            )
            if described is not None:
                collected.append(described)
        return collected

    def process_item(self, item, spider):
        """Fill in ``image["thumbnails"]`` for each image of ``item``.

        Items without an ``images`` attribute (or with an empty one) are
        returned unchanged.
        """
        del spider
        images = getattr(item, "images", None)
        if not images:
            return item
        for image in images:
            image["thumbnails"] = self._thumbnails_for(image, item)
        return item
|
|
||||||
|
|
||||||
|
|
||||||
# Alias: the name ``ImagePipeline`` resolves to the normalizing image pipeline.
ImagePipeline = ImageNormalizePipeline
|
|
||||||
|
|
||||||
|
|
||||||
class FilePipeline(BaseFilesPipeline):
|
class FilePipeline(BaseFilesPipeline):
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,6 @@ nsmap = {
|
||||||
"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd",
|
"itunes": "http://www.itunes.com/dtds/podcast-1.0.dtd",
|
||||||
"dc": "http://purl.org/dc/elements/1.1/",
|
"dc": "http://purl.org/dc/elements/1.1/",
|
||||||
"atom": "http://www.w3.org/2005/Atom",
|
"atom": "http://www.w3.org/2005/Atom",
|
||||||
"anynews": "https://guardianproject.info/rss/anynews/1.0",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
CONTENT = SafeElementMaker(nsmap={None: nsmap["content"]}, namespace=nsmap["content"])
|
CONTENT = SafeElementMaker(nsmap={None: nsmap["content"]}, namespace=nsmap["content"])
|
||||||
|
|
|
||||||
|
|
@ -100,116 +100,6 @@ LOG_LEVEL = "INFO"
|
||||||
|
|
||||||
MEDIA_ALLOW_REDIRECTS = True
|
MEDIA_ALLOW_REDIRECTS = True
|
||||||
|
|
||||||
# Feature flags for the two image pipeline stages (both on by default).
REPUBLISHER_IMAGE_NORMALIZE_ENABLED = True
REPUBLISHER_IMAGE_THUMBNAILS_ENABLED = True

# Directory, relative to a feed's output, under which images are published.
REPUBLISHER_IMAGE_DIR = "images"
# Sub-directory names for the published, original and thumbnail files.
# NOTE(review): the path helpers in repub.utils hard-code the same three
# literals ("full/", "source/", "thumbs/") - confirm these settings are read.
REPUBLISHER_IMAGE_FULL_SUBDIR = "full"
REPUBLISHER_IMAGE_SOURCE_SUBDIR = "source"
REPUBLISHER_IMAGE_THUMBNAIL_SUBDIR = "thumbs"
|
|
||||||
|
|
||||||
# Normalization profiles for published images.
# Order matters: canonical_published_image_path() uses the FIRST profile to
# derive the published path, so reordering changes published image URLs.
# NOTE(review): "transform_kwargs" / "save_kwargs" are presumably forwarded
# verbatim to the operations named by "transform" / "save" - confirm in
# render_image_profile before renaming or reordering keys.
REPUBLISHER_IMAGE = [
    # Primary rendition: WebP, bounded to 1600x1600.
    {
        "name": "main_webp",
        "mimetype": "image/webp",
        "extension": "webp",
        "transform": "thumbnail",
        "transform_kwargs": {
            "width": 1600,
            "height": 1600,
            "size": "down",
            "no_rotate": False,
            "linear": False,
            "fail_on": "warning",
        },
        "save": "webpsave_buffer",
        "save_kwargs": {
            "Q": 82,
            "preset": "photo",
            "smart_subsample": True,
            "effort": 4,
            "alpha_q": 90,
            "keep": "none",
        },
    },
    # Fallback rendition: JPEG with the same bounds, white background.
    {
        "name": "fallback_jpeg",
        "mimetype": "image/jpeg",
        "extension": "jpg",
        "transform": "thumbnail",
        "transform_kwargs": {
            "width": 1600,
            "height": 1600,
            "size": "down",
            "no_rotate": False,
            "linear": False,
            "fail_on": "warning",
        },
        "save": "jpegsave_buffer",
        "save_kwargs": {
            "Q": 85,
            "interlace": True,
            "optimize_coding": True,
            "trellis_quant": True,
            "optimize_scans": True,
            "subsample_mode": "auto",
            "keep": "none",
            "background": [255, 255, 255],
        },
    },
]
|
|
||||||
|
|
||||||
# Thumbnail profiles. Each profile's "name" is reported as the thumbnail's
# "slot" by ImageThumbnailPipeline.load_thumbnail().
# NOTE(review): "transform_kwargs" / "save_kwargs" are presumably forwarded
# verbatim to the operations named by "transform" / "save" - confirm in
# render_image_profile before renaming or reordering keys.
REPUBLISHER_IMAGE_THUMBNAILS = [
    # 640x360 JPEG, cropped with the "attention" strategy.
    {
        "name": "card_hero",
        "mimetype": "image/jpeg",
        "extension": "jpg",
        "transform": "thumbnail",
        "transform_kwargs": {
            "width": 640,
            "height": 360,
            "size": "down",
            "crop": "attention",
            "no_rotate": False,
            "linear": False,
            "fail_on": "warning",
        },
        "save": "jpegsave_buffer",
        "save_kwargs": {
            "Q": 82,
            "interlace": True,
            "optimize_coding": True,
            "subsample_mode": "auto",
            "keep": "none",
            "background": [255, 255, 255],
        },
    },
    # 160x160 JPEG, centre-cropped.
    {
        "name": "list_square",
        "mimetype": "image/jpeg",
        "extension": "jpg",
        "transform": "thumbnail",
        "transform_kwargs": {
            "width": 160,
            "height": 160,
            "size": "down",
            "crop": "centre",
            "no_rotate": False,
            "linear": False,
            "fail_on": "warning",
        },
        "save": "jpegsave_buffer",
        "save_kwargs": {
            "Q": 78,
            "interlace": True,
            "optimize_coding": True,
            "subsample_mode": "auto",
            "keep": "none",
            "background": [255, 255, 255],
        },
    },
]
|
|
||||||
|
|
||||||
REPUBLISHER_AUDIO = [
|
REPUBLISHER_AUDIO = [
|
||||||
{
|
{
|
||||||
"name": "mp3_vbr7_voice",
|
"name": "mp3_vbr7_voice",
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,6 @@ from repub.rss import (
|
||||||
)
|
)
|
||||||
from repub.utils import (
|
from repub.utils import (
|
||||||
FileType,
|
FileType,
|
||||||
canonical_published_image_path,
|
|
||||||
canonical_published_media_path,
|
canonical_published_media_path,
|
||||||
determine_file_type,
|
determine_file_type,
|
||||||
local_file_path,
|
local_file_path,
|
||||||
|
|
@ -55,16 +54,7 @@ class BaseRssFeedSpider(Spider):
|
||||||
local_path = local_file_path(url)
|
local_path = local_file_path(url)
|
||||||
if file_type == FileType.IMAGE:
|
if file_type == FileType.IMAGE:
|
||||||
file_dir = self.settings["REPUBLISHER_IMAGE_DIR"]
|
file_dir = self.settings["REPUBLISHER_IMAGE_DIR"]
|
||||||
image_profiles = (
|
local_path = local_image_path(url)
|
||||||
self.settings.get("REPUBLISHER_IMAGE") or []
|
|
||||||
if self.settings.getbool("REPUBLISHER_IMAGE_NORMALIZE_ENABLED", True)
|
|
||||||
else []
|
|
||||||
)
|
|
||||||
local_path = (
|
|
||||||
canonical_published_image_path(url, image_profiles)
|
|
||||||
if image_profiles
|
|
||||||
else local_image_path(url)
|
|
||||||
)
|
|
||||||
elif file_type == FileType.VIDEO:
|
elif file_type == FileType.VIDEO:
|
||||||
file_dir = self.settings["REPUBLISHER_VIDEO_DIR"]
|
file_dir = self.settings["REPUBLISHER_VIDEO_DIR"]
|
||||||
local_path = canonical_published_media_path(
|
local_path = canonical_published_media_path(
|
||||||
|
|
@ -288,7 +278,6 @@ class RssFeedSpider(BaseRssFeedSpider):
|
||||||
|
|
||||||
def parse_entry(self, response, feed, entry):
|
def parse_entry(self, response, feed, entry):
|
||||||
image_urls = []
|
image_urls = []
|
||||||
media_image_urls = []
|
|
||||||
file_urls = []
|
file_urls = []
|
||||||
audio_urls = []
|
audio_urls = []
|
||||||
video_urls = []
|
video_urls = []
|
||||||
|
|
@ -334,7 +323,6 @@ class RssFeedSpider(BaseRssFeedSpider):
|
||||||
)
|
)
|
||||||
if entry.get("image"):
|
if entry.get("image"):
|
||||||
image_urls.append(entry.get("image").href)
|
image_urls.append(entry.get("image").href)
|
||||||
media_image_urls.append(entry.get("image").href)
|
|
||||||
for enc in entry.enclosures:
|
for enc in entry.enclosures:
|
||||||
url = enc.get("href")
|
url = enc.get("href")
|
||||||
file_type = determine_file_type(url=url, mimetype=enc.get("type"))
|
file_type = determine_file_type(url=url, mimetype=enc.get("type"))
|
||||||
|
|
@ -393,8 +381,6 @@ class RssFeedSpider(BaseRssFeedSpider):
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
add_url(file_type, media.get("url"))
|
add_url(file_type, media.get("url"))
|
||||||
if file_type == FileType.IMAGE:
|
|
||||||
media_image_urls.append(media.get("url"))
|
|
||||||
return ElementItem(
|
return ElementItem(
|
||||||
feed_name=self.feed_name,
|
feed_name=self.feed_name,
|
||||||
el=item,
|
el=item,
|
||||||
|
|
@ -406,7 +392,6 @@ class RssFeedSpider(BaseRssFeedSpider):
|
||||||
audios=[],
|
audios=[],
|
||||||
video_urls=video_urls,
|
video_urls=video_urls,
|
||||||
videos=[],
|
videos=[],
|
||||||
media_image_urls=media_image_urls,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
WEBMASTER_VALUE = "support@guardianproject.info (Guardian Project)"
|
WEBMASTER_VALUE = "support@guardianproject.info (Guardian Project)"
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
/*! tailwindcss v4.3.0 | MIT License | https://tailwindcss.com */
|
/*! tailwindcss v4.2.1 | MIT License | https://tailwindcss.com */
|
||||||
@layer properties;
|
@layer properties;
|
||||||
@layer theme, base, components, utilities;
|
@layer theme, base, components, utilities;
|
||||||
@layer theme {
|
@layer theme {
|
||||||
|
|
@ -245,6 +245,9 @@
|
||||||
.inset-x-0 {
|
.inset-x-0 {
|
||||||
inset-inline: calc(var(--spacing) * 0);
|
inset-inline: calc(var(--spacing) * 0);
|
||||||
}
|
}
|
||||||
|
.start {
|
||||||
|
inset-inline-start: var(--spacing);
|
||||||
|
}
|
||||||
.top-0 {
|
.top-0 {
|
||||||
top: calc(var(--spacing) * 0);
|
top: calc(var(--spacing) * 0);
|
||||||
}
|
}
|
||||||
|
|
@ -416,9 +419,6 @@
|
||||||
.rotate-180 {
|
.rotate-180 {
|
||||||
rotate: 180deg;
|
rotate: 180deg;
|
||||||
}
|
}
|
||||||
.transform {
|
|
||||||
transform: var(--tw-rotate-x,) var(--tw-rotate-y,) var(--tw-rotate-z,) var(--tw-skew-x,) var(--tw-skew-y,);
|
|
||||||
}
|
|
||||||
.animate-pulse {
|
.animate-pulse {
|
||||||
animation: var(--animate-pulse);
|
animation: var(--animate-pulse);
|
||||||
}
|
}
|
||||||
|
|
@ -1221,26 +1221,6 @@
|
||||||
inherits: false;
|
inherits: false;
|
||||||
initial-value: 0;
|
initial-value: 0;
|
||||||
}
|
}
|
||||||
@property --tw-rotate-x {
|
|
||||||
syntax: "*";
|
|
||||||
inherits: false;
|
|
||||||
}
|
|
||||||
@property --tw-rotate-y {
|
|
||||||
syntax: "*";
|
|
||||||
inherits: false;
|
|
||||||
}
|
|
||||||
@property --tw-rotate-z {
|
|
||||||
syntax: "*";
|
|
||||||
inherits: false;
|
|
||||||
}
|
|
||||||
@property --tw-skew-x {
|
|
||||||
syntax: "*";
|
|
||||||
inherits: false;
|
|
||||||
}
|
|
||||||
@property --tw-skew-y {
|
|
||||||
syntax: "*";
|
|
||||||
inherits: false;
|
|
||||||
}
|
|
||||||
@property --tw-space-y-reverse {
|
@property --tw-space-y-reverse {
|
||||||
syntax: "*";
|
syntax: "*";
|
||||||
inherits: false;
|
inherits: false;
|
||||||
|
|
@ -1480,11 +1460,6 @@
|
||||||
--tw-translate-x: 0;
|
--tw-translate-x: 0;
|
||||||
--tw-translate-y: 0;
|
--tw-translate-y: 0;
|
||||||
--tw-translate-z: 0;
|
--tw-translate-z: 0;
|
||||||
--tw-rotate-x: initial;
|
|
||||||
--tw-rotate-y: initial;
|
|
||||||
--tw-rotate-z: initial;
|
|
||||||
--tw-skew-x: initial;
|
|
||||||
--tw-skew-y: initial;
|
|
||||||
--tw-space-y-reverse: 0;
|
--tw-space-y-reverse: 0;
|
||||||
--tw-space-x-reverse: 0;
|
--tw-space-x-reverse: 0;
|
||||||
--tw-divide-y-reverse: 0;
|
--tw-divide-y-reverse: 0;
|
||||||
|
|
|
||||||
|
|
@ -43,50 +43,6 @@ def local_audio_path(s: str) -> str:
|
||||||
return local_file_path(s)
|
return local_file_path(s)
|
||||||
|
|
||||||
|
|
||||||
def image_guid(source_url: str) -> str:
    """Return the hex SHA-1 digest of ``source_url``.

    The image path helpers use it as the file stem, so the same URL always
    maps to the same file names.
    """
    url_digest = hashlib.sha1(to_bytes(source_url))  # nosec
    return url_digest.hexdigest()
|
|
||||||
|
|
||||||
|
|
||||||
def image_extension(mimetype_or_extension: str | None, source_url: str = "") -> str:
|
|
||||||
if mimetype_or_extension:
|
|
||||||
if mimetype_or_extension.startswith("."):
|
|
||||||
extension = mimetype_or_extension
|
|
||||||
elif "/" in mimetype_or_extension:
|
|
||||||
extension = mimetypes.guess_extension(mimetype_or_extension) or ""
|
|
||||||
else:
|
|
||||||
extension = f".{mimetype_or_extension.lstrip('.')}"
|
|
||||||
if extension == ".jpe":
|
|
||||||
return ".jpg"
|
|
||||||
return extension
|
|
||||||
guessed = Path(source_url).suffix
|
|
||||||
if guessed == ".jpe":
|
|
||||||
return ".jpg"
|
|
||||||
if guessed:
|
|
||||||
return guessed
|
|
||||||
return ".img"
|
|
||||||
|
|
||||||
|
|
||||||
def source_image_path(source_url: str, mimetype_or_extension: str | None = None) -> str:
    """Return the store-relative path of the original image for ``source_url``.

    The path has the form ``source/<guid><extension>``, where the extension
    comes from ``mimetype_or_extension`` or, failing that, from the URL.
    """
    suffix = image_extension(mimetype_or_extension, source_url)
    stem = image_guid(source_url)
    return "source/" + stem + suffix
|
|
||||||
|
|
||||||
|
|
||||||
def published_image_path(source_url: str, profile: Mapping[str, Any]) -> str:
    """Return the path of the ``profile`` rendition of ``source_url``.

    The base path is ``full/<guid>``; ``variant_media_path`` turns it into
    the final path for the given profile (called with ``hashed=True``).
    """
    base_path = "full/" + image_guid(source_url)
    return variant_media_path(base_path, profile, hashed=True)
|
|
||||||
|
|
||||||
|
|
||||||
def canonical_published_image_path(
    source_url: str, profiles: Sequence[Mapping[str, Any]]
) -> str:
    """Return the published path of ``source_url`` for the first profile.

    Raises:
        ValueError: If ``profiles`` is empty.
    """
    if profiles:
        primary_profile = profiles[0]
        return published_image_path(source_url, primary_profile)
    raise ValueError("Missing image normalization profiles")
|
|
||||||
|
|
||||||
|
|
||||||
def thumbnail_image_path(source_url: str, profile: Mapping[str, Any]) -> str:
    """Return the path of the ``profile`` thumbnail of ``source_url``.

    The base path is ``thumbs/<guid>``; ``variant_media_path`` turns it into
    the final path for the given profile (called with ``hashed=True``).
    """
    base_path = "thumbs/" + image_guid(source_url)
    return variant_media_path(base_path, profile, hashed=True)
|
|
||||||
|
|
||||||
|
|
||||||
def profile_settings_hash(profile: Mapping[str, Any]) -> str:
|
def profile_settings_hash(profile: Mapping[str, Any]) -> str:
|
||||||
settings = {
|
settings = {
|
||||||
key: value
|
key: value
|
||||||
|
|
@ -109,8 +65,6 @@ def variant_media_path(
|
||||||
def published_media_path(
|
def published_media_path(
|
||||||
file_type: FileType, source_url: str, profile: Mapping[str, Any]
|
file_type: FileType, source_url: str, profile: Mapping[str, Any]
|
||||||
) -> str:
|
) -> str:
|
||||||
if file_type == FileType.IMAGE:
|
|
||||||
return published_image_path(source_url, profile)
|
|
||||||
if file_type == FileType.AUDIO:
|
if file_type == FileType.AUDIO:
|
||||||
return variant_media_path(local_audio_path(source_url), profile, hashed=True)
|
return variant_media_path(local_audio_path(source_url), profile, hashed=True)
|
||||||
if file_type == FileType.VIDEO:
|
if file_type == FileType.VIDEO:
|
||||||
|
|
@ -125,8 +79,6 @@ def canonical_published_media_path(
|
||||||
raise ValueError(f"Missing transcode profiles for {file_type.value}")
|
raise ValueError(f"Missing transcode profiles for {file_type.value}")
|
||||||
# The first configured profile is the public URL contract. Reordering profiles
|
# The first configured profile is the public URL contract. Reordering profiles
|
||||||
# changes published URLs for already-mirrored media.
|
# changes published URLs for already-mirrored media.
|
||||||
if file_type == FileType.IMAGE:
|
|
||||||
return canonical_published_image_path(source_url, profiles)
|
|
||||||
return published_media_path(file_type, source_url, profiles[0])
|
return published_media_path(file_type, source_url, profiles[0])
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -224,46 +224,7 @@ def test_build_feed_settings_can_disable_image_and_video_conversion(
|
||||||
convert_video=False,
|
convert_video=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert (
|
assert "repub.pipelines.ImagePipeline" not in feed_settings["ITEM_PIPELINES"]
|
||||||
"repub.pipelines.ImageNormalizePipeline" not in feed_settings["ITEM_PIPELINES"]
|
|
||||||
)
|
|
||||||
assert (
|
|
||||||
"repub.pipelines.ImageThumbnailPipeline" not in feed_settings["ITEM_PIPELINES"]
|
|
||||||
)
|
|
||||||
assert "repub.pipelines.VideoPipeline" not in feed_settings["ITEM_PIPELINES"]
|
assert "repub.pipelines.VideoPipeline" not in feed_settings["ITEM_PIPELINES"]
|
||||||
assert feed_settings["REPUBLISHER_IMAGE_NORMALIZE_ENABLED"] is False
|
assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.AudioPipeline"] == 2
|
||||||
assert feed_settings["REPUBLISHER_IMAGE_THUMBNAILS_ENABLED"] is False
|
assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.FilePipeline"] == 4
|
||||||
assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.AudioPipeline"] == 3
|
|
||||||
assert feed_settings["ITEM_PIPELINES"]["repub.pipelines.FilePipeline"] == 5
|
|
||||||
|
|
||||||
|
|
||||||
def test_build_feed_settings_respects_image_pipeline_feature_flags(
    tmp_path: Path,
) -> None:
    """Disabling only the thumbnail flag keeps the normalize pipeline enabled."""
    mirror_dir = (tmp_path / "mirror").resolve()
    podcast_feed = FeedConfig(
        name="Guardian Project Podcast",
        slug="gp-pod",
        url="https://guardianproject.info/podcast/podcast.xml",
    )
    config = RepublisherConfig(
        config_path=tmp_path / "repub.toml",
        out_dir=mirror_dir,
        feeds=(podcast_feed,),
        scrapy_settings={"REPUBLISHER_IMAGE_THUMBNAILS_ENABLED": False},
    )

    feed_settings = build_feed_settings(
        build_base_settings(config),
        out_dir=mirror_dir,
        feed_slug="gp-pod",
    )

    pipelines = feed_settings["ITEM_PIPELINES"]
    # Normalization stays registered at priority 1 ...
    assert pipelines["repub.pipelines.ImageNormalizePipeline"] == 1
    # ... while the thumbnail stage is not registered at all.
    assert "repub.pipelines.ImageThumbnailPipeline" not in pipelines
|
|
||||||
|
|
|
||||||
|
|
@ -16,12 +16,10 @@ from repub.rss import nsmap
|
||||||
from repub.spiders.rss_spider import RssFeedSpider
|
from repub.spiders.rss_spider import RssFeedSpider
|
||||||
from repub.utils import (
|
from repub.utils import (
|
||||||
FileType,
|
FileType,
|
||||||
canonical_published_image_path,
|
|
||||||
local_audio_path,
|
local_audio_path,
|
||||||
|
local_image_path,
|
||||||
local_video_path,
|
local_video_path,
|
||||||
published_image_path,
|
|
||||||
published_media_path,
|
published_media_path,
|
||||||
thumbnail_image_path,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
RSS_DATE_PATTERN = re.compile(
|
RSS_DATE_PATTERN = re.compile(
|
||||||
|
|
@ -46,7 +44,6 @@ def _serialize_feed(
|
||||||
"REPUBLISHER_FILE_DIR": "files",
|
"REPUBLISHER_FILE_DIR": "files",
|
||||||
"REPUBLISHER_AUDIO_DIR": "audio",
|
"REPUBLISHER_AUDIO_DIR": "audio",
|
||||||
"REPUBLISHER_VIDEO_DIR": "video",
|
"REPUBLISHER_VIDEO_DIR": "video",
|
||||||
"REPUBLISHER_IMAGE": repub_settings.REPUBLISHER_IMAGE,
|
|
||||||
"REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
|
"REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
|
||||||
"REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
|
"REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
|
||||||
"REPUBLISHER_FEED_URL": feed_url,
|
"REPUBLISHER_FEED_URL": feed_url,
|
||||||
|
|
@ -78,18 +75,6 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
||||||
source_video = "https://source.example/media/video.mp4"
|
source_video = "https://source.example/media/video.mp4"
|
||||||
channel_image = "https://source.example/media/channel.png"
|
channel_image = "https://source.example/media/channel.png"
|
||||||
item_image = "https://source.example/media/cover.jpg"
|
item_image = "https://source.example/media/cover.jpg"
|
||||||
image_main_path = published_image_path(
|
|
||||||
source_image,
|
|
||||||
repub_settings.REPUBLISHER_IMAGE[0],
|
|
||||||
)
|
|
||||||
image_fallback_path = published_image_path(
|
|
||||||
source_image,
|
|
||||||
repub_settings.REPUBLISHER_IMAGE[1],
|
|
||||||
)
|
|
||||||
image_thumbnail_path = thumbnail_image_path(
|
|
||||||
source_image,
|
|
||||||
repub_settings.REPUBLISHER_IMAGE_THUMBNAILS[0],
|
|
||||||
)
|
|
||||||
audio_base_path = local_audio_path(source_audio)
|
audio_base_path = local_audio_path(source_audio)
|
||||||
audio_default_path = published_media_path(
|
audio_default_path = published_media_path(
|
||||||
FileType.AUDIO, source_audio, repub_settings.REPUBLISHER_AUDIO[0]
|
FileType.AUDIO, source_audio, repub_settings.REPUBLISHER_AUDIO[0]
|
||||||
|
|
@ -109,60 +94,6 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
||||||
)
|
)
|
||||||
|
|
||||||
def prepare_item(item: ElementItem) -> None:
|
def prepare_item(item: ElementItem) -> None:
|
||||||
item.images = [
|
|
||||||
{
|
|
||||||
"url": source_image,
|
|
||||||
"path": image_main_path,
|
|
||||||
"published_url": _published_url(
|
|
||||||
"https://mirror.example",
|
|
||||||
f"images/{image_main_path}",
|
|
||||||
),
|
|
||||||
"checksum": "image-default",
|
|
||||||
"status": "downloaded",
|
|
||||||
"source_path": "source/ignored.png",
|
|
||||||
"variants": [
|
|
||||||
{
|
|
||||||
"url": _published_url(
|
|
||||||
"https://mirror.example",
|
|
||||||
f"images/{image_main_path}",
|
|
||||||
),
|
|
||||||
"path": image_main_path,
|
|
||||||
"type": "image/webp",
|
|
||||||
"medium": "image",
|
|
||||||
"isDefault": "true",
|
|
||||||
"fileSize": "2345",
|
|
||||||
"width": "1200",
|
|
||||||
"height": "675",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"url": _published_url(
|
|
||||||
"https://mirror.example",
|
|
||||||
f"images/{image_fallback_path}",
|
|
||||||
),
|
|
||||||
"path": image_fallback_path,
|
|
||||||
"type": "image/jpeg",
|
|
||||||
"medium": "image",
|
|
||||||
"isDefault": "false",
|
|
||||||
"fileSize": "3456",
|
|
||||||
"width": "1200",
|
|
||||||
"height": "675",
|
|
||||||
},
|
|
||||||
],
|
|
||||||
"thumbnails": [
|
|
||||||
{
|
|
||||||
"url": _published_url(
|
|
||||||
"https://mirror.example",
|
|
||||||
f"images/{image_thumbnail_path}",
|
|
||||||
),
|
|
||||||
"path": image_thumbnail_path,
|
|
||||||
"slot": "card_hero",
|
|
||||||
"type": "image/jpeg",
|
|
||||||
"width": "640",
|
|
||||||
"height": "360",
|
|
||||||
}
|
|
||||||
],
|
|
||||||
}
|
|
||||||
]
|
|
||||||
item.audios = [
|
item.audios = [
|
||||||
{
|
{
|
||||||
"url": source_audio,
|
"url": source_audio,
|
||||||
|
|
@ -330,7 +261,6 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
||||||
<pubDate>Tue, 31 Mar 2026 10:31:50 +0000</pubDate>
|
<pubDate>Tue, 31 Mar 2026 10:31:50 +0000</pubDate>
|
||||||
<enclosure url="{source_audio}" length="123" type="audio/mpeg" />
|
<enclosure url="{source_audio}" length="123" type="audio/mpeg" />
|
||||||
<content:encoded><![CDATA[<div mode="body" querystring="x=1"><img src="{source_image}" contenteditable="true"></div>]]></content:encoded>
|
<content:encoded><![CDATA[<div mode="body" querystring="x=1"><img src="{source_image}" contenteditable="true"></div>]]></content:encoded>
|
||||||
<media:content url="{source_image}" type="image/jpeg" medium="image" expression="full" lang="en" />
|
|
||||||
<media:content url="{source_video}" type="video/mp4" medium="video" expression="full" duration="60" width="640" height="360" lang="en" />
|
<media:content url="{source_video}" type="video/mp4" medium="video" expression="full" duration="60" width="640" height="360" lang="en" />
|
||||||
<itunes:summary><![CDATA[{long_summary}]]></itunes:summary>
|
<itunes:summary><![CDATA[{long_summary}]]></itunes:summary>
|
||||||
<itunes:image href="{item_image}" />
|
<itunes:image href="{item_image}" />
|
||||||
|
|
@ -358,11 +288,7 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
||||||
assert last_build_date == item_pub_date
|
assert last_build_date == item_pub_date
|
||||||
assert channel.findtext("itunes:explicit", namespaces=nsmap) == "false"
|
assert channel.findtext("itunes:explicit", namespaces=nsmap) == "false"
|
||||||
assert channel.findtext("./image/url") == (
|
assert channel.findtext("./image/url") == (
|
||||||
"https://mirror.example/feeds/demo/images/"
|
f"https://mirror.example/feeds/demo/images/{local_image_path(channel_image)}"
|
||||||
+ canonical_published_image_path(
|
|
||||||
channel_image,
|
|
||||||
repub_settings.REPUBLISHER_IMAGE,
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
atom_self = channel.find("atom:link", namespaces=nsmap)
|
atom_self = channel.find("atom:link", namespaces=nsmap)
|
||||||
|
|
@ -392,63 +318,9 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
||||||
assert root.find("./channel/item/media:content", namespaces=nsmap) is None
|
assert root.find("./channel/item/media:content", namespaces=nsmap) is None
|
||||||
|
|
||||||
media_groups = root.findall("./channel/item/media:group", namespaces=nsmap)
|
media_groups = root.findall("./channel/item/media:group", namespaces=nsmap)
|
||||||
assert len(media_groups) == 3
|
assert len(media_groups) == 2
|
||||||
|
|
||||||
image_group = next(
|
|
||||||
group
|
|
||||||
for group in media_groups
|
|
||||||
if group.find("media:thumbnail", namespaces=nsmap) is not None
|
|
||||||
)
|
|
||||||
audio_group = next(
|
|
||||||
group
|
|
||||||
for group in media_groups
|
|
||||||
if group.findall("media:content", namespaces=nsmap)
|
|
||||||
and group.findall("media:content", namespaces=nsmap)[0].get("medium") == "audio"
|
|
||||||
)
|
|
||||||
video_group = next(
|
|
||||||
group
|
|
||||||
for group in media_groups
|
|
||||||
if group.findall("media:content", namespaces=nsmap)
|
|
||||||
and group.findall("media:content", namespaces=nsmap)[0].get("medium") == "video"
|
|
||||||
)
|
|
||||||
|
|
||||||
image_variants = image_group.findall("media:content", namespaces=nsmap)
|
|
||||||
assert [variant.attrib for variant in image_variants] == [
|
|
||||||
{
|
|
||||||
"url": (f"https://mirror.example/feeds/demo/images/" f"{image_main_path}"),
|
|
||||||
"type": "image/webp",
|
|
||||||
"medium": "image",
|
|
||||||
"isDefault": "true",
|
|
||||||
"expression": "full",
|
|
||||||
"lang": "en",
|
|
||||||
"height": "675",
|
|
||||||
"width": "1200",
|
|
||||||
"fileSize": "2345",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"url": (
|
|
||||||
f"https://mirror.example/feeds/demo/images/" f"{image_fallback_path}"
|
|
||||||
),
|
|
||||||
"type": "image/jpeg",
|
|
||||||
"medium": "image",
|
|
||||||
"isDefault": "false",
|
|
||||||
"expression": "full",
|
|
||||||
"lang": "en",
|
|
||||||
"height": "675",
|
|
||||||
"width": "1200",
|
|
||||||
"fileSize": "3456",
|
|
||||||
},
|
|
||||||
]
|
|
||||||
thumbnails = image_group.findall("media:thumbnail", namespaces=nsmap)
|
|
||||||
assert len(thumbnails) == 1
|
|
||||||
assert thumbnails[0].attrib == {
|
|
||||||
"url": (f"https://mirror.example/feeds/demo/images/" f"{image_thumbnail_path}"),
|
|
||||||
"width": "640",
|
|
||||||
"height": "360",
|
|
||||||
f"{{{nsmap['anynews']}}}slot": "card_hero",
|
|
||||||
f"{{{nsmap['anynews']}}}type": "image/jpeg",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
audio_group, video_group = media_groups
|
||||||
audio_variants = audio_group.findall("media:content", namespaces=nsmap)
|
audio_variants = audio_group.findall("media:content", namespaces=nsmap)
|
||||||
assert [variant.attrib for variant in audio_variants] == [
|
assert [variant.attrib for variant in audio_variants] == [
|
||||||
{
|
{
|
||||||
|
|
@ -556,13 +428,7 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
|
||||||
itunes_image = root.find("./channel/item/itunes:image", namespaces=nsmap)
|
itunes_image = root.find("./channel/item/itunes:image", namespaces=nsmap)
|
||||||
assert itunes_image is not None
|
assert itunes_image is not None
|
||||||
assert itunes_image.attrib == {
|
assert itunes_image.attrib == {
|
||||||
"href": (
|
"href": f"https://mirror.example/feeds/demo/images/{local_image_path(item_image)}"
|
||||||
"https://mirror.example/feeds/demo/images/"
|
|
||||||
+ canonical_published_image_path(
|
|
||||||
item_image,
|
|
||||||
repub_settings.REPUBLISHER_IMAGE,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
itunes_summary = root.findtext("./channel/item/itunes:summary", namespaces=nsmap)
|
itunes_summary = root.findtext("./channel/item/itunes:summary", namespaces=nsmap)
|
||||||
|
|
@ -628,165 +494,3 @@ def test_item_body_uses_description_only_when_content_is_also_present() -> None:
|
||||||
assert both_present.findtext("content:encoded", namespaces=nsmap) == (
|
assert both_present.findtext("content:encoded", namespaces=nsmap) == (
|
||||||
"<div>Full body</div>"
|
"<div>Full body</div>"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_exporter_does_not_emit_media_rss_for_inline_only_images() -> None:
|
|
||||||
source_image = "https://source.example/media/inline.jpg"
|
|
||||||
|
|
||||||
def prepare_item(item: ElementItem) -> None:
|
|
||||||
item.images = [
|
|
||||||
{
|
|
||||||
"url": source_image,
|
|
||||||
"path": published_image_path(
|
|
||||||
source_image,
|
|
||||||
repub_settings.REPUBLISHER_IMAGE[0],
|
|
||||||
),
|
|
||||||
"published_url": _published_url(
|
|
||||||
"https://mirror.example",
|
|
||||||
"images/"
|
|
||||||
+ published_image_path(
|
|
||||||
source_image,
|
|
||||||
repub_settings.REPUBLISHER_IMAGE[0],
|
|
||||||
),
|
|
||||||
),
|
|
||||||
"checksum": "inline-image",
|
|
||||||
"status": "downloaded",
|
|
||||||
"source_path": "source/inline.jpg",
|
|
||||||
"variants": [
|
|
||||||
{
|
|
||||||
"url": _published_url(
|
|
||||||
"https://mirror.example",
|
|
||||||
"images/"
|
|
||||||
+ published_image_path(
|
|
||||||
source_image,
|
|
||||||
repub_settings.REPUBLISHER_IMAGE[0],
|
|
||||||
),
|
|
||||||
),
|
|
||||||
"path": published_image_path(
|
|
||||||
source_image,
|
|
||||||
repub_settings.REPUBLISHER_IMAGE[0],
|
|
||||||
),
|
|
||||||
"type": "image/webp",
|
|
||||||
"medium": "image",
|
|
||||||
"isDefault": "true",
|
|
||||||
"width": "1200",
|
|
||||||
"height": "675",
|
|
||||||
"fileSize": "2345",
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"thumbnails": [],
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
_, root = _serialize_feed(
|
|
||||||
feed_url="https://mirror.example",
|
|
||||||
prepare_item=prepare_item,
|
|
||||||
feed_text=f"""<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<rss version="2.0"
|
|
||||||
xmlns:content="http://purl.org/rss/1.0/modules/content/">
|
|
||||||
<channel>
|
|
||||||
<title>Demo Feed</title>
|
|
||||||
<link>https://source.example/feed</link>
|
|
||||||
<description>Demo description</description>
|
|
||||||
<item>
|
|
||||||
<title>Inline Image Only</title>
|
|
||||||
<link>https://source.example/inline</link>
|
|
||||||
<guid isPermaLink="false">inline-only</guid>
|
|
||||||
<pubDate>Tue, 31 Mar 2026 10:31:50 +0000</pubDate>
|
|
||||||
<content:encoded><![CDATA[<div><img src="{source_image}"></div>]]></content:encoded>
|
|
||||||
</item>
|
|
||||||
</channel>
|
|
||||||
</rss>
|
|
||||||
""",
|
|
||||||
)
|
|
||||||
|
|
||||||
assert root.findall("./channel/item/media:group", namespaces=nsmap) == []
|
|
||||||
|
|
||||||
|
|
||||||
def test_exporter_replaces_standalone_source_media_thumbnails() -> None:
|
|
||||||
source_image = "https://source.example/media/photo.jpg"
|
|
||||||
image_main_path = published_image_path(
|
|
||||||
source_image,
|
|
||||||
repub_settings.REPUBLISHER_IMAGE[0],
|
|
||||||
)
|
|
||||||
image_thumbnail_path = thumbnail_image_path(
|
|
||||||
source_image,
|
|
||||||
repub_settings.REPUBLISHER_IMAGE_THUMBNAILS[0],
|
|
||||||
)
|
|
||||||
|
|
||||||
def prepare_item(item: ElementItem) -> None:
|
|
||||||
item.images = [
|
|
||||||
{
|
|
||||||
"url": source_image,
|
|
||||||
"path": image_main_path,
|
|
||||||
"published_url": _published_url(
|
|
||||||
"https://mirror.example",
|
|
||||||
f"images/{image_main_path}",
|
|
||||||
),
|
|
||||||
"checksum": "image-default",
|
|
||||||
"status": "downloaded",
|
|
||||||
"source_path": "source/ignored.png",
|
|
||||||
"variants": [
|
|
||||||
{
|
|
||||||
"url": _published_url(
|
|
||||||
"https://mirror.example",
|
|
||||||
f"images/{image_main_path}",
|
|
||||||
),
|
|
||||||
"path": image_main_path,
|
|
||||||
"type": "image/webp",
|
|
||||||
"medium": "image",
|
|
||||||
"isDefault": "true",
|
|
||||||
"fileSize": "2345",
|
|
||||||
"width": "1200",
|
|
||||||
"height": "675",
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"thumbnails": [
|
|
||||||
{
|
|
||||||
"url": _published_url(
|
|
||||||
"https://mirror.example",
|
|
||||||
f"images/{image_thumbnail_path}",
|
|
||||||
),
|
|
||||||
"path": image_thumbnail_path,
|
|
||||||
"slot": "card_hero",
|
|
||||||
"type": "image/jpeg",
|
|
||||||
"width": "640",
|
|
||||||
"height": "360",
|
|
||||||
}
|
|
||||||
],
|
|
||||||
}
|
|
||||||
]
|
|
||||||
|
|
||||||
_, root = _serialize_feed(
|
|
||||||
feed_url="https://mirror.example",
|
|
||||||
prepare_item=prepare_item,
|
|
||||||
feed_text=f"""<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<rss version="2.0"
|
|
||||||
xmlns:media="http://search.yahoo.com/mrss/">
|
|
||||||
<channel>
|
|
||||||
<title>Demo Feed</title>
|
|
||||||
<link>https://source.example/feed</link>
|
|
||||||
<description>Demo description</description>
|
|
||||||
<item>
|
|
||||||
<title>Entry One</title>
|
|
||||||
<link>https://source.example/entry-1</link>
|
|
||||||
<guid isPermaLink="false">entry-1</guid>
|
|
||||||
<pubDate>Tue, 31 Mar 2026 10:31:50 +0000</pubDate>
|
|
||||||
<media:content url="{source_image}" type="image/jpeg" medium="image" />
|
|
||||||
<media:thumbnail url="https://source.example/media/source-thumb.jpg" width="10" height="10" />
|
|
||||||
</item>
|
|
||||||
</channel>
|
|
||||||
</rss>
|
|
||||||
""",
|
|
||||||
)
|
|
||||||
|
|
||||||
thumbnails = root.findall("./channel/item/media:thumbnail", namespaces=nsmap)
|
|
||||||
assert thumbnails == []
|
|
||||||
group_thumbnails = root.findall(
|
|
||||||
"./channel/item/media:group/media:thumbnail",
|
|
||||||
namespaces=nsmap,
|
|
||||||
)
|
|
||||||
assert len(group_thumbnails) == 1
|
|
||||||
assert group_thumbnails[0].get("url") == (
|
|
||||||
f"https://mirror.example/feeds/demo/images/{image_thumbnail_path}"
|
|
||||||
)
|
|
||||||
|
|
|
||||||
|
|
@ -8,13 +8,10 @@ from repub import settings as repub_settings
|
||||||
from repub.spiders.rss_spider import RssFeedSpider
|
from repub.spiders.rss_spider import RssFeedSpider
|
||||||
from repub.utils import (
|
from repub.utils import (
|
||||||
FileType,
|
FileType,
|
||||||
canonical_published_image_path,
|
|
||||||
local_audio_path,
|
local_audio_path,
|
||||||
local_image_path,
|
local_image_path,
|
||||||
local_video_path,
|
local_video_path,
|
||||||
published_image_path,
|
|
||||||
published_media_path,
|
published_media_path,
|
||||||
thumbnail_image_path,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -60,17 +57,14 @@ def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None:
|
||||||
"REPUBLISHER_FILE_DIR": "files",
|
"REPUBLISHER_FILE_DIR": "files",
|
||||||
"REPUBLISHER_AUDIO_DIR": "audio",
|
"REPUBLISHER_AUDIO_DIR": "audio",
|
||||||
"REPUBLISHER_VIDEO_DIR": "video",
|
"REPUBLISHER_VIDEO_DIR": "video",
|
||||||
"REPUBLISHER_IMAGE": repub_settings.REPUBLISHER_IMAGE,
|
|
||||||
"REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
|
"REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
|
||||||
"REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
|
"REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
assert spider.rewrite_image_url(
|
assert (
|
||||||
"https://example.com/media/photo.jpg"
|
spider.rewrite_image_url("https://example.com/media/photo.jpg")
|
||||||
) == "images/" + canonical_published_image_path(
|
== f"images/{local_image_path('https://example.com/media/photo.jpg')}"
|
||||||
"https://example.com/media/photo.jpg",
|
|
||||||
repub_settings.REPUBLISHER_IMAGE,
|
|
||||||
)
|
)
|
||||||
assert spider.rewrite_file_url(
|
assert spider.rewrite_file_url(
|
||||||
FileType.AUDIO,
|
FileType.AUDIO,
|
||||||
|
|
@ -96,28 +90,6 @@ def test_rss_spider_rewrites_public_asset_urls_as_relative_paths() -> None:
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_rss_spider_keeps_legacy_image_paths_when_image_normalization_disabled() -> (
|
|
||||||
None
|
|
||||||
):
|
|
||||||
spider = RssFeedSpider(feed_name="demo", url="https://example.com/feed.rss")
|
|
||||||
spider.settings = Settings(
|
|
||||||
values={
|
|
||||||
"REPUBLISHER_IMAGE_DIR": "images",
|
|
||||||
"REPUBLISHER_FILE_DIR": "files",
|
|
||||||
"REPUBLISHER_AUDIO_DIR": "audio",
|
|
||||||
"REPUBLISHER_VIDEO_DIR": "video",
|
|
||||||
"REPUBLISHER_IMAGE_NORMALIZE_ENABLED": False,
|
|
||||||
"REPUBLISHER_IMAGE": repub_settings.REPUBLISHER_IMAGE,
|
|
||||||
"REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
|
|
||||||
"REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
assert spider.rewrite_image_url("https://example.com/media/photo.jpg") == (
|
|
||||||
f"images/{local_image_path('https://example.com/media/photo.jpg')}"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_published_media_path_changes_when_profile_args_change() -> None:
|
def test_published_media_path_changes_when_profile_args_change() -> None:
|
||||||
source_url = "https://example.com/media/clip.mp4"
|
source_url = "https://example.com/media/clip.mp4"
|
||||||
audio_profile = repub_settings.REPUBLISHER_AUDIO[0]
|
audio_profile = repub_settings.REPUBLISHER_AUDIO[0]
|
||||||
|
|
@ -141,41 +113,6 @@ def test_published_media_path_changes_when_profile_args_change() -> None:
|
||||||
) != published_media_path(FileType.VIDEO, source_url, base_profile)
|
) != published_media_path(FileType.VIDEO, source_url, base_profile)
|
||||||
|
|
||||||
|
|
||||||
def test_published_image_and_thumbnail_paths_change_when_profile_args_change() -> None:
|
|
||||||
source_url = "https://example.com/media/photo.png"
|
|
||||||
base_image_profile = repub_settings.REPUBLISHER_IMAGE[0]
|
|
||||||
base_thumbnail_profile = repub_settings.REPUBLISHER_IMAGE_THUMBNAILS[0]
|
|
||||||
|
|
||||||
assert canonical_published_image_path(
|
|
||||||
source_url,
|
|
||||||
repub_settings.REPUBLISHER_IMAGE,
|
|
||||||
) == published_image_path(source_url, base_image_profile)
|
|
||||||
|
|
||||||
changed_image_profile = {
|
|
||||||
**base_image_profile,
|
|
||||||
"transform_kwargs": {
|
|
||||||
**base_image_profile["transform_kwargs"],
|
|
||||||
"width": 2048,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
assert published_image_path(
|
|
||||||
source_url,
|
|
||||||
changed_image_profile,
|
|
||||||
) != published_image_path(source_url, base_image_profile)
|
|
||||||
|
|
||||||
changed_thumbnail_profile = {
|
|
||||||
**base_thumbnail_profile,
|
|
||||||
"save_kwargs": {
|
|
||||||
**base_thumbnail_profile["save_kwargs"],
|
|
||||||
"Q": 60,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
assert thumbnail_image_path(
|
|
||||||
source_url,
|
|
||||||
changed_thumbnail_profile,
|
|
||||||
) != thumbnail_image_path(source_url, base_thumbnail_profile)
|
|
||||||
|
|
||||||
|
|
||||||
def test_rss_spider_keeps_items_with_empty_content_encoded() -> None:
|
def test_rss_spider_keeps_items_with_empty_content_encoded() -> None:
|
||||||
feed_text = """<?xml version="1.0" encoding="UTF-8"?>
|
feed_text = """<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" version="2.0">
|
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" version="2.0">
|
||||||
|
|
@ -201,7 +138,6 @@ def test_rss_spider_keeps_items_with_empty_content_encoded() -> None:
|
||||||
"REPUBLISHER_FILE_DIR": "files",
|
"REPUBLISHER_FILE_DIR": "files",
|
||||||
"REPUBLISHER_AUDIO_DIR": "audio",
|
"REPUBLISHER_AUDIO_DIR": "audio",
|
||||||
"REPUBLISHER_VIDEO_DIR": "video",
|
"REPUBLISHER_VIDEO_DIR": "video",
|
||||||
"REPUBLISHER_IMAGE": repub_settings.REPUBLISHER_IMAGE,
|
|
||||||
"REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
|
"REPUBLISHER_AUDIO": repub_settings.REPUBLISHER_AUDIO,
|
||||||
"REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
|
"REPUBLISHER_VIDEO": repub_settings.REPUBLISHER_VIDEO,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,6 @@ from types import SimpleNamespace
|
||||||
from typing import Any, cast
|
from typing import Any, cast
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import pyvips
|
|
||||||
from scrapy.crawler import Crawler
|
from scrapy.crawler import Crawler
|
||||||
from scrapy.http import Request, Response
|
from scrapy.http import Request, Response
|
||||||
|
|
||||||
|
|
@ -17,23 +16,12 @@ from repub.config import (
|
||||||
build_feed_settings,
|
build_feed_settings,
|
||||||
)
|
)
|
||||||
from repub.items import ElementItem
|
from repub.items import ElementItem
|
||||||
from repub.pipelines import (
|
from repub.pipelines import AudioPipeline, FilePipeline, VideoPipeline
|
||||||
AudioPipeline,
|
|
||||||
FilePipeline,
|
|
||||||
ImageNormalizePipeline,
|
|
||||||
ImageThumbnailPipeline,
|
|
||||||
VideoPipeline,
|
|
||||||
image_mimetype,
|
|
||||||
)
|
|
||||||
from repub.utils import (
|
from repub.utils import (
|
||||||
FileType,
|
FileType,
|
||||||
canonical_published_image_path,
|
|
||||||
local_audio_path,
|
local_audio_path,
|
||||||
local_video_path,
|
local_video_path,
|
||||||
published_image_path,
|
|
||||||
published_media_path,
|
published_media_path,
|
||||||
source_image_path,
|
|
||||||
thumbnail_image_path,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -57,33 +45,17 @@ def build_test_crawler(tmp_path: Path) -> SimpleNamespace:
|
||||||
return SimpleNamespace(settings=settings, request_fingerprinter=object())
|
return SimpleNamespace(settings=settings, request_fingerprinter=object())
|
||||||
|
|
||||||
|
|
||||||
class HashableSpiderInfo:
|
|
||||||
__hash__ = object.__hash__
|
|
||||||
|
|
||||||
def __init__(self) -> None:
|
|
||||||
self.spider = SimpleNamespace()
|
|
||||||
|
|
||||||
|
|
||||||
def spider_info() -> Any:
|
def spider_info() -> Any:
|
||||||
return HashableSpiderInfo()
|
return SimpleNamespace(spider=SimpleNamespace())
|
||||||
|
|
||||||
|
|
||||||
def store_dir(pipeline: Any) -> Path:
|
def store_dir(pipeline: Any) -> Path:
|
||||||
return Path(cast(Any, pipeline.store).basedir)
|
return Path(cast(Any, pipeline.store).basedir)
|
||||||
|
|
||||||
|
|
||||||
def transparent_png_bytes() -> bytes:
|
|
||||||
return cast(Any, pyvips.Image.black(2, 3, bands=4)).pngsave_buffer()
|
|
||||||
|
|
||||||
|
|
||||||
def png_bytes(width: int, height: int, *, bands: int = 4) -> bytes:
|
|
||||||
return cast(Any, pyvips.Image.black(width, height, bands=bands)).pngsave_buffer()
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
("pipeline_cls", "store_setting"),
|
("pipeline_cls", "store_setting"),
|
||||||
[
|
[
|
||||||
(ImageNormalizePipeline, "IMAGES_STORE"),
|
|
||||||
(AudioPipeline, "AUDIO_STORE"),
|
(AudioPipeline, "AUDIO_STORE"),
|
||||||
(VideoPipeline, "VIDEO_STORE"),
|
(VideoPipeline, "VIDEO_STORE"),
|
||||||
(FilePipeline, "FILES_STORE"),
|
(FilePipeline, "FILES_STORE"),
|
||||||
|
|
@ -658,220 +630,6 @@ def test_audio_pipeline_media_downloaded_returns_canonical_file_info_and_variant
|
||||||
assert completed_item.audios == [result]
|
assert completed_item.audios == [result]
|
||||||
|
|
||||||
|
|
||||||
def test_image_mimetype_does_not_guess_from_url_extension() -> None:
|
|
||||||
assert image_mimetype(url="https://example.com/photo.jpg") is None
|
|
||||||
|
|
||||||
|
|
||||||
def test_image_normalize_pipeline_media_downloaded_persists_source_and_variants(
|
|
||||||
monkeypatch, tmp_path: Path
|
|
||||||
) -> None:
|
|
||||||
crawler = build_test_crawler(tmp_path)
|
|
||||||
pipeline = ImageNormalizePipeline.from_crawler(cast(Crawler, crawler))
|
|
||||||
monkeypatch.setattr(pipeline, "inc_stats", lambda status: None)
|
|
||||||
source_url = "https://example.com/photo.png"
|
|
||||||
item = ElementItem(
|
|
||||||
feed_name="nasa",
|
|
||||||
el=None,
|
|
||||||
image_urls=[source_url],
|
|
||||||
images=[],
|
|
||||||
file_urls=[],
|
|
||||||
files=[],
|
|
||||||
audio_urls=[],
|
|
||||||
audios=[],
|
|
||||||
video_urls=[],
|
|
||||||
videos=[],
|
|
||||||
)
|
|
||||||
canonical_path = canonical_published_image_path(
|
|
||||||
source_url,
|
|
||||||
crawler.settings["REPUBLISHER_IMAGE"],
|
|
||||||
)
|
|
||||||
source_path = source_image_path(source_url, "image/png")
|
|
||||||
webp_path = published_image_path(
|
|
||||||
source_url,
|
|
||||||
crawler.settings["REPUBLISHER_IMAGE"][0],
|
|
||||||
)
|
|
||||||
jpeg_path = published_image_path(
|
|
||||||
source_url,
|
|
||||||
crawler.settings["REPUBLISHER_IMAGE"][1],
|
|
||||||
)
|
|
||||||
source_body = transparent_png_bytes()
|
|
||||||
|
|
||||||
result = pipeline.media_downloaded(
|
|
||||||
Response(
|
|
||||||
url=source_url,
|
|
||||||
body=source_body,
|
|
||||||
status=200,
|
|
||||||
headers={"Content-Type": "image/png"},
|
|
||||||
),
|
|
||||||
Request(source_url),
|
|
||||||
spider_info(),
|
|
||||||
item=item,
|
|
||||||
)
|
|
||||||
webp_file_size = result["variants"][0].get("fileSize")
|
|
||||||
jpeg_file_size = result["variants"][1].get("fileSize")
|
|
||||||
|
|
||||||
assert result == {
|
|
||||||
"url": source_url,
|
|
||||||
"path": canonical_path,
|
|
||||||
"published_url": f"https://mirror.example/feeds/nasa/images/{canonical_path}",
|
|
||||||
"checksum": result["checksum"],
|
|
||||||
"status": "downloaded",
|
|
||||||
"source_path": source_path,
|
|
||||||
"variants": [
|
|
||||||
{
|
|
||||||
"url": f"https://mirror.example/feeds/nasa/images/{webp_path}",
|
|
||||||
"path": webp_path,
|
|
||||||
"type": "image/webp",
|
|
||||||
"medium": "image",
|
|
||||||
"isDefault": "true",
|
|
||||||
"fileSize": webp_file_size,
|
|
||||||
"width": 2,
|
|
||||||
"height": 3,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"url": f"https://mirror.example/feeds/nasa/images/{jpeg_path}",
|
|
||||||
"path": jpeg_path,
|
|
||||||
"type": "image/jpeg",
|
|
||||||
"medium": "image",
|
|
||||||
"isDefault": "false",
|
|
||||||
"fileSize": jpeg_file_size,
|
|
||||||
"width": 2,
|
|
||||||
"height": 3,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
"thumbnails": [],
|
|
||||||
}
|
|
||||||
assert isinstance(result["checksum"], str)
|
|
||||||
assert isinstance(webp_file_size, int)
|
|
||||||
assert isinstance(jpeg_file_size, int)
|
|
||||||
assert (store_dir(pipeline) / source_path).read_bytes() == source_body
|
|
||||||
webp_image = cast(
|
|
||||||
Any,
|
|
||||||
pyvips.Image.new_from_file(str(store_dir(pipeline) / webp_path)),
|
|
||||||
)
|
|
||||||
jpeg_image = cast(
|
|
||||||
Any,
|
|
||||||
pyvips.Image.new_from_file(str(store_dir(pipeline) / jpeg_path)),
|
|
||||||
)
|
|
||||||
assert (webp_image.width, webp_image.height) == (2, 3)
|
|
||||||
assert (jpeg_image.width, jpeg_image.height) == (2, 3)
|
|
||||||
assert jpeg_image.bands == 3
|
|
||||||
|
|
||||||
completed_item = pipeline.item_completed([(True, result)], item, spider_info())
|
|
||||||
assert completed_item.images == [result]
|
|
||||||
|
|
||||||
|
|
||||||
def test_image_thumbnail_pipeline_generates_named_thumbnails_from_source_image(
|
|
||||||
monkeypatch, tmp_path: Path
|
|
||||||
) -> None:
|
|
||||||
crawler = build_test_crawler(tmp_path)
|
|
||||||
normalize_pipeline = ImageNormalizePipeline.from_crawler(cast(Crawler, crawler))
|
|
||||||
thumbnail_pipeline = ImageThumbnailPipeline.from_crawler(cast(Crawler, crawler))
|
|
||||||
monkeypatch.setattr(normalize_pipeline, "inc_stats", lambda status: None)
|
|
||||||
source_url = "https://example.com/photo.png"
|
|
||||||
source_body = png_bytes(1200, 900)
|
|
||||||
item = ElementItem(
|
|
||||||
feed_name="nasa",
|
|
||||||
el=None,
|
|
||||||
image_urls=[source_url],
|
|
||||||
images=[],
|
|
||||||
file_urls=[],
|
|
||||||
files=[],
|
|
||||||
audio_urls=[],
|
|
||||||
audios=[],
|
|
||||||
video_urls=[],
|
|
||||||
videos=[],
|
|
||||||
)
|
|
||||||
|
|
||||||
normalized = normalize_pipeline.media_downloaded(
|
|
||||||
Response(
|
|
||||||
url=source_url,
|
|
||||||
body=source_body,
|
|
||||||
status=200,
|
|
||||||
headers={"Content-Type": "image/png"},
|
|
||||||
),
|
|
||||||
Request(source_url),
|
|
||||||
spider_info(),
|
|
||||||
item=item,
|
|
||||||
)
|
|
||||||
item.images = [normalized]
|
|
||||||
|
|
||||||
processed = thumbnail_pipeline.process_item(item, spider_info().spider)
|
|
||||||
thumbnails = processed.images[0]["thumbnails"]
|
|
||||||
thumb_slots = [thumb.get("slot") for thumb in thumbnails]
|
|
||||||
first_thumb = thumbnails[0]
|
|
||||||
second_thumb = thumbnails[1]
|
|
||||||
|
|
||||||
assert processed.images[0]["path"] == canonical_published_image_path(
|
|
||||||
source_url,
|
|
||||||
crawler.settings["REPUBLISHER_IMAGE"],
|
|
||||||
)
|
|
||||||
assert thumb_slots == ["card_hero", "list_square"]
|
|
||||||
assert first_thumb.get("path") == thumbnail_image_path(
|
|
||||||
source_url,
|
|
||||||
crawler.settings["REPUBLISHER_IMAGE_THUMBNAILS"][0],
|
|
||||||
)
|
|
||||||
assert first_thumb.get("type") == "image/jpeg"
|
|
||||||
assert first_thumb.get("width") == 640
|
|
||||||
assert first_thumb.get("height") == 360
|
|
||||||
assert second_thumb.get("path") == thumbnail_image_path(
|
|
||||||
source_url,
|
|
||||||
crawler.settings["REPUBLISHER_IMAGE_THUMBNAILS"][1],
|
|
||||||
)
|
|
||||||
assert second_thumb.get("width") == 160
|
|
||||||
assert second_thumb.get("height") == 160
|
|
||||||
for thumb in thumbnails:
|
|
||||||
thumb_path = thumb.get("path")
|
|
||||||
thumb_width = thumb.get("width")
|
|
||||||
thumb_height = thumb.get("height")
|
|
||||||
thumb_image = cast(
|
|
||||||
Any,
|
|
||||||
pyvips.Image.new_from_file(
|
|
||||||
str(store_dir(normalize_pipeline) / str(thumb_path))
|
|
||||||
),
|
|
||||||
)
|
|
||||||
assert (thumb_image.width, thumb_image.height) == (thumb_width, thumb_height)
|
|
||||||
|
|
||||||
|
|
||||||
def test_image_normalize_pipeline_cache_hit_keeps_persisted_source_path_for_extensionless_urls(
|
|
||||||
monkeypatch, tmp_path: Path
|
|
||||||
) -> None:
|
|
||||||
crawler = build_test_crawler(tmp_path)
|
|
||||||
pipeline = ImageNormalizePipeline.from_crawler(cast(Crawler, crawler))
|
|
||||||
monkeypatch.setattr(pipeline, "inc_stats", lambda status: None)
|
|
||||||
source_url = "https://example.com/photo"
|
|
||||||
item = ElementItem(
|
|
||||||
feed_name="nasa",
|
|
||||||
el=None,
|
|
||||||
image_urls=[source_url],
|
|
||||||
images=[],
|
|
||||||
file_urls=[],
|
|
||||||
files=[],
|
|
||||||
audio_urls=[],
|
|
||||||
audios=[],
|
|
||||||
video_urls=[],
|
|
||||||
videos=[],
|
|
||||||
)
|
|
||||||
|
|
||||||
downloaded = pipeline.media_downloaded(
|
|
||||||
Response(
|
|
||||||
url=source_url,
|
|
||||||
body=transparent_png_bytes(),
|
|
||||||
status=200,
|
|
||||||
headers={"Content-Type": "image/png"},
|
|
||||||
),
|
|
||||||
Request(source_url),
|
|
||||||
spider_info(),
|
|
||||||
item=item,
|
|
||||||
)
|
|
||||||
|
|
||||||
uptodate = pipeline.media_to_download(Request(source_url), spider_info(), item=item)
|
|
||||||
|
|
||||||
assert downloaded["source_path"].endswith(".png")
|
|
||||||
assert uptodate is not None
|
|
||||||
assert uptodate["source_path"] == downloaded["source_path"]
|
|
||||||
|
|
||||||
|
|
||||||
def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variants(
|
def test_video_pipeline_media_downloaded_returns_canonical_file_info_and_variants(
|
||||||
monkeypatch, tmp_path: Path
|
monkeypatch, tmp_path: Path
|
||||||
) -> None:
|
) -> None:
|
||||||
|
|
|
||||||
32
uv.lock
generated
32
uv.lock
generated
|
|
@ -812,6 +812,25 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/1a/41/19c65578ef9a54b3083253c68a607f099642747168fe00f3a2bceb7c3a34/peewee-3.19.0-py3-none-any.whl", hash = "sha256:de220b94766e6008c466e00ce4ba5299b9a832117d9eb36d45d0062f3cfd7417", size = 411885, upload-time = "2026-01-07T17:24:58.33Z" },
|
{ url = "https://files.pythonhosted.org/packages/1a/41/19c65578ef9a54b3083253c68a607f099642747168fe00f3a2bceb7c3a34/peewee-3.19.0-py3-none-any.whl", hash = "sha256:de220b94766e6008c466e00ce4ba5299b9a832117d9eb36d45d0062f3cfd7417", size = 411885, upload-time = "2026-01-07T17:24:58.33Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pillow"
|
||||||
|
version = "10.4.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/cd/74/ad3d526f3bf7b6d3f408b73fde271ec69dfac8b81341a318ce825f2b3812/pillow-10.4.0.tar.gz", hash = "sha256:166c1cd4d24309b30d61f79f4a9114b7b2313d7450912277855ff5dfd7cd4a06", size = 46555059, upload-time = "2024-07-01T09:48:43.583Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/c3/00/706cebe7c2c12a6318aabe5d354836f54adff7156fd9e1bd6c89f4ba0e98/pillow-10.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8bc1a764ed8c957a2e9cacf97c8b2b053b70307cf2996aafd70e91a082e70df3", size = 3525685, upload-time = "2024-07-01T09:46:45.194Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/cf/76/f658cbfa49405e5ecbfb9ba42d07074ad9792031267e782d409fd8fe7c69/pillow-10.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6209bb41dc692ddfee4942517c19ee81b86c864b626dbfca272ec0f7cff5d9fb", size = 3374883, upload-time = "2024-07-01T09:46:47.331Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/46/2b/99c28c4379a85e65378211971c0b430d9c7234b1ec4d59b2668f6299e011/pillow-10.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bee197b30783295d2eb680b311af15a20a8b24024a19c3a26431ff83eb8d1f70", size = 4339837, upload-time = "2024-07-01T09:46:49.647Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f1/74/b1ec314f624c0c43711fdf0d8076f82d9d802afd58f1d62c2a86878e8615/pillow-10.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ef61f5dd14c300786318482456481463b9d6b91ebe5ef12f405afbba77ed0be", size = 4455562, upload-time = "2024-07-01T09:46:51.811Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/4a/2a/4b04157cb7b9c74372fa867096a1607e6fedad93a44deeff553ccd307868/pillow-10.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:297e388da6e248c98bc4a02e018966af0c5f92dfacf5a5ca22fa01cb3179bca0", size = 4366761, upload-time = "2024-07-01T09:46:53.961Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ac/7b/8f1d815c1a6a268fe90481232c98dd0e5fa8c75e341a75f060037bd5ceae/pillow-10.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e4db64794ccdf6cb83a59d73405f63adbe2a1887012e308828596100a0b2f6cc", size = 4536767, upload-time = "2024-07-01T09:46:56.664Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e5/77/05fa64d1f45d12c22c314e7b97398ffb28ef2813a485465017b7978b3ce7/pillow-10.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bd2880a07482090a3bcb01f4265f1936a903d70bc740bfcb1fd4e8a2ffe5cf5a", size = 4477989, upload-time = "2024-07-01T09:46:58.977Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/12/63/b0397cfc2caae05c3fb2f4ed1b4fc4fc878f0243510a7a6034ca59726494/pillow-10.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b35b21b819ac1dbd1233317adeecd63495f6babf21b7b2512d244ff6c6ce309", size = 4610255, upload-time = "2024-07-01T09:47:01.189Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/7b/f9/cfaa5082ca9bc4a6de66ffe1c12c2d90bf09c309a5f52b27759a596900e7/pillow-10.4.0-cp313-cp313-win32.whl", hash = "sha256:551d3fd6e9dc15e4c1eb6fc4ba2b39c0c7933fa113b220057a34f4bb3268a060", size = 2235603, upload-time = "2024-07-01T09:47:03.918Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/01/6a/30ff0eef6e0c0e71e55ded56a38d4859bf9d3634a94a88743897b5f96936/pillow-10.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:030abdbe43ee02e0de642aee345efa443740aa4d828bfe8e2eb11922ea6a21ea", size = 2554972, upload-time = "2024-07-01T09:47:06.152Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/48/2c/2e0a52890f269435eee38b21c8218e102c621fe8d8df8b9dd06fabf879ba/pillow-10.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:5b001114dd152cfd6b23befeb28d7aee43553e2402c9f159807bf55f33af8a8d", size = 2243375, upload-time = "2024-07-01T09:47:09.065Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "platformdirs"
|
name = "platformdirs"
|
||||||
version = "4.9.4"
|
version = "4.9.4"
|
||||||
|
|
@ -993,15 +1012,6 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
|
{ url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pyvips"
|
|
||||||
version = "3.1.1"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
dependencies = [
|
|
||||||
{ name = "cffi" },
|
|
||||||
]
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/2d/6a/282936de9faac6addf6bc8792c18e006489d0023ffd8856b8643f54d0558/pyvips-3.1.1.tar.gz", hash = "sha256:84fe744d023b1084ac2516bb17064cacd41c7f8aabf8e524dd383534941b9301", size = 56951, upload-time = "2025-12-09T18:38:06.355Z" }
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pyyaml"
|
name = "pyyaml"
|
||||||
version = "6.0.3"
|
version = "6.0.3"
|
||||||
|
|
@ -1083,10 +1093,10 @@ dependencies = [
|
||||||
{ name = "hypercorn" },
|
{ name = "hypercorn" },
|
||||||
{ name = "lxml" },
|
{ name = "lxml" },
|
||||||
{ name = "peewee" },
|
{ name = "peewee" },
|
||||||
|
{ name = "pillow" },
|
||||||
{ name = "prometheus-client" },
|
{ name = "prometheus-client" },
|
||||||
{ name = "pygea" },
|
{ name = "pygea" },
|
||||||
{ name = "python-dateutil" },
|
{ name = "python-dateutil" },
|
||||||
{ name = "pyvips" },
|
|
||||||
{ name = "quart" },
|
{ name = "quart" },
|
||||||
{ name = "scrapy" },
|
{ name = "scrapy" },
|
||||||
]
|
]
|
||||||
|
|
@ -1116,10 +1126,10 @@ requires-dist = [
|
||||||
{ name = "hypercorn", specifier = ">=0.18.0,<0.19.0" },
|
{ name = "hypercorn", specifier = ">=0.18.0,<0.19.0" },
|
||||||
{ name = "lxml", specifier = ">=5.2.1,<6.0.0" },
|
{ name = "lxml", specifier = ">=5.2.1,<6.0.0" },
|
||||||
{ name = "peewee", specifier = ">=3.19.0,<4.0.0" },
|
{ name = "peewee", specifier = ">=3.19.0,<4.0.0" },
|
||||||
|
{ name = "pillow", specifier = ">=10.3.0,<11.0.0" },
|
||||||
{ name = "prometheus-client", specifier = ">=0.20.0,<0.21.0" },
|
{ name = "prometheus-client", specifier = ">=0.20.0,<0.21.0" },
|
||||||
{ name = "pygea", git = "https://guardianproject.dev/anynews/pygea.git" },
|
{ name = "pygea", git = "https://guardianproject.dev/anynews/pygea.git" },
|
||||||
{ name = "python-dateutil", specifier = ">=2.9.0.post0,<3.0.0" },
|
{ name = "python-dateutil", specifier = ">=2.9.0.post0,<3.0.0" },
|
||||||
{ name = "pyvips", specifier = ">=3.0.0,<4.0.0" },
|
|
||||||
{ name = "quart", specifier = ">=0.20.0,<0.21.0" },
|
{ name = "quart", specifier = ">=0.20.0,<0.21.0" },
|
||||||
{ name = "scrapy", specifier = ">=2.11.1,<3.0.0" },
|
{ name = "scrapy", specifier = ">=2.11.1,<3.0.0" },
|
||||||
]
|
]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue