Replace image pipeline with profile-driven variants
- add image normalization profiles and thumbnail profiles
- generate source, full-size variant, and thumbnail image artifacts
- rewrite canonical image URLs through the first configured profile
- emit explicit image Media RSS groups with named thumbnails
- preserve legacy image paths when image conversion is disabled
- cover cache-hit source paths, inline image handling, and thumbnail export
This commit is contained in:
parent
7316d4723f
commit
525393272e
13 changed files with 1299 additions and 124 deletions
|
|
@ -21,6 +21,7 @@ from repub.rss import (
|
|||
)
|
||||
from repub.utils import (
|
||||
FileType,
|
||||
canonical_published_image_path,
|
||||
canonical_published_media_path,
|
||||
determine_file_type,
|
||||
local_file_path,
|
||||
|
|
@ -54,7 +55,16 @@ class BaseRssFeedSpider(Spider):
|
|||
local_path = local_file_path(url)
|
||||
if file_type == FileType.IMAGE:
|
||||
file_dir = self.settings["REPUBLISHER_IMAGE_DIR"]
|
||||
local_path = local_image_path(url)
|
||||
image_profiles = (
|
||||
self.settings.get("REPUBLISHER_IMAGE") or []
|
||||
if self.settings.getbool("REPUBLISHER_IMAGE_NORMALIZE_ENABLED", True)
|
||||
else []
|
||||
)
|
||||
local_path = (
|
||||
canonical_published_image_path(url, image_profiles)
|
||||
if image_profiles
|
||||
else local_image_path(url)
|
||||
)
|
||||
elif file_type == FileType.VIDEO:
|
||||
file_dir = self.settings["REPUBLISHER_VIDEO_DIR"]
|
||||
local_path = canonical_published_media_path(
|
||||
|
|
@ -278,6 +288,7 @@ class RssFeedSpider(BaseRssFeedSpider):
|
|||
|
||||
def parse_entry(self, response, feed, entry):
|
||||
image_urls = []
|
||||
media_image_urls = []
|
||||
file_urls = []
|
||||
audio_urls = []
|
||||
video_urls = []
|
||||
|
|
@ -323,6 +334,7 @@ class RssFeedSpider(BaseRssFeedSpider):
|
|||
)
|
||||
if entry.get("image"):
|
||||
image_urls.append(entry.get("image").href)
|
||||
media_image_urls.append(entry.get("image").href)
|
||||
for enc in entry.enclosures:
|
||||
url = enc.get("href")
|
||||
file_type = determine_file_type(url=url, mimetype=enc.get("type"))
|
||||
|
|
@ -381,6 +393,8 @@ class RssFeedSpider(BaseRssFeedSpider):
|
|||
)
|
||||
)
|
||||
add_url(file_type, media.get("url"))
|
||||
if file_type == FileType.IMAGE:
|
||||
media_image_urls.append(media.get("url"))
|
||||
return ElementItem(
|
||||
feed_name=self.feed_name,
|
||||
el=item,
|
||||
|
|
@ -392,6 +406,7 @@ class RssFeedSpider(BaseRssFeedSpider):
|
|||
audios=[],
|
||||
video_urls=video_urls,
|
||||
videos=[],
|
||||
media_image_urls=media_image_urls,
|
||||
)
|
||||
|
||||
WEBMASTER_VALUE = "support@guardianproject.info (Guardian Project)"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue