Prefer content over description for item bodies

This commit is contained in:
Abel Luck 2026-04-01 17:27:20 +02:00
parent 05ac6ce20d
commit cebf037753
2 changed files with 67 additions and 7 deletions

View file

@@ -281,6 +281,14 @@ class RssFeedSpider(BaseRssFeedSpider):
file_urls = []
audio_urls = []
video_urls = []
source_description_html = (
sanitize_html(entry.get("summary", "")) if "summary_detail" in entry else ""
)
has_content_html = any(
c.type == "text/html" and ((getattr(c, "value", "") or "").strip() != "")
for c in entry.get("content", [])
)
description_html = source_description_html if has_content_html else ""
def add_url(file_type, url):
if file_type == FileType.IMAGE:
@@ -295,7 +303,7 @@ class RssFeedSpider(BaseRssFeedSpider):
item = E.item(
E.title(entry.get("title")),
E.link(entry.get("link")),
E.description(sanitize_html(entry.get("description", ""))),
E.description(description_html),
E.guid(
entry.get("id"),
{"isPermaLink": "true" if entry.guidislink else "false"},
@@ -341,6 +349,8 @@ class RssFeedSpider(BaseRssFeedSpider):
image_urls.extend(urls[FileType.IMAGE])
video_urls.extend(urls[FileType.VIDEO])
audio_urls.extend(urls[FileType.AUDIO])
if not has_content_html and source_description_html.strip() != "":
item.append(CONTENT.encoded(CDATA(source_description_html)))
if isinstance(entry.get("media_content"), list):
for media in (