diff --git a/repub/spiders/rss_spider.py b/repub/spiders/rss_spider.py
index 80be20e..fa27317 100644
--- a/repub/spiders/rss_spider.py
+++ b/repub/spiders/rss_spider.py
@@ -281,6 +281,14 @@ class RssFeedSpider(BaseRssFeedSpider):
file_urls = []
audio_urls = []
video_urls = []
+ source_description_html = (
+ sanitize_html(entry.get("summary", "")) if "summary_detail" in entry else ""
+ )
+ has_content_html = any(
+ c.type == "text/html" and ((getattr(c, "value", "") or "").strip() != "")
+ for c in entry.get("content", [])
+ )
+ description_html = source_description_html if has_content_html else ""
def add_url(file_type, url):
if file_type == FileType.IMAGE:
@@ -295,7 +303,7 @@ class RssFeedSpider(BaseRssFeedSpider):
item = E.item(
E.title(entry.get("title")),
E.link(entry.get("link")),
- E.description(sanitize_html(entry.get("description", ""))),
+ E.description(description_html),
E.guid(
entry.get("id"),
{"isPermaLink": "true" if entry.guidislink else "false"},
@@ -341,6 +349,8 @@ class RssFeedSpider(BaseRssFeedSpider):
image_urls.extend(urls[FileType.IMAGE])
video_urls.extend(urls[FileType.VIDEO])
audio_urls.extend(urls[FileType.AUDIO])
+ if not has_content_html and source_description_html.strip() != "":
+ item.append(CONTENT.encoded(CDATA(source_description_html)))
if isinstance(entry.get("media_content"), list):
for media in (
diff --git a/tests/test_feed_validation.py b/tests/test_feed_validation.py
index 290a90a..9e1f80b 100644
--- a/tests/test_feed_validation.py
+++ b/tests/test_feed_validation.py
@@ -437,10 +437,60 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
assert "<" not in itunes_summary
assert ">" not in itunes_summary
- assert "contenteditable=" not in xml
- assert "mode=" not in xml
- assert "querystring=" not in xml
- assert (
- f"https://mirror.example/feeds/demo/images/{local_image_path(source_image)}"
- in xml
+
+def test_item_body_uses_description_only_when_content_is_also_present() -> None:
+ xml, root = _serialize_feed(
+ feed_url="https://mirror.example",
+ feed_text="""
+
Description body
" + ) + + assert content_only.findtext("description") in (None, "") + assert content_only.findtext("content:encoded", namespaces=nsmap) == ( + "Summary body
" + assert both_present.findtext("content:encoded", namespaces=nsmap) == ( + "