Prefer content over description for item bodies

This commit is contained in:
Abel Luck 2026-04-01 17:27:20 +02:00
parent 05ac6ce20d
commit cebf037753
2 changed files with 67 additions and 7 deletions

View file

@ -437,10 +437,60 @@ def test_feed_generation_normalizes_dates_urls_and_xml_shapes() -> None:
assert "<" not in itunes_summary
assert ">" not in itunes_summary
assert "contenteditable=" not in xml
assert "mode=" not in xml
assert "querystring=" not in xml
assert (
f"https://mirror.example/feeds/demo/images/{local_image_path(source_image)}"
in xml
def test_item_body_uses_description_only_when_content_is_also_present() -> None:
    """Verify how item bodies are split between description and content.

    The source feed carries three items: one with only ``<description>``,
    one with only ``<content:encoded>``, and one with both. After
    serialization every item must expose its body through
    ``content:encoded``; a ``description`` is expected only for the item
    that supplied both elements. The ``mode`` attributes present in the
    source markup must be absent from the serialized bodies.
    """
    source_feed = """<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
xmlns:content="http://purl.org/rss/1.0/modules/content/">
<channel>
<title>Demo Feed</title>
<link>https://source.example/feed</link>
<description>Demo description</description>
<item>
<title>Description Only</title>
<link>https://source.example/description-only</link>
<description><![CDATA[<p mode="summary">Description body</p>]]></description>
<guid isPermaLink="false">entry-description-only</guid>
<pubDate>Tue, 31 Mar 2026 10:31:50 +0000</pubDate>
</item>
<item>
<title>Content Only</title>
<link>https://source.example/content-only</link>
<guid isPermaLink="false">entry-content-only</guid>
<pubDate>Tue, 31 Mar 2026 10:31:50 +0000</pubDate>
<content:encoded><![CDATA[<div mode="body">Content body</div>]]></content:encoded>
</item>
<item>
<title>Both Present</title>
<link>https://source.example/both-present</link>
<description><![CDATA[<p mode="summary">Summary body</p>]]></description>
<guid isPermaLink="false">entry-both-present</guid>
<pubDate>Tue, 31 Mar 2026 10:31:50 +0000</pubDate>
<content:encoded><![CDATA[<div mode="body">Full body</div>]]></content:encoded>
</item>
</channel>
</rss>
"""
    # Only the parsed tree is inspected here; the raw XML text is not needed.
    _xml, root = _serialize_feed(
        feed_url="https://mirror.example",
        feed_text=source_feed,
    )

    entries = root.findall("./channel/item")
    assert len(entries) == 3
    only_description, only_content, has_both = entries
    content_tag = "content:encoded"

    # A lone description becomes the content body; no description remains.
    assert not only_description.findtext("description")
    promoted_body = only_description.findtext(content_tag, namespaces=nsmap)
    assert promoted_body == "<p>Description body</p>"

    # A lone content element stays the body and no description is invented.
    assert not only_content.findtext("description")
    kept_body = only_content.findtext(content_tag, namespaces=nsmap)
    assert kept_body == "<div>Content body</div>"

    # With both supplied, each element keeps its own (sanitized) markup.
    assert has_both.findtext("description") == "<p>Summary body</p>"
    full_body = has_both.findtext(content_tag, namespaces=nsmap)
    assert full_body == "<div>Full body</div>"