Handle empty Pangea HTML content

This commit is contained in:
Abel Luck 2026-03-30 18:37:50 +02:00
parent dab6b4568f
commit 2092f66dcd
2 changed files with 47 additions and 2 deletions

View file

@@ -242,8 +242,9 @@ class RssFeedSpider(BaseRssFeedSpider):
if "content" in entry:
for c in entry.content:
-if c.type == "text/html":
-html, urls = self.munge_cdata_html(c.value)
+raw_html = getattr(c, "value", "") or ""
+if c.type == "text/html" and raw_html.strip() != "":
+html, urls = self.munge_cdata_html(raw_html)
item.append(CONTENT.encoded(CDATA(html)))
image_urls.extend(urls[FileType.IMAGE])
video_urls.extend(urls[FileType.VIDEO])