Handle empty Pangea HTML content
This commit is contained in:
parent
dab6b4568f
commit
2092f66dcd
2 changed files with 47 additions and 2 deletions
|
|
@@ -242,8 +242,9 @@ class RssFeedSpider(BaseRssFeedSpider):
|
|||
|
||||
if "content" in entry:
|
||||
for c in entry.content:
|
||||
if c.type == "text/html":
|
||||
html, urls = self.munge_cdata_html(c.value)
|
||||
raw_html = getattr(c, "value", "") or ""
|
||||
if c.type == "text/html" and raw_html.strip() != "":
|
||||
html, urls = self.munge_cdata_html(raw_html)
|
||||
item.append(CONTENT.encoded(CDATA(html)))
|
||||
image_urls.extend(urls[FileType.IMAGE])
|
||||
video_urls.extend(urls[FileType.VIDEO])
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue