diff --git a/pygea/pangeaservice.py b/pygea/pangeaservice.py index 5c4316d..fd9667c 100644 --- a/pygea/pangeaservice.py +++ b/pygea/pangeaservice.py @@ -321,11 +321,14 @@ class PangeaService: sh = hashlib.sha256() sh.update(article["url"].encode("utf8")) rss["guid"] = sh.hexdigest() - rss["title"] = article["title"] + rss["title"] = article.get("title") or article.get("socialTeaserTitle") rss["link"] = article["url"] - if article.get("introduction"): - rss["summary"] = article["introduction"] + summary = article.get("introduction") or article.get( + "socialTeaserIntroduction" + ) + if summary: + rss["summary"] = summary if article.get("authors"): as_str = "" @@ -335,17 +338,18 @@ class PangeaService: as_str = as_str[0 : (len(as_str) - 2)] rss["authors"] = as_str - if article.get("image"): + image = article.get("image") or article.get("socialTeaserImage") + if image: # Seek the enclosure details from the image's server - metadata = utilities.get_media_metadata(article["image"]) + metadata = utilities.get_media_metadata(image) if metadata: rss["enclosure"] = { - "url": article["image"], + "url": image, "type": metadata["content_type"], "length": metadata["content_length"], } else: - rss["enclosure"] = {"url": article["image"]} + rss["enclosure"] = {"url": image} if rss.get("enclosure"): if self._verbose_p: diff --git a/tests/test_pangeaservice_snippets.py b/tests/test_pangeaservice_snippets.py index b6ef7bc..41a2a79 100644 --- a/tests/test_pangeaservice_snippets.py +++ b/tests/test_pangeaservice_snippets.py @@ -1,4 +1,4 @@ -from pygea import pangeaservice +import pygea.pangeaservice as pangeaservice ARTICLE_WITH_TWEET_SNIPPETS = """
@@ -140,6 +140,7 @@ def test_rss_article_from_pangea_article_resolves_supported_snippets( "https://www.martinoticias.com/a/29036.html?parameterid=58108": SNIPPET_PAGE_58108, "https://www.martinoticias.com/a/29036.html?parameterid=58109": SNIPPET_PAGE_58109, }.get(url), + raising=False, ) rss_article = service.rss_article_from_pangea_article( @@ -154,3 +155,138 @@ def test_rss_article_from_pangea_article_resolves_supported_snippets( assert 'class="twitter-tweet"' in rss_article["content"] assert "@HenryAlviarez" in rss_article["content"] assert "@MariaCorinaYA" in rss_article["content"] + + +def test_rss_article_from_pangea_article_uses_social_teaser_as_summary_fallback( + monkeypatch, +) -> None: + service = object.__new__(pangeaservice.PangeaService) + service._verbose_p = False + service._domain = "www.martinoticias.com" + service._rev_categories = {} + + monkeypatch.setattr( + pangeaservice.utilities, + "get_media_metadata", + lambda _url: None, + ) + monkeypatch.setattr( + service, + "_fetch_snippet_page_html", + lambda _url: None, + raising=False, + ) + + rss_article = service.rss_article_from_pangea_article( + { + "url": "https://www.martinoticias.com/a/cambios-en-venezuela/454274.html", + "title": 'Cambios en Venezuela culminaran con "elecciones libres y justas", dice Rubio', + "introduction": "", + "socialTeaserIntroduction": "Resumen corto para tarjetas y redes.", + "content": "Contenido completo del articulo.
", + "pubDate": "2026-04-01T13:30:32", + } + ) + + assert rss_article["summary"] == "Resumen corto para tarjetas y redes." + + +def test_rss_article_from_pangea_article_prefers_introduction_over_social_teaser( + monkeypatch, +) -> None: + service = object.__new__(pangeaservice.PangeaService) + service._verbose_p = False + service._domain = "www.martinoticias.com" + service._rev_categories = {} + + monkeypatch.setattr( + pangeaservice.utilities, + "get_media_metadata", + lambda _url: None, + ) + monkeypatch.setattr( + service, + "_fetch_snippet_page_html", + lambda _url: None, + raising=False, + ) + + rss_article = service.rss_article_from_pangea_article( + { + "url": "https://www.martinoticias.com/a/cambios-en-venezuela/454274.html", + "title": 'Cambios en Venezuela culminaran con "elecciones libres y justas", dice Rubio', + "introduction": "Introduccion canonica.", + "socialTeaserIntroduction": "Resumen social.", + "content": "Contenido completo del articulo.
", + "pubDate": "2026-04-01T13:30:32", + } + ) + + assert rss_article["summary"] == "Introduccion canonica." + + +def test_rss_article_from_pangea_article_uses_social_teaser_title_as_fallback( + monkeypatch, +) -> None: + service = object.__new__(pangeaservice.PangeaService) + service._verbose_p = False + service._domain = "www.martinoticias.com" + service._rev_categories = {} + + monkeypatch.setattr( + pangeaservice.utilities, + "get_media_metadata", + lambda _url: None, + ) + monkeypatch.setattr( + service, + "_fetch_snippet_page_html", + lambda _url: None, + raising=False, + ) + + rss_article = service.rss_article_from_pangea_article( + { + "url": "https://www.martinoticias.com/a/cambios-en-venezuela/454274.html", + "title": "", + "socialTeaserTitle": "Titulo para redes.", + "content": "Contenido completo del articulo.
", + "pubDate": "2026-04-01T13:30:32", + } + ) + + assert rss_article["title"] == "Titulo para redes." + + +def test_rss_article_from_pangea_article_uses_social_teaser_image_as_fallback( + monkeypatch, +) -> None: + service = object.__new__(pangeaservice.PangeaService) + service._verbose_p = False + service._domain = "www.martinoticias.com" + service._rev_categories = {} + + monkeypatch.setattr( + pangeaservice.utilities, + "get_media_metadata", + lambda _url: None, + ) + monkeypatch.setattr( + service, + "_fetch_snippet_page_html", + lambda _url: None, + raising=False, + ) + + rss_article = service.rss_article_from_pangea_article( + { + "url": "https://www.martinoticias.com/a/cambios-en-venezuela/454274.html", + "title": "Titulo canonico.", + "image": "", + "socialTeaserImage": "https://www.martinoticias.com/social.jpg", + "content": "Contenido completo del articulo.
", + "pubDate": "2026-04-01T13:30:32", + } + ) + + assert rss_article["enclosure"]["url"] == "https://www.martinoticias.com/social.jpg"