Fall back to social teaser metadata

This commit is contained in:
Abel Luck 2026-04-01 17:21:23 +02:00
parent bff04afbf6
commit c58bac3abd
2 changed files with 148 additions and 8 deletions

View file

@ -1,4 +1,4 @@
from pygea import pangeaservice
import pygea.pangeaservice as pangeaservice
ARTICLE_WITH_TWEET_SNIPPETS = """
<div contenteditable="false" class="tag_image tag_snippet" mode="infographics|plain|389510|large||Trinity Audio Embed" querystring=""></div>
@ -140,6 +140,7 @@ def test_rss_article_from_pangea_article_resolves_supported_snippets(
"https://www.martinoticias.com/a/29036.html?parameterid=58108": SNIPPET_PAGE_58108,
"https://www.martinoticias.com/a/29036.html?parameterid=58109": SNIPPET_PAGE_58109,
}.get(url),
raising=False,
)
rss_article = service.rss_article_from_pangea_article(
@ -154,3 +155,138 @@ def test_rss_article_from_pangea_article_resolves_supported_snippets(
assert 'class="twitter-tweet"' in rss_article["content"]
assert "@HenryAlviarez" in rss_article["content"]
assert "@MariaCorinaYA" in rss_article["content"]
def test_rss_article_from_pangea_article_uses_social_teaser_as_summary_fallback(
    monkeypatch,
) -> None:
    """Check that an empty ``introduction`` yields the social teaser as summary."""

    def _return_none(_url):
        # Single stub reused for both patched callables below.
        return None

    # Allocate the service without running __init__, then set only the
    # attributes this test assigns explicitly.
    service = object.__new__(pangeaservice.PangeaService)
    service._rev_categories = {}
    service._domain = "www.martinoticias.com"
    service._verbose_p = False

    # Replace the media-metadata lookup and the snippet-page fetch with
    # stubs that always return None.
    monkeypatch.setattr(pangeaservice.utilities, "get_media_metadata", _return_none)
    monkeypatch.setattr(
        service, "_fetch_snippet_page_html", _return_none, raising=False
    )

    pangea_article = {
        "url": "https://www.martinoticias.com/a/cambios-en-venezuela/454274.html",
        "title": 'Cambios en Venezuela culminaran con "elecciones libres y justas", dice Rubio',
        "introduction": "",
        "socialTeaserIntroduction": "Resumen corto para tarjetas y redes.",
        "content": "<p>Contenido completo del articulo.</p>",
        "pubDate": "2026-04-01T13:30:32",
    }
    rss_article = service.rss_article_from_pangea_article(pangea_article)

    expected_summary = "Resumen corto para tarjetas y redes."
    assert rss_article["summary"] == expected_summary
def test_rss_article_from_pangea_article_prefers_introduction_over_social_teaser(
    monkeypatch,
) -> None:
    """Check that a non-empty ``introduction`` wins over the social teaser text."""

    def _return_none(_url):
        # Single stub reused for both patched callables below.
        return None

    # Allocate the service without running __init__, then set only the
    # attributes this test assigns explicitly.
    service = object.__new__(pangeaservice.PangeaService)
    service._rev_categories = {}
    service._domain = "www.martinoticias.com"
    service._verbose_p = False

    # Replace the media-metadata lookup and the snippet-page fetch with
    # stubs that always return None.
    monkeypatch.setattr(pangeaservice.utilities, "get_media_metadata", _return_none)
    monkeypatch.setattr(
        service, "_fetch_snippet_page_html", _return_none, raising=False
    )

    pangea_article = {
        "url": "https://www.martinoticias.com/a/cambios-en-venezuela/454274.html",
        "title": 'Cambios en Venezuela culminaran con "elecciones libres y justas", dice Rubio',
        "introduction": "Introduccion canonica.",
        "socialTeaserIntroduction": "Resumen social.",
        "content": "<p>Contenido completo del articulo.</p>",
        "pubDate": "2026-04-01T13:30:32",
    }
    rss_article = service.rss_article_from_pangea_article(pangea_article)

    expected_summary = "Introduccion canonica."
    assert rss_article["summary"] == expected_summary
def test_rss_article_from_pangea_article_uses_social_teaser_title_as_fallback(
    monkeypatch,
) -> None:
    """Check that an empty ``title`` yields ``socialTeaserTitle`` as the title."""

    def _return_none(_url):
        # Single stub reused for both patched callables below.
        return None

    # Allocate the service without running __init__, then set only the
    # attributes this test assigns explicitly.
    service = object.__new__(pangeaservice.PangeaService)
    service._rev_categories = {}
    service._domain = "www.martinoticias.com"
    service._verbose_p = False

    # Replace the media-metadata lookup and the snippet-page fetch with
    # stubs that always return None.
    monkeypatch.setattr(pangeaservice.utilities, "get_media_metadata", _return_none)
    monkeypatch.setattr(
        service, "_fetch_snippet_page_html", _return_none, raising=False
    )

    pangea_article = {
        "url": "https://www.martinoticias.com/a/cambios-en-venezuela/454274.html",
        "title": "",
        "socialTeaserTitle": "Titulo para redes.",
        "content": "<p>Contenido completo del articulo.</p>",
        "pubDate": "2026-04-01T13:30:32",
    }
    rss_article = service.rss_article_from_pangea_article(pangea_article)

    expected_title = "Titulo para redes."
    assert rss_article["title"] == expected_title
def test_rss_article_from_pangea_article_uses_social_teaser_image_as_fallback(
    monkeypatch,
) -> None:
    """Check that an empty ``image`` yields ``socialTeaserImage`` as enclosure URL."""

    def _return_none(_url):
        # Single stub reused for both patched callables below.
        return None

    # Allocate the service without running __init__, then set only the
    # attributes this test assigns explicitly.
    service = object.__new__(pangeaservice.PangeaService)
    service._rev_categories = {}
    service._domain = "www.martinoticias.com"
    service._verbose_p = False

    # Replace the media-metadata lookup and the snippet-page fetch with
    # stubs that always return None.
    monkeypatch.setattr(pangeaservice.utilities, "get_media_metadata", _return_none)
    monkeypatch.setattr(
        service, "_fetch_snippet_page_html", _return_none, raising=False
    )

    pangea_article = {
        "url": "https://www.martinoticias.com/a/cambios-en-venezuela/454274.html",
        "title": "Titulo canonico.",
        "image": "",
        "socialTeaserImage": "https://www.martinoticias.com/social.jpg",
        "content": "<p>Contenido completo del articulo.</p>",
        "pubDate": "2026-04-01T13:30:32",
    }
    rss_article = service.rss_article_from_pangea_article(pangea_article)

    expected_url = "https://www.martinoticias.com/social.jpg"
    assert rss_article["enclosure"]["url"] == expected_url