"""Tests for snippet resolution and RSS field fallbacks in ``pygea.pangeaservice``."""

import pygea.pangeaservice as pangeaservice

_DOMAIN = "www.martinoticias.com"
_SNIPPET_URL_58108 = "https://www.martinoticias.com/a/29036.html?parameterid=58108"
_SNIPPET_URL_58109 = "https://www.martinoticias.com/a/29036.html?parameterid=58109"

# NOTE(review): as written, this fixture holds only three plain paragraphs and
# none of the snippet placeholders (`?parameterid=...`,
# `mode="infographics|plain|29036|large||"`) that the tests below search for.
# Presumably the embedded markup was stripped -- restore it before relying on
# these tests.
ARTICLE_WITH_TWEET_SNIPPETS = """
Lead paragraph.
Middle paragraph.
Tail paragraph.
""".strip()

# NOTE(review): both snippet pages evaluate to the empty string as written;
# presumably they should carry the snippet page HTML -- confirm and restore.
SNIPPET_PAGE_58108 = """ """.strip()

SNIPPET_PAGE_58109 = """ """.strip()

# Snippet URL -> page body served by the fake fetcher below.
_SNIPPET_PAGES = {
    _SNIPPET_URL_58108: SNIPPET_PAGE_58108,
    _SNIPPET_URL_58109: SNIPPET_PAGE_58109,
}


def _fetch_known_snippet_page(url: str) -> str | None:
    """Return the canned page for *url*, or ``None`` for an unknown URL."""
    return _SNIPPET_PAGES.get(url)


def _fetch_nothing(_url: str) -> None:
    """Fake fetcher that never finds a page."""
    return None


def _make_service(monkeypatch, fetch_snippet_page_html):
    """Build a ``PangeaService`` without running ``__init__`` and stub its lookups.

    Args:
        monkeypatch: pytest's ``monkeypatch`` fixture.
        fetch_snippet_page_html: Callable installed on the instance as
            ``_fetch_snippet_page_html``; it is handed a URL.

    Returns:
        The bare service with ``_verbose_p``, ``_domain`` and
        ``_rev_categories`` assigned.
    """
    service = object.__new__(pangeaservice.PangeaService)
    service._verbose_p = False
    service._domain = _DOMAIN
    service._rev_categories = {}
    monkeypatch.setattr(
        pangeaservice.utilities,
        "get_media_metadata",
        lambda _url: None,
    )
    # raising=False lets the patch succeed even when the attribute is absent.
    monkeypatch.setattr(
        service,
        "_fetch_snippet_page_html",
        fetch_snippet_page_html,
        raising=False,
    )
    return service


def test_snippet_urls_from_article_content_finds_supported_infographics_snippets() -> (
    None
):
    """Both snippet URLs are returned, in order, for the article fixture."""
    urls = pangeaservice.snippet_urls_from_article_content(
        ARTICLE_WITH_TWEET_SNIPPETS, _DOMAIN
    )

    assert urls == [_SNIPPET_URL_58108, _SNIPPET_URL_58109]


def test_snippet_urls_from_article_content_ignores_malformed_or_unsupported_snippets() -> (
    None
):
    """Content without a supported snippet target yields an empty list."""
    raw_html = """No supported snippet target is present here.
""".strip()

    urls = pangeaservice.snippet_urls_from_article_content(raw_html, _DOMAIN)

    assert urls == []


def test_extract_embed_html_from_snippet_page_returns_decoded_blockquote() -> None:
    """A snippet page yields non-``None`` embed HTML."""
    embed_html = pangeaservice.extract_embed_html_from_snippet_page(SNIPPET_PAGE_58108)

    assert embed_html is not None
    # NOTE(review): a further assertion on the embed text began here, but only
    # its opening quote survived in the file -- restore it from history.


def test_extract_embed_html_from_snippet_page_returns_none_for_empty_page() -> None:
    """An empty page yields ``None``."""
    # NOTE(review): this test's original header was fused into an unterminated
    # string literal; the name is reconstructed from the body -- confirm.
    embed_html = pangeaservice.extract_embed_html_from_snippet_page("")

    assert embed_html is None


def test_resolve_content_snippets_replaces_supported_tweet_placeholders() -> None:
    """Resolved content embeds the tweets and drops the placeholder URLs."""
    resolved = pangeaservice.resolve_content_snippets(
        ARTICLE_WITH_TWEET_SNIPPETS, _DOMAIN, _fetch_known_snippet_page
    )

    assert 'class="twitter-tweet"' in resolved
    assert "@HenryAlviarez" in resolved
    assert "@MariaCorinaYA" in resolved
    assert "?parameterid=58108" not in resolved
    assert "?parameterid=58109" not in resolved
    assert "Lead paragraph." in resolved
    assert "Tail paragraph." in resolved


def test_resolve_content_snippets_leaves_placeholder_when_resolution_fails() -> None:
    """When the fetcher returns ``None`` the placeholders stay untouched."""
    resolved = pangeaservice.resolve_content_snippets(
        ARTICLE_WITH_TWEET_SNIPPETS,
        _DOMAIN,
        lambda _url: None,
    )

    assert 'mode="infographics|plain|29036|large||"' in resolved
    assert "?parameterid=58108" in resolved
    assert "?parameterid=58109" in resolved
    assert 'class="twitter-tweet"' not in resolved


def test_rss_article_from_pangea_article_resolves_supported_snippets(
    monkeypatch,
) -> None:
    """The RSS article's content carries the resolved tweet embeds."""
    service = _make_service(monkeypatch, _fetch_known_snippet_page)

    rss_article = service.rss_article_from_pangea_article(
        {
            "url": "https://www.martinoticias.com/a/reabre-sede-del-partido-de-maria-corina-machado-en-caracas/453944.html",
            "title": "Reabre sede del partido de María Corina Machado en Caracas (VIDEO)",
            "content": ARTICLE_WITH_TWEET_SNIPPETS,
            "pubDate": "2026-03-29T16:29:38",
        }
    )

    assert 'class="twitter-tweet"' in rss_article["content"]
    assert "@HenryAlviarez" in rss_article["content"]
    assert "@MariaCorinaYA" in rss_article["content"]


def test_rss_article_from_pangea_article_uses_social_teaser_as_summary_fallback(
    monkeypatch,
) -> None:
    """An empty ``introduction`` falls back to ``socialTeaserIntroduction``."""
    service = _make_service(monkeypatch, _fetch_nothing)

    rss_article = service.rss_article_from_pangea_article(
        {
            "url": "https://www.martinoticias.com/a/cambios-en-venezuela/454274.html",
            "title": 'Cambios en Venezuela culminaran con "elecciones libres y justas", dice Rubio',
            "introduction": "",
            "socialTeaserIntroduction": "Resumen corto para tarjetas y redes.",
            "content": "Contenido completo del articulo.\n",
            "pubDate": "2026-04-01T13:30:32",
        }
    )

    assert rss_article["summary"] == "Resumen corto para tarjetas y redes."


def test_rss_article_from_pangea_article_prefers_introduction_over_social_teaser(
    monkeypatch,
) -> None:
    """A non-empty ``introduction`` wins over ``socialTeaserIntroduction``."""
    service = _make_service(monkeypatch, _fetch_nothing)

    rss_article = service.rss_article_from_pangea_article(
        {
            "url": "https://www.martinoticias.com/a/cambios-en-venezuela/454274.html",
            "title": 'Cambios en Venezuela culminaran con "elecciones libres y justas", dice Rubio',
            "introduction": "Introduccion canonica.",
            "socialTeaserIntroduction": "Resumen social.",
            "content": "Contenido completo del articulo.\n",
            "pubDate": "2026-04-01T13:30:32",
        }
    )

    assert rss_article["summary"] == "Introduccion canonica."


def test_rss_article_from_pangea_article_uses_social_teaser_title_as_fallback(
    monkeypatch,
) -> None:
    """An empty ``title`` falls back to ``socialTeaserTitle``."""
    service = _make_service(monkeypatch, _fetch_nothing)

    rss_article = service.rss_article_from_pangea_article(
        {
            "url": "https://www.martinoticias.com/a/cambios-en-venezuela/454274.html",
            "title": "",
            "socialTeaserTitle": "Titulo para redes.",
            "content": "Contenido completo del articulo.\n",
            "pubDate": "2026-04-01T13:30:32",
        }
    )

    assert rss_article["title"] == "Titulo para redes."


def test_rss_article_from_pangea_article_uses_social_teaser_image_as_fallback(
    monkeypatch,
) -> None:
    """An empty ``image`` falls back to ``socialTeaserImage`` for the enclosure."""
    service = _make_service(monkeypatch, _fetch_nothing)

    rss_article = service.rss_article_from_pangea_article(
        {
            "url": "https://www.martinoticias.com/a/cambios-en-venezuela/454274.html",
            "title": "Titulo canonico.",
            "image": "",
            "socialTeaserImage": "https://www.martinoticias.com/social.jpg",
            "content": "Contenido completo del articulo.\n",
            "pubDate": "2026-04-01T13:30:32",
        }
    )

    assert rss_article["enclosure"]["url"] == "https://www.martinoticias.com/social.jpg"