Fallback to social teaser metadata
This commit is contained in:
parent
bff04afbf6
commit
c58bac3abd
2 changed files with 148 additions and 8 deletions
|
|
@ -321,11 +321,14 @@ class PangeaService:
|
||||||
sh = hashlib.sha256()
|
sh = hashlib.sha256()
|
||||||
sh.update(article["url"].encode("utf8"))
|
sh.update(article["url"].encode("utf8"))
|
||||||
rss["guid"] = sh.hexdigest()
|
rss["guid"] = sh.hexdigest()
|
||||||
rss["title"] = article["title"]
|
rss["title"] = article.get("title") or article.get("socialTeaserTitle")
|
||||||
rss["link"] = article["url"]
|
rss["link"] = article["url"]
|
||||||
|
|
||||||
if article.get("introduction"):
|
summary = article.get("introduction") or article.get(
|
||||||
rss["summary"] = article["introduction"]
|
"socialTeaserIntroduction"
|
||||||
|
)
|
||||||
|
if summary:
|
||||||
|
rss["summary"] = summary
|
||||||
|
|
||||||
if article.get("authors"):
|
if article.get("authors"):
|
||||||
as_str = ""
|
as_str = ""
|
||||||
|
|
@ -335,17 +338,18 @@ class PangeaService:
|
||||||
as_str = as_str[0 : (len(as_str) - 2)]
|
as_str = as_str[0 : (len(as_str) - 2)]
|
||||||
rss["authors"] = as_str
|
rss["authors"] = as_str
|
||||||
|
|
||||||
if article.get("image"):
|
image = article.get("image") or article.get("socialTeaserImage")
|
||||||
|
if image:
|
||||||
# Seek the enclosure details from the image's server
|
# Seek the enclosure details from the image's server
|
||||||
metadata = utilities.get_media_metadata(article["image"])
|
metadata = utilities.get_media_metadata(image)
|
||||||
if metadata:
|
if metadata:
|
||||||
rss["enclosure"] = {
|
rss["enclosure"] = {
|
||||||
"url": article["image"],
|
"url": image,
|
||||||
"type": metadata["content_type"],
|
"type": metadata["content_type"],
|
||||||
"length": metadata["content_length"],
|
"length": metadata["content_length"],
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
rss["enclosure"] = {"url": article["image"]}
|
rss["enclosure"] = {"url": image}
|
||||||
|
|
||||||
if rss.get("enclosure"):
|
if rss.get("enclosure"):
|
||||||
if self._verbose_p:
|
if self._verbose_p:
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
from pygea import pangeaservice
|
import pygea.pangeaservice as pangeaservice
|
||||||
|
|
||||||
ARTICLE_WITH_TWEET_SNIPPETS = """
|
ARTICLE_WITH_TWEET_SNIPPETS = """
|
||||||
<div contenteditable="false" class="tag_image tag_snippet" mode="infographics|plain|389510|large||Trinity Audio Embed" querystring=""></div>
|
<div contenteditable="false" class="tag_image tag_snippet" mode="infographics|plain|389510|large||Trinity Audio Embed" querystring=""></div>
|
||||||
|
|
@ -140,6 +140,7 @@ def test_rss_article_from_pangea_article_resolves_supported_snippets(
|
||||||
"https://www.martinoticias.com/a/29036.html?parameterid=58108": SNIPPET_PAGE_58108,
|
"https://www.martinoticias.com/a/29036.html?parameterid=58108": SNIPPET_PAGE_58108,
|
||||||
"https://www.martinoticias.com/a/29036.html?parameterid=58109": SNIPPET_PAGE_58109,
|
"https://www.martinoticias.com/a/29036.html?parameterid=58109": SNIPPET_PAGE_58109,
|
||||||
}.get(url),
|
}.get(url),
|
||||||
|
raising=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
rss_article = service.rss_article_from_pangea_article(
|
rss_article = service.rss_article_from_pangea_article(
|
||||||
|
|
@ -154,3 +155,138 @@ def test_rss_article_from_pangea_article_resolves_supported_snippets(
|
||||||
assert 'class="twitter-tweet"' in rss_article["content"]
|
assert 'class="twitter-tweet"' in rss_article["content"]
|
||||||
assert "@HenryAlviarez" in rss_article["content"]
|
assert "@HenryAlviarez" in rss_article["content"]
|
||||||
assert "@MariaCorinaYA" in rss_article["content"]
|
assert "@MariaCorinaYA" in rss_article["content"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_rss_article_from_pangea_article_uses_social_teaser_as_summary_fallback(
    monkeypatch,
) -> None:
    """An empty ``introduction`` makes the summary come from the social teaser."""

    def _no_metadata(_url):
        # Stand-in for the media metadata lookup: always reports nothing.
        return None

    def _no_snippet_page(_url):
        # Stand-in for the snippet page fetch: always reports nothing.
        return None

    # Allocate the service without running __init__, then set only the
    # attributes this test assigns explicitly.
    service_cls = pangeaservice.PangeaService
    service = object.__new__(service_cls)
    service._rev_categories = {}
    service._domain = "www.martinoticias.com"
    service._verbose_p = False

    # raising=False lets the patch apply even if the attribute does not
    # already exist on the bare instance.
    monkeypatch.setattr(
        service, "_fetch_snippet_page_html", _no_snippet_page, raising=False
    )
    monkeypatch.setattr(pangeaservice.utilities, "get_media_metadata", _no_metadata)

    article = {
        "url": "https://www.martinoticias.com/a/cambios-en-venezuela/454274.html",
        "title": 'Cambios en Venezuela culminaran con "elecciones libres y justas", dice Rubio',
        "introduction": "",
        "socialTeaserIntroduction": "Resumen corto para tarjetas y redes.",
        "content": "<p>Contenido completo del articulo.</p>",
        "pubDate": "2026-04-01T13:30:32",
    }
    result = service.rss_article_from_pangea_article(article)

    assert result["summary"] == "Resumen corto para tarjetas y redes."
|
||||||
|
|
||||||
|
|
||||||
|
def test_rss_article_from_pangea_article_prefers_introduction_over_social_teaser(
    monkeypatch,
) -> None:
    """A non-empty ``introduction`` wins over ``socialTeaserIntroduction``."""

    def _no_metadata(_url):
        # Stand-in for the media metadata lookup: always reports nothing.
        return None

    def _no_snippet_page(_url):
        # Stand-in for the snippet page fetch: always reports nothing.
        return None

    # Allocate the service without running __init__, then set only the
    # attributes this test assigns explicitly.
    service_cls = pangeaservice.PangeaService
    service = object.__new__(service_cls)
    service._rev_categories = {}
    service._domain = "www.martinoticias.com"
    service._verbose_p = False

    # raising=False lets the patch apply even if the attribute does not
    # already exist on the bare instance.
    monkeypatch.setattr(
        service, "_fetch_snippet_page_html", _no_snippet_page, raising=False
    )
    monkeypatch.setattr(pangeaservice.utilities, "get_media_metadata", _no_metadata)

    article = {
        "url": "https://www.martinoticias.com/a/cambios-en-venezuela/454274.html",
        "title": 'Cambios en Venezuela culminaran con "elecciones libres y justas", dice Rubio',
        "introduction": "Introduccion canonica.",
        "socialTeaserIntroduction": "Resumen social.",
        "content": "<p>Contenido completo del articulo.</p>",
        "pubDate": "2026-04-01T13:30:32",
    }
    result = service.rss_article_from_pangea_article(article)

    assert result["summary"] == "Introduccion canonica."
|
||||||
|
|
||||||
|
|
||||||
|
def test_rss_article_from_pangea_article_uses_social_teaser_title_as_fallback(
    monkeypatch,
) -> None:
    """An empty ``title`` makes the RSS title come from ``socialTeaserTitle``."""

    def _no_metadata(_url):
        # Stand-in for the media metadata lookup: always reports nothing.
        return None

    def _no_snippet_page(_url):
        # Stand-in for the snippet page fetch: always reports nothing.
        return None

    # Allocate the service without running __init__, then set only the
    # attributes this test assigns explicitly.
    service_cls = pangeaservice.PangeaService
    service = object.__new__(service_cls)
    service._rev_categories = {}
    service._domain = "www.martinoticias.com"
    service._verbose_p = False

    # raising=False lets the patch apply even if the attribute does not
    # already exist on the bare instance.
    monkeypatch.setattr(
        service, "_fetch_snippet_page_html", _no_snippet_page, raising=False
    )
    monkeypatch.setattr(pangeaservice.utilities, "get_media_metadata", _no_metadata)

    article = {
        "url": "https://www.martinoticias.com/a/cambios-en-venezuela/454274.html",
        "title": "",
        "socialTeaserTitle": "Titulo para redes.",
        "content": "<p>Contenido completo del articulo.</p>",
        "pubDate": "2026-04-01T13:30:32",
    }
    result = service.rss_article_from_pangea_article(article)

    assert result["title"] == "Titulo para redes."
|
||||||
|
|
||||||
|
|
||||||
|
def test_rss_article_from_pangea_article_uses_social_teaser_image_as_fallback(
    monkeypatch,
) -> None:
    """An empty ``image`` makes the enclosure URL come from ``socialTeaserImage``."""

    def _no_metadata(_url):
        # Stand-in for the media metadata lookup: always reports nothing.
        return None

    def _no_snippet_page(_url):
        # Stand-in for the snippet page fetch: always reports nothing.
        return None

    # Allocate the service without running __init__, then set only the
    # attributes this test assigns explicitly.
    service_cls = pangeaservice.PangeaService
    service = object.__new__(service_cls)
    service._rev_categories = {}
    service._domain = "www.martinoticias.com"
    service._verbose_p = False

    # raising=False lets the patch apply even if the attribute does not
    # already exist on the bare instance.
    monkeypatch.setattr(
        service, "_fetch_snippet_page_html", _no_snippet_page, raising=False
    )
    monkeypatch.setattr(pangeaservice.utilities, "get_media_metadata", _no_metadata)

    social_image_url = "https://www.martinoticias.com/social.jpg"
    article = {
        "url": "https://www.martinoticias.com/a/cambios-en-venezuela/454274.html",
        "title": "Titulo canonico.",
        "image": "",
        "socialTeaserImage": social_image_url,
        "content": "<p>Contenido completo del articulo.</p>",
        "pubDate": "2026-04-01T13:30:32",
    }
    result = service.rss_article_from_pangea_article(article)

    assert result["enclosure"]["url"] == "https://www.martinoticias.com/social.jpg"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue