switch to TOML config and export republisher feed manifests

This commit is contained in:
Abel Luck 2026-03-29 14:46:57 +02:00
parent 98dcea4d7e
commit 897af2872c
17 changed files with 832 additions and 324 deletions

6
tests/conftest.py Normal file
View file

@ -0,0 +1,6 @@
import sys
from pathlib import Path
# Put the directory one level above this file's own directory at the front of
# sys.path (presumably the repository root, so the tests can import the
# project package without installing it -- confirm against the repo layout).
ROOT = Path(__file__).resolve().parents[1]
_root_entry = str(ROOT)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)

73
tests/test_config.py Normal file
View file

@ -0,0 +1,73 @@
from pathlib import Path
import pytest
from pygea.config import load_config
def test_load_config_resolves_relative_paths_and_preserves_feed_fields(
    tmp_path: Path,
) -> None:
    """Check path resolution and feed passthrough for a config in a subdirectory.

    The config file is written to ``tmp_path/configs/pygea.toml`` and refers to
    its output directory and log file with ``../``-relative paths. The loaded
    config is expected to expose both as resolved paths directly under
    ``tmp_path`` and to return the single feed table with every field unchanged.
    """
    config_dir = tmp_path / "configs"
    config_dir.mkdir(parents=True)
    toml_file = config_dir / "pygea.toml"

    # The TOML body is kept flush-left so the text written to disk is exactly
    # what is shown here once the surrounding newlines are stripped.
    toml_text = """
domain = "www.martinoticias.com"
default_content_type = "articles"
[[feeds]]
name = "Info Martí "
slug = "info-marti"
only_newest = false
content_type = "articles"
[runtime]
api_key = "demo-key"
max_articles = 25
oldest_article = 7
verbose_p = false
[results]
output_directory = "../feed-out"
output_file_name = "rss.xml"
[logging]
log_file = "../logs/pygea.log"
default_log_level = "INFO"
""".strip() + "\n"
    toml_file.write_text(toml_text, encoding="utf-8")

    loaded = load_config(toml_file)

    # "../" in the config climbs out of "configs/" back into tmp_path.
    expected_output_dir = (tmp_path / "feed-out").resolve()
    expected_log_file = (tmp_path / "logs" / "pygea.log").resolve()
    # The trailing space in the feed name is part of the fixture; it must come
    # back untouched.
    expected_feeds = (
        {
            "name": "Info Martí ",
            "slug": "info-marti",
            "only_newest": False,
            "content_type": "articles",
        },
    )

    assert loaded.domain == "www.martinoticias.com"
    assert loaded.default_content_type == "articles"
    assert loaded.results.output_directory == expected_output_dir
    assert loaded.logging.log_file == expected_log_file
    assert loaded.feeds == expected_feeds
def test_load_config_rejects_invalid_slug(tmp_path: Path) -> None:
    """Expect ``load_config`` to raise ``ValueError`` for a slug containing a space.

    The raised message must match the pattern ``"Feed slug"``.
    """
    toml_file = tmp_path / "pygea.toml"
    # Minimal config: one feed whose slug ("bad slug") is the only thing wrong.
    toml_text = """
domain = "www.martinoticias.com"
[[feeds]]
name = "Titulares"
slug = "bad slug"
""".strip() + "\n"
    toml_file.write_text(toml_text, encoding="utf-8")

    with pytest.raises(ValueError, match="Feed slug"):
        load_config(toml_file)

84
tests/test_entrypoint.py Normal file
View file

@ -0,0 +1,84 @@
import tomllib
from pathlib import Path
from pygea import main as main_module
class StubPangeaFeed:
    """Test double for the feed class used by the entrypoint test.

    It does no fetching or feed generation; ``disgorge`` just writes a
    placeholder RSS file under the configured output directory.
    """

    def __init__(self, config, feeds):
        """Keep the config and the first entry of ``feeds``."""
        self.config = config
        self.feed = feeds[0]

    def acquire_content(self) -> None:
        """Do nothing; the stub has no content to fetch."""

    def generate_feed(self) -> None:
        """Do nothing; the stub builds no feed."""

    def disgorge(self, slug: str):
        """Write ``<output_directory>/<slug>/rss.xml`` and return its path.

        The per-slug directory is created if missing, and the file always
        contains the fixed placeholder ``<rss />`` followed by a newline.
        """
        feed_dir = self.config.results.output_directory / slug
        feed_dir.mkdir(parents=True, exist_ok=True)
        rss_file = feed_dir / "rss.xml"
        rss_file.write_text("<rss />\n", encoding="utf-8")
        return rss_file
def test_main_writes_manifest_toml_with_absolute_file_urls(
    tmp_path: Path, monkeypatch
) -> None:
    """Run ``main`` with a stubbed feed class and check the manifest it writes.

    Two feeds are configured with a relative output directory (``feed``).
    ``main`` is expected to return 0 and to write ``feed/manifest.toml`` next
    to the config file, listing each feed's name, slug and the ``file://`` URI
    of its resolved ``rss.xml``.
    """
    toml_file = tmp_path / "pygea.toml"
    toml_text = """
domain = "www.martinoticias.com"
[[feeds]]
name = "Info Martí "
slug = "info-marti"
only_newest = false
[[feeds]]
name = "Titulares"
slug = "titulares"
only_newest = true
content_type = "articles"
[runtime]
api_key = "demo-key"
verbose_p = false
[results]
output_directory = "feed"
output_file_name = "rss.xml"
[logging]
log_file = "logs/pygea.log"
default_log_level = "INFO"
""".strip() + "\n"
    toml_file.write_text(toml_text, encoding="utf-8")

    def stub_feed_class():
        # Stand-in for main_module.feed_class: hands back the stub class.
        return StubPangeaFeed

    monkeypatch.setattr(main_module, "feed_class", stub_feed_class)

    exit_code = main_module.main(["--config", str(toml_file)])

    feed_root = tmp_path / "feed"
    manifest_path = feed_root / "manifest.toml"

    def rss_uri(slug: str) -> str:
        # Expected URL form: file:// URI of the resolved per-slug rss.xml.
        return (feed_root / slug / "rss.xml").resolve().as_uri()

    expected_manifest = {
        "feeds": [
            {
                "name": "Info Martí ",
                "slug": "info-marti",
                "url": rss_uri("info-marti"),
            },
            {
                "name": "Titulares",
                "slug": "titulares",
                "url": rss_uri("titulares"),
            },
        ]
    }

    assert exit_code == 0
    assert manifest_path.exists()
    manifest = tomllib.loads(manifest_path.read_text(encoding="utf-8"))
    assert manifest == expected_manifest