185 lines
5.2 KiB
Python
185 lines
5.2 KiB
Python
from os import path as os_path
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from repub.config import (
|
|
FeedConfig,
|
|
RepublisherConfig,
|
|
build_base_settings,
|
|
build_feed_settings,
|
|
load_config,
|
|
)
|
|
|
|
|
|
def test_load_config_resolves_relative_out_dir_and_merges_imported_feeds(
    tmp_path: Path,
) -> None:
    """Check relative ``out_dir`` resolution and merging of imported feeds.

    Writes a feed manifest under ``imports/`` and a main config under
    ``configs/`` that points at the manifest through a relative
    ``feed_config_files`` entry and uses ``out_dir = "../mirror"``.

    Expects ``load_config`` to return ``<tmp_path>/mirror`` (resolved) as
    ``out_dir`` and the locally declared feed followed by the imported one
    in ``feeds``.
    """
    manifest_path = tmp_path / "imports" / "manifest.toml"
    manifest_path.parent.mkdir(parents=True)
    manifest_path.write_text(
        """
[[feeds]]
name = "Info Martí "
slug = "info-marti"
url = "file:///srv/pygea/info-marti/rss.xml"
""".strip()
        + "\n",
        encoding="utf-8",
    )

    config_path = tmp_path / "configs" / "repub.toml"
    config_path.parent.mkdir(parents=True)
    # The reference is embedded in a TOML basic string, where a backslash
    # starts an escape sequence. Forward slashes keep the fixture valid TOML
    # on Windows as well; on POSIX the string is unchanged.
    manifest_ref = Path(
        os_path.relpath(manifest_path, start=config_path.parent)
    ).as_posix()
    config_path.write_text(
        f"""
out_dir = "../mirror"
feed_config_files = ["{manifest_ref}"]

[[feeds]]
name = "Guardian Project Podcast"
slug = "gp-pod"
url = "https://guardianproject.info/podcast/podcast.xml"
""".strip()
        + "\n",
        encoding="utf-8",
    )

    config = load_config(config_path)

    assert config.out_dir == (tmp_path / "mirror").resolve()
    assert config.feeds == (
        FeedConfig(
            name="Guardian Project Podcast",
            slug="gp-pod",
            url="https://guardianproject.info/podcast/podcast.xml",
        ),
        FeedConfig(
            name="Info Martí ",
            slug="info-marti",
            url="file:///srv/pygea/info-marti/rss.xml",
        ),
    )
|
|
|
|
|
|
def test_load_config_preserves_absolute_out_dir(tmp_path: Path) -> None:
    """Check that an absolute ``out_dir`` comes back from ``load_config`` as-is.

    Writes a config whose ``out_dir`` is an absolute path under ``tmp_path``
    and expects ``config.out_dir`` to compare equal to that same path.
    """
    absolute_out_dir = (tmp_path / "absolute-out").resolve()
    config_path = tmp_path / "repub.toml"
    # The path is embedded in a TOML basic string, where a backslash starts
    # an escape sequence. Writing it with forward slashes keeps the fixture
    # valid TOML on Windows; on POSIX the text is identical to str(path).
    config_path.write_text(
        f"""
out_dir = "{absolute_out_dir.as_posix()}"

[[feeds]]
name = "NASA Breaking News"
slug = "nasa"
url = "https://www.nasa.gov/rss/dyn/breaking_news.rss"
""".strip()
        + "\n",
        encoding="utf-8",
    )

    config = load_config(config_path)

    assert config.out_dir == absolute_out_dir
|
|
|
|
|
|
def test_load_config_rejects_duplicate_imported_slugs(tmp_path: Path) -> None:
    """Check that a slug shared by a local and an imported feed is rejected.

    The imported manifest and the main config both declare a feed with the
    slug ``shared-slug``; ``load_config`` is expected to raise ``ValueError``
    with a message matching ``"Feed slug"``.
    """
    imported_toml = """
[[feeds]]
name = "Imported Feed"
slug = "shared-slug"
url = "file:///srv/pygea/shared-slug/rss.xml"
""".strip()
    imported_file = tmp_path / "manifest.toml"
    imported_file.write_text(imported_toml + "\n", encoding="utf-8")

    # The manifest sits next to the main config, so its bare file name is
    # used as the feed_config_files entry.
    main_toml = f"""
out_dir = "out"
feed_config_files = ["{imported_file.name}"]

[[feeds]]
name = "Local Feed"
slug = "shared-slug"
url = "https://example.com/feed.xml"
""".strip()
    main_file = tmp_path / "repub.toml"
    main_file.write_text(main_toml + "\n", encoding="utf-8")

    with pytest.raises(ValueError, match="Feed slug"):
        load_config(main_file)
|
|
|
|
|
|
def test_build_feed_settings_derives_output_paths_from_feed_slug(
    tmp_path: Path,
) -> None:
    """Check the per-feed paths produced for the slug ``info-marti``.

    Builds base settings from a ``RepublisherConfig`` carrying a
    ``LOG_LEVEL`` override, derives the feed settings for ``info-marti``,
    and compares the log file, HTTP cache, media store and ``FEEDS``
    entries against paths under the output directory.
    """
    mirror_root = (tmp_path / "mirror").resolve()
    only_feed = FeedConfig(
        name="Info Martí ",
        slug="info-marti",
        url="file:///srv/pygea/info-marti/rss.xml",
    )
    republisher = RepublisherConfig(
        config_path=tmp_path / "repub.toml",
        out_dir=mirror_root,
        feeds=(only_feed,),
        scrapy_settings={"LOG_LEVEL": "DEBUG"},
    )

    shared = build_base_settings(republisher)
    per_feed = build_feed_settings(
        shared, out_dir=mirror_root, feed_slug="info-marti"
    )

    # The scrapy_settings override should be visible in the base settings.
    assert shared["LOG_LEVEL"] == "DEBUG"

    # Path-valued settings, checked in the same order as listed here.
    feed_root = mirror_root / "info-marti"
    expected_paths = {
        "REPUBLISHER_OUT_DIR": mirror_root,
        "LOG_FILE": mirror_root / "logs" / "info-marti.log",
        "HTTPCACHE_DIR": mirror_root / "httpcache",
        "IMAGES_STORE": feed_root / "images",
        "AUDIO_STORE": feed_root / "audio",
        "VIDEO_STORE": feed_root / "video",
        "FILES_STORE": feed_root / "files",
    }
    for setting_name, expected_path in expected_paths.items():
        assert per_feed[setting_name] == str(expected_path)

    rss_target = str(mirror_root / "info-marti.rss")
    assert per_feed["FEEDS"] == {
        rss_target: {
            "format": "rss",
            "postprocessing": [],
            "feed_name": "info-marti",
        }
    }
|
|
|
|
|
|
def test_build_feed_settings_uses_runtime_media_dir_overrides(tmp_path: Path) -> None:
    """Check that custom media directory names reach the per-feed store paths.

    ``REPUBLISHER_VIDEO_DIR`` and ``REPUBLISHER_AUDIO_DIR`` are supplied via
    ``scrapy_settings``; the feed settings for ``gp-pod`` are expected to
    keep those values and to use them as the last component of
    ``VIDEO_STORE`` and ``AUDIO_STORE``.
    """
    mirror_root = (tmp_path / "mirror").resolve()
    media_overrides = {
        "REPUBLISHER_VIDEO_DIR": "videos-custom",
        "REPUBLISHER_AUDIO_DIR": "audio-custom",
    }
    podcast_feed = FeedConfig(
        name="Guardian Project Podcast",
        slug="gp-pod",
        url="https://guardianproject.info/podcast/podcast.xml",
    )
    republisher = RepublisherConfig(
        config_path=tmp_path / "repub.toml",
        out_dir=mirror_root,
        feeds=(podcast_feed,),
        scrapy_settings=media_overrides,
    )

    per_feed = build_feed_settings(
        build_base_settings(republisher),
        out_dir=mirror_root,
        feed_slug="gp-pod",
    )

    # The override values themselves should pass through unchanged.
    assert per_feed["REPUBLISHER_VIDEO_DIR"] == "videos-custom"
    assert per_feed["REPUBLISHER_AUDIO_DIR"] == "audio-custom"

    # The store paths should sit under the feed's own directory.
    feed_root = mirror_root / "gp-pod"
    assert per_feed["VIDEO_STORE"] == str(feed_root / "videos-custom")
    assert per_feed["AUDIO_STORE"] == str(feed_root / "audio-custom")
|