republisher/tests/test_job_runner.py

166 lines
5.1 KiB
Python
Raw Normal View History

2026-03-31 12:14:47 +02:00
from pathlib import Path
import pytest
2026-05-27 10:57:21 +02:00
from repub import job_runner as job_runner_module
from repub.config import FeedConfig, feed_output_path, staged_feed_output_path
from repub.job_runner import JobSourceConfig, _build_crawl_settings
VALID_FEED = '<rss version="2.0"><channel><title>new</title></channel></rss>\n'
2026-03-31 12:14:47 +02:00
def test_build_crawl_settings_passes_feed_url_to_spider(tmp_path: Path) -> None:
    """The feed_url handed to _build_crawl_settings is returned under
    the REPUBLISHER_FEED_URL settings key.
    """
    mirror_url = "https://mirror.example"
    source_feed = FeedConfig(
        name="Demo Feed",
        slug="demo",
        url="https://source.example/feed.rss",
    )

    crawl_settings = _build_crawl_settings(
        out_dir=tmp_path / "out",
        feed=source_feed,
        stats_path=tmp_path / "stats.jsonl",
        feed_url=mirror_url,
    )

    # The mirror URL (not the source feed URL) must be what ends up in settings.
    assert crawl_settings["REPUBLISHER_FEED_URL"] == mirror_url
def test_build_crawl_settings_requires_non_empty_feed_url(
    tmp_path: Path,
) -> None:
    """An empty feed_url makes _build_crawl_settings raise ValueError."""
    expected_message = "feed_url setting is required"

    with pytest.raises(ValueError, match=expected_message):
        source_feed = FeedConfig(
            name="Demo Feed",
            slug="demo",
            url="https://source.example/feed.rss",
        )
        _build_crawl_settings(
            out_dir=tmp_path / "out",
            feed=source_feed,
            stats_path=tmp_path / "stats.jsonl",
            feed_url="",  # the empty value under test
        )
2026-05-27 10:57:21 +02:00
def test_main_publishes_staged_feed_after_successful_crawl(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
    """With a stubbed crawl exit code of 0 and a VALID_FEED staged file,
    main() returns 0, the public file holds VALID_FEED, and the staged
    file no longer exists.
    """
    previous_feed = "<rss>old</rss>\n"
    output_root = tmp_path / "out"
    published = feed_output_path(out_dir=output_root, feed_slug="demo")
    staged = staged_feed_output_path(out_dir=output_root, feed_slug="demo")

    # Seed an already-published feed and a staged candidate before running.
    published.parent.mkdir(parents=True)
    published.write_text(previous_feed, encoding="utf-8")
    staged.write_text(VALID_FEED, encoding="utf-8")
    _patch_worker_dependencies(monkeypatch, exit_code=0)

    cli_args = [
        "--job-id",
        "1",
        "--execution-id",
        "2",
        "--db-path",
        str(tmp_path / "republisher.db"),
        "--out-dir",
        str(output_root),
        "--stats-path",
        str(tmp_path / "stats.jsonl"),
    ]
    result = job_runner_module.main(cli_args)

    assert result == 0
    assert published.read_text(encoding="utf-8") == VALID_FEED
    assert not staged.exists()
def test_main_does_not_publish_unusable_staged_feed_after_successful_crawl(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
    """With a stubbed crawl exit code of 0 but a staged file that is only
    an empty <rss/> element, main() returns 1 and both the public and the
    staged files keep their original contents.
    """
    previous_feed = "<rss>old</rss>\n"
    unusable_feed = '<rss version="2.0"/>\n'
    output_root = tmp_path / "out"
    published = feed_output_path(out_dir=output_root, feed_slug="demo")
    staged = staged_feed_output_path(out_dir=output_root, feed_slug="demo")

    # Seed an already-published feed and a staged candidate with no channel.
    published.parent.mkdir(parents=True)
    published.write_text(previous_feed, encoding="utf-8")
    staged.write_text(unusable_feed, encoding="utf-8")
    _patch_worker_dependencies(monkeypatch, exit_code=0)

    cli_args = [
        "--job-id",
        "1",
        "--execution-id",
        "2",
        "--db-path",
        str(tmp_path / "republisher.db"),
        "--out-dir",
        str(output_root),
        "--stats-path",
        str(tmp_path / "stats.jsonl"),
    ]
    result = job_runner_module.main(cli_args)

    assert result == 1
    assert published.read_text(encoding="utf-8") == previous_feed
    assert staged.read_text(encoding="utf-8") == unusable_feed
def test_main_does_not_publish_staged_feed_after_failed_crawl(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
    """With a stubbed crawl exit code of 1, main() returns 1 and leaves
    both the public file and the (valid) staged file untouched.
    """
    previous_feed = "<rss>old</rss>\n"
    output_root = tmp_path / "out"
    published = feed_output_path(out_dir=output_root, feed_slug="demo")
    staged = staged_feed_output_path(out_dir=output_root, feed_slug="demo")

    # Seed an already-published feed and a valid staged candidate.
    published.parent.mkdir(parents=True)
    published.write_text(previous_feed, encoding="utf-8")
    staged.write_text(VALID_FEED, encoding="utf-8")
    _patch_worker_dependencies(monkeypatch, exit_code=1)

    cli_args = [
        "--job-id",
        "1",
        "--execution-id",
        "2",
        "--db-path",
        str(tmp_path / "republisher.db"),
        "--out-dir",
        str(output_root),
        "--stats-path",
        str(tmp_path / "stats.jsonl"),
    ]
    result = job_runner_module.main(cli_args)

    assert result == 1
    assert published.read_text(encoding="utf-8") == previous_feed
    assert staged.read_text(encoding="utf-8") == VALID_FEED
def _patch_worker_dependencies(
    monkeypatch: pytest.MonkeyPatch, *, exit_code: int
) -> None:
    """Swap four attributes of the job_runner module for in-memory stubs.

    Replaces ``_load_job_source_config``, ``load_feed_url``,
    ``CrawlerProcess`` and ``_run_crawl`` on ``job_runner_module``.

    Args:
        monkeypatch: pytest fixture used to install the replacements.
        exit_code: value the stubbed ``_run_crawl`` returns.
    """

    def fake_load_job_source_config(*, db_path, job_id):
        # A fresh config object is built on every call; arguments are ignored.
        return JobSourceConfig(
            source_name="Demo",
            source_slug="demo",
            source_type="feed",
            spider_arguments={},
            feed_url="https://source.example/feed.rss",
        )

    def fake_load_feed_url():
        return "https://mirror.example"

    def fake_crawler_process(settings):
        # Opaque placeholder; the stubbed _run_crawl never uses it.
        return object()

    def fake_run_crawl(*, process, feed, spider_arguments):
        return exit_code

    replacements = (
        ("_load_job_source_config", fake_load_job_source_config),
        ("load_feed_url", fake_load_feed_url),
        ("CrawlerProcess", fake_crawler_process),
        ("_run_crawl", fake_run_crawl),
    )
    for attribute_name, stub in replacements:
        monkeypatch.setattr(job_runner_module, attribute_name, stub)