"""Tests for ``repub.job_runner``: crawl settings and staged-feed publishing."""

from pathlib import Path

import pytest

from repub import job_runner as job_runner_module
from repub.config import FeedConfig, feed_output_path, staged_feed_output_path
from repub.job_runner import JobSourceConfig, _build_crawl_settings

# Staged content that the successful-crawl test expects to see published.
VALID_FEED = 'new\n'


def _demo_feed() -> FeedConfig:
    """Return the feed definition shared by the crawl-settings tests."""
    return FeedConfig(
        name="Demo Feed",
        slug="demo",
        url="https://source.example/feed.rss",
    )


def _seed_feed_files(
    public_path: Path, staged_path: Path, *, staged_content: str
) -> None:
    """Write an "old" public feed and a staged feed holding *staged_content*."""
    # NOTE(review): only the public feed's parent directory is created here;
    # presumably the staged path lives in that same directory -- confirm.
    public_path.parent.mkdir(parents=True)
    public_path.write_text("old\n", encoding="utf-8")
    staged_path.write_text(staged_content, encoding="utf-8")


def _run_worker(tmp_path: Path, out_dir: Path) -> int:
    """Invoke ``job_runner.main`` with the fixed demo arguments."""
    argv = [
        "--job-id",
        "1",
        "--execution-id",
        "2",
        "--db-path",
        str(tmp_path / "republisher.db"),
        "--out-dir",
        str(out_dir),
        "--stats-path",
        str(tmp_path / "stats.jsonl"),
    ]
    return job_runner_module.main(argv)


def test_build_crawl_settings_passes_feed_url_to_spider(tmp_path: Path) -> None:
    """The ``feed_url`` argument is exposed as ``REPUBLISHER_FEED_URL``."""
    crawl_settings = _build_crawl_settings(
        out_dir=tmp_path / "out",
        feed=_demo_feed(),
        stats_path=tmp_path / "stats.jsonl",
        feed_url="https://mirror.example",
    )

    assert crawl_settings["REPUBLISHER_FEED_URL"] == "https://mirror.example"


def test_build_crawl_settings_requires_non_empty_feed_url(
    tmp_path: Path,
) -> None:
    """An empty ``feed_url`` is rejected with a ``ValueError``."""
    expected_error = pytest.raises(
        ValueError, match="feed_url setting is required"
    )

    with expected_error:
        _build_crawl_settings(
            out_dir=tmp_path / "out",
            feed=_demo_feed(),
            stats_path=tmp_path / "stats.jsonl",
            feed_url="",
        )


def test_main_publishes_staged_feed_after_successful_crawl(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
    """After a crawl exiting 0, the staged feed replaces the public feed."""
    out_dir = tmp_path / "out"
    published = feed_output_path(out_dir=out_dir, feed_slug="demo")
    staged = staged_feed_output_path(out_dir=out_dir, feed_slug="demo")
    _seed_feed_files(published, staged, staged_content=VALID_FEED)
    _patch_worker_dependencies(monkeypatch, exit_code=0)

    result = _run_worker(tmp_path, out_dir)

    assert result == 0
    assert published.read_text(encoding="utf-8") == VALID_FEED
    # The staged file must be gone once it has been published.
    assert not staged.exists()


def test_main_does_not_publish_unusable_staged_feed_after_successful_crawl(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
    """A staged feed containing only a newline is not published.

    ``main`` is expected to return 1 even though the crawl itself exited 0,
    leaving both the public and the staged files untouched.
    """
    out_dir = tmp_path / "out"
    published = feed_output_path(out_dir=out_dir, feed_slug="demo")
    staged = staged_feed_output_path(out_dir=out_dir, feed_slug="demo")
    _seed_feed_files(published, staged, staged_content='\n')
    _patch_worker_dependencies(monkeypatch, exit_code=0)

    result = _run_worker(tmp_path, out_dir)

    assert result == 1
    assert published.read_text(encoding="utf-8") == "old\n"
    assert staged.read_text(encoding="utf-8") == '\n'


def test_main_does_not_publish_staged_feed_after_failed_crawl(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
    """After a crawl exiting 1, public and staged feeds are left as they were."""
    out_dir = tmp_path / "out"
    published = feed_output_path(out_dir=out_dir, feed_slug="demo")
    staged = staged_feed_output_path(out_dir=out_dir, feed_slug="demo")
    _seed_feed_files(published, staged, staged_content=VALID_FEED)
    _patch_worker_dependencies(monkeypatch, exit_code=1)

    result = _run_worker(tmp_path, out_dir)

    assert result == 1
    assert published.read_text(encoding="utf-8") == "old\n"
    assert staged.read_text(encoding="utf-8") == VALID_FEED


def _patch_worker_dependencies(
    monkeypatch: pytest.MonkeyPatch, *, exit_code: int
) -> None:
    """Replace the job runner's collaborators with in-memory stand-ins.

    Patches ``_load_job_source_config``, ``load_feed_url``, ``CrawlerProcess``
    and ``_run_crawl`` on the job runner module; the stubbed ``_run_crawl``
    returns *exit_code*.
    """

    def _stub_load_job_source_config(
        *, db_path: object, job_id: object
    ) -> JobSourceConfig:
        # A fresh config per call, regardless of the requested job.
        return JobSourceConfig(
            source_name="Demo",
            source_slug="demo",
            source_type="feed",
            spider_arguments={},
            feed_url="https://source.example/feed.rss",
        )

    def _stub_load_feed_url() -> str:
        return "https://mirror.example"

    def _stub_crawler_process(settings: object) -> object:
        # An opaque placeholder stands in for the crawler process.
        return object()

    def _stub_run_crawl(
        *, process: object, feed: object, spider_arguments: object
    ) -> int:
        return exit_code

    monkeypatch.setattr(
        job_runner_module, "_load_job_source_config", _stub_load_job_source_config
    )
    monkeypatch.setattr(job_runner_module, "load_feed_url", _stub_load_feed_url)
    monkeypatch.setattr(job_runner_module, "CrawlerProcess", _stub_crawler_process)
    monkeypatch.setattr(job_runner_module, "_run_crawl", _stub_run_crawl)