"""Tests for building the repub media pipelines from a crawler-like object."""

from pathlib import Path
from types import SimpleNamespace

import pytest

from repub.config import (
    FeedConfig,
    RepublisherConfig,
    build_base_settings,
    build_feed_settings,
)
from repub.pipelines import AudioPipeline, FilePipeline, VideoPipeline


def build_test_crawler(tmp_path: Path) -> SimpleNamespace:
    """Return a minimal crawler stand-in backed by settings for one feed.

    A ``RepublisherConfig`` holding a single "nasa" feed is assembled with
    its output directory at ``<tmp_path>/mirror`` (resolved), and the
    per-feed settings are derived from it.

    Args:
        tmp_path: Directory under which the config path and the output
            directory are placed.

    Returns:
        A ``SimpleNamespace`` exposing ``settings`` (the per-feed settings)
        and ``request_fingerprinter`` (a bare ``object()`` placeholder).
    """
    feed_name = "nasa"
    mirror_dir = (tmp_path / "mirror").resolve()

    nasa_feed = FeedConfig(
        name=feed_name,
        url="https://www.nasa.gov/rss/dyn/breaking_news.rss",
    )
    republisher_config = RepublisherConfig(
        config_path=tmp_path / "repub.toml",
        out_dir=mirror_dir,
        feeds=(nasa_feed,),
        scrapy_settings={},
    )

    feed_settings = build_feed_settings(
        build_base_settings(republisher_config),
        out_dir=mirror_dir,
        feed_name=feed_name,
    )

    # NOTE(review): presumably the pipelines' ``from_crawler`` only reads
    # these two attributes -- confirm against repub.pipelines.
    return SimpleNamespace(
        settings=feed_settings,
        request_fingerprinter=object(),
    )


@pytest.mark.parametrize(
    ("pipeline_cls", "store_setting"),
    [
        (AudioPipeline, "AUDIO_STORE"),
        (VideoPipeline, "VIDEO_STORE"),
        (FilePipeline, "FILES_STORE"),
    ],
)
def test_pipeline_from_crawler_uses_configured_store(
    tmp_path: Path, pipeline_cls, store_setting: str
) -> None:
    """Check that ``from_crawler`` wires up settings and the store location.

    For each pipeline class, the instance must hold the very same settings
    object as the crawler, and its store's ``basedir`` must equal the value
    found under the matching store setting.
    """
    fake_crawler = build_test_crawler(tmp_path)

    built_pipeline = pipeline_cls.from_crawler(fake_crawler)

    # Identity, not equality: the pipeline must keep the crawler's own
    # settings object rather than a copy.
    assert built_pipeline.settings is fake_crawler.settings

    actual_basedir = built_pipeline.store.basedir
    expected_basedir = fake_crawler.settings[store_setting]
    assert actual_basedir == expected_basedir