Fix Scrapy media pipeline initialization
This commit is contained in:
parent
34d26f7def
commit
20b9759193
2 changed files with 71 additions and 19 deletions
49
tests/test_pipelines.py
Normal file
49
tests/test_pipelines.py
Normal file
|
|
@@ -0,0 +1,49 @@
|
|||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from repub.config import (
|
||||
FeedConfig,
|
||||
RepublisherConfig,
|
||||
build_base_settings,
|
||||
build_feed_settings,
|
||||
)
|
||||
from repub.pipelines import AudioPipeline, FilePipeline, VideoPipeline
|
||||
|
||||
|
||||
def build_test_crawler(tmp_path: Path) -> SimpleNamespace:
    """Return a minimal crawler stand-in for pipeline construction tests.

    The returned namespace exposes only two attributes:

    * ``settings`` -- the feed settings built for a single feed named
      ``"nasa"`` whose output directory is ``tmp_path / "mirror"`` (resolved).
    * ``request_fingerprinter`` -- a bare ``object()`` placeholder.
    """
    mirror_root = (tmp_path / "mirror").resolve()
    nasa_feed = FeedConfig(
        name="nasa",
        url="https://www.nasa.gov/rss/dyn/breaking_news.rss",
    )
    republisher = RepublisherConfig(
        config_path=tmp_path / "repub.toml",
        out_dir=mirror_root,
        feeds=(nasa_feed,),
        scrapy_settings={},
    )
    feed_settings = build_feed_settings(
        build_base_settings(republisher),
        out_dir=mirror_root,
        feed_name="nasa",
    )
    return SimpleNamespace(
        settings=feed_settings,
        request_fingerprinter=object(),
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("pipeline_cls", "store_setting"),
    [
        (AudioPipeline, "AUDIO_STORE"),
        (VideoPipeline, "VIDEO_STORE"),
        (FilePipeline, "FILES_STORE"),
    ],
)
def test_pipeline_from_crawler_uses_configured_store(
    tmp_path: Path, pipeline_cls, store_setting: str
) -> None:
    """Check that ``from_crawler`` wires each pipeline to its store setting.

    For every parametrized pipeline class, the constructed pipeline must keep
    the crawler's settings object itself and its store's ``basedir`` must
    equal the value found under ``store_setting`` in those settings.
    """
    fake_crawler = build_test_crawler(tmp_path)
    expected_settings = fake_crawler.settings

    built = pipeline_cls.from_crawler(fake_crawler)

    # Identity, not equality: the pipeline must hold the very same settings.
    assert built.settings is expected_settings
    assert built.store.basedir == expected_settings[store_setting]
|
||||
Loading…
Add table
Add a link
Reference in a new issue