# republisher/repub/entrypoint.py

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from .spiders.rss_spider import RssFeedSpider

from .postprocessing import SortRssItems

from . import colorlog

# Settings resolved from the Scrapy project configuration.
base_settings = get_project_settings()

# Copy the project settings into a plain dict, then override FEEDS so the
# only configured feed is out/feed.rss with format "rss" and an empty
# post-processing list.
settings = dict(base_settings)
settings["FEEDS"] = {
    "out/feed.rss": {"format": "rss", "postprocessing": []},
}

# Runs at import time, after the settings are built.
# NOTE(review): presumably sets up coloured logging - confirm in repub.colorlog.
colorlog.load_colorlog()


# Feed URLs handed to the spider by entrypoint().
urls = [
    "https://www.nasa.gov/rss/dyn/breaking_news.rss",
]


def entrypoint():
    """Run ``RssFeedSpider`` over the module-level ``urls``.

    Creates a ``CrawlerProcess`` from the module-level ``settings``,
    schedules the spider with ``urls=urls`` and starts the process.
    The call does not return until the crawling is finished.
    """
    crawler_process = CrawlerProcess(settings)
    crawler_process.crawl(RssFeedSpider, urls=urls)
    # start() blocks here until the crawling is finished.
    crawler_process.start()