"""Script entry point that runs ``RssFeedSpider`` through a ``CrawlerProcess``.

The project settings are loaded at import time and copied into a plain
dict whose ``FEEDS`` entry directs output to ``out/feed.rss``.
"""
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from . import colorlog
from .postprocessing import SortRssItems
from .spiders.rss_spider import RssFeedSpider

# Settings as resolved by Scrapy for the current project.
base_settings = get_project_settings()

# Plain-dict copy of the project settings with the FEEDS entry replaced.
# NOTE(review): the "rss" format is presumably registered by the project
# settings (e.g. via FEED_EXPORTERS) -- confirm.
settings = dict(base_settings)
settings["FEEDS"] = {
    "out/feed.rss": {
        "format": "rss",
        # No post-processing plugins are enabled for this feed.
        "postprocessing": [],
    },
}

# Runs at import time, after the settings have been assembled.
colorlog.load_colorlog()

# Feed URLs handed to the spider through its ``urls`` keyword argument.
urls = ["https://www.nasa.gov/rss/dyn/breaking_news.rss"]


def entrypoint():
    """Crawl ``urls`` with ``RssFeedSpider`` and return once crawling is done."""
    crawler_process = CrawlerProcess(settings)
    crawler_process.crawl(RssFeedSpider, urls=urls)
    # Blocks here until the crawling is finished.
    crawler_process.start()